diff -Nur gcc-4.9.4.orig/gcc/c/gccspec.c gcc-4.9.4/gcc/c/gccspec.c --- gcc-4.9.4.orig/gcc/c/gccspec.c 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/c/gccspec.c 2016-08-08 20:37:45.494269627 +0200 @@ -104,5 +104,12 @@ return 0; /* Not used for C. */ } +/* Called before parsing the spec to tell which language driver is used. */ +int +lang_specific_is_c_plus_plus (void) +{ + return 0; +} + /* Number of extra output files that lang_specific_pre_link may generate. */ int lang_specific_extra_outfiles = 0; /* Not used for C. */ diff -Nur gcc-4.9.4.orig/gcc/c-family/c.opt gcc-4.9.4/gcc/c-family/c.opt --- gcc-4.9.4.orig/gcc/c-family/c.opt 2014-04-03 15:41:55.000000000 +0200 +++ gcc-4.9.4/gcc/c-family/c.opt 2016-08-08 20:37:45.494269627 +0200 @@ -851,10 +851,6 @@ fbuilding-libgcc C ObjC C++ ObjC++ Undocumented Var(flag_building_libgcc) -fbuiltin -C ObjC C++ ObjC++ Var(flag_no_builtin, 0) -Recognize built-in functions - fbuiltin- C ObjC C++ ObjC++ Joined diff -Nur gcc-4.9.4.orig/gcc/c-family/cppspec.c gcc-4.9.4/gcc/c-family/cppspec.c --- gcc-4.9.4.orig/gcc/c-family/cppspec.c 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/c-family/cppspec.c 2016-08-08 20:37:45.494269627 +0200 @@ -194,5 +194,12 @@ return 0; /* Not used for cpp. */ } +/* Called before parsing the spec to tell which language driver is used. */ +int +lang_specific_is_c_plus_plus (void) +{ + return 0; +} + /* Number of extra output files that lang_specific_pre_link may generate. */ int lang_specific_extra_outfiles = 0; /* Not used for cpp. */ diff -Nur gcc-4.9.4.orig/gcc/common/config/nds32/nds32-common.c gcc-4.9.4/gcc/common/config/nds32/nds32-common.c --- gcc-4.9.4.orig/gcc/common/config/nds32/nds32-common.c 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/common/config/nds32/nds32-common.c 2016-08-08 20:37:45.494269627 +0200 @@ -1,5 +1,5 @@ /* Common hooks of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -74,15 +74,57 @@ /* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */ static const struct default_options nds32_option_optimization_table[] = { - /* Enable -fomit-frame-pointer by default at -O1 or higher. */ - { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 }, +#ifdef TARGET_DEFAULT_NO_MATH_ERRNO + /* Under some configuration, we would like to use -fno-math-errno by default + at all optimization levels for performance and code size consideration. + Please check gcc/config.gcc for more implementation details. */ + { OPT_LEVELS_ALL, OPT_fmath_errno, NULL, 0 }, +#endif +#ifndef TARGET_LINUX_ABI + /* Disable -fdelete-null-pointer-checks by default in ELF toolchain. */ + { OPT_LEVELS_ALL, OPT_flag_delete_null_pointer_checks, + NULL, 0 }, +#endif + /* Enable -fomit-frame-pointer by default at all optimization levels. */ + { OPT_LEVELS_ALL, OPT_fomit_frame_pointer, NULL, 1 }, + /* Enable -mrelax-hint by default at all optimization levels. */ + { OPT_LEVELS_ALL, OPT_mrelax_hint, NULL, 1 }, + /* Enalbe -malways-align by default at -O1 and above, but not -Os or -Og. */ + { OPT_LEVELS_1_PLUS_SPEED_ONLY, OPT_malways_align, NULL, 1 }, /* Enable -mv3push by default at -Os, but it is useless under V2 ISA. */ - { OPT_LEVELS_SIZE, OPT_mv3push, NULL, 1 }, + { OPT_LEVELS_SIZE, OPT_mv3push, NULL, 1 }, + /* Enable -mload-store-opt by default at -Os. */ + { OPT_LEVELS_SIZE, OPT_mload_store_opt, NULL, 1 }, + /* Enable -mregrename by default at -O1 and above. 
*/ + { OPT_LEVELS_1_PLUS, OPT_mregrename, NULL, 1 }, + /* Enable -mgcse by default at -O1 and above. */ + { OPT_LEVELS_1_PLUS, OPT_mgcse, NULL, 1 }, +#ifdef TARGET_OS_DEFAULT_IFC + /* Enable -mifc by default at -Os, but it is useless under V2/V3M ISA. */ + { OPT_LEVELS_SIZE, OPT_mifc, NULL, 1 }, +#endif +#ifdef TARGET_OS_DEFAULT_EX9 + /* Enable -mex9 by default at -Os, but it is useless under V2/V3M ISA. */ + { OPT_LEVELS_SIZE, OPT_mex9, NULL, 1 }, +#endif - { OPT_LEVELS_NONE, 0, NULL, 0 } + { OPT_LEVELS_NONE, 0, NULL, 0 } }; /* ------------------------------------------------------------------------ */ + +/* Implement TARGET_EXCEPT_UNWIND_INFO. */ +static enum unwind_info_type +nds32_except_unwind_info (struct gcc_options *opts ATTRIBUTE_UNUSED) +{ + if (TARGET_LINUX_ABI) + return UI_DWARF2; + + return UI_SJLJ; +} + +/* ------------------------------------------------------------------------ */ + /* Run-time Target Specification. */ @@ -95,16 +137,22 @@ Other MASK_XXX flags are set individually. By default we enable - TARGET_GP_DIRECT: Generate gp-imply instruction. - TARGET_16_BIT : Generate 16/32 bit mixed length instruction. - TARGET_PERF_EXT : Generate performance extention instrcution. - TARGET_CMOV : Generate conditional move instruction. */ + TARGET_16_BIT : Generate 16/32 bit mixed length instruction. + TARGET_EXT_PERF : Generate performance extention instrcution. + TARGET_EXT_PERF2 : Generate performance extention version 2 instrcution. + TARGET_EXT_STRING : Generate string extention instrcution. + TARGET_HW_ABS : Generate hardware abs instruction. + TARGET_CMOV : Generate conditional move instruction. */ #undef TARGET_DEFAULT_TARGET_FLAGS #define TARGET_DEFAULT_TARGET_FLAGS \ (TARGET_CPU_DEFAULT \ - | MASK_GP_DIRECT \ + | TARGET_DEFAULT_FPU_ISA \ + | TARGET_DEFAULT_FPU_FMA \ | MASK_16_BIT \ - | MASK_PERF_EXT \ + | MASK_EXT_PERF \ + | MASK_EXT_PERF2 \ + | MASK_EXT_STRING \ + | MASK_HW_ABS \ | MASK_CMOV) #undef TARGET_HANDLE_OPTION @@ -117,7 +165,7 @@ /* Defining the Output Assembler Language. */ #undef TARGET_EXCEPT_UNWIND_INFO -#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info +#define TARGET_EXCEPT_UNWIND_INFO nds32_except_unwind_info /* ------------------------------------------------------------------------ */ diff -Nur gcc-4.9.4.orig/gcc/common.opt gcc-4.9.4/gcc/common.opt --- gcc-4.9.4.orig/gcc/common.opt 2015-02-26 03:43:52.000000000 +0100 +++ gcc-4.9.4/gcc/common.opt 2016-08-08 20:37:45.494269627 +0200 @@ -898,6 +898,10 @@ Common Report Var(flag_btr_bb_exclusive) Optimization Restrict target load migration not to re-use registers in any basic block +fbuiltin +Common Var(flag_no_builtin, 0) +Recognize built-in functions + fcall-saved- Common Joined RejectNegative Var(common_deferred_options) Defer -fcall-saved- Mark as being preserved across functions @@ -1160,7 +1164,7 @@ Common ffat-lto-objects -Common Var(flag_fat_lto_objects) +Common Var(flag_fat_lto_objects) Init(1) Output lto objects containing both the intermediate language and binary output. 
ffinite-math-only @@ -2202,6 +2206,10 @@ Common Report Var(flag_tree_sra) Optimization Perform scalar replacement of aggregates +ftree-switch-shortcut +Common Report Var(flag_tree_switch_shortcut) Init(0) Optimization +Do fancy switch statement shortcutting + ftree-ter Common Report Var(flag_tree_ter) Optimization Replace temporary expressions in the SSA->normal pass diff -Nur gcc-4.9.4.orig/gcc/config/arm/arm.h gcc-4.9.4/gcc/config/arm/arm.h --- gcc-4.9.4.orig/gcc/config/arm/arm.h 2016-03-29 15:32:37.000000000 +0200 +++ gcc-4.9.4/gcc/config/arm/arm.h 2016-08-08 20:37:45.494269627 +0200 @@ -1162,7 +1162,7 @@ /* Tell IRA to use the order we define rather than messing it up with its own cost calculations. */ -#define HONOR_REG_ALLOC_ORDER +#define HONOR_REG_ALLOC_ORDER 1 /* Interrupt functions can only use registers that have already been saved by the prologue, even if they would normally be diff -Nur gcc-4.9.4.orig/gcc/config/i386/host-cygwin.c gcc-4.9.4/gcc/config/i386/host-cygwin.c --- gcc-4.9.4.orig/gcc/config/i386/host-cygwin.c 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/config/i386/host-cygwin.c 2016-08-08 20:37:45.494269627 +0200 @@ -62,7 +62,7 @@ fatal_error ("can%'t extend PCH file: %m"); } - base = mmap (NULL, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + base = mmap ((void *) 0x60000000, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (base == MAP_FAILED) base = NULL; diff -Nur gcc-4.9.4.orig/gcc/config/nds32/0001-Add-option-m16bit-mno-16bit-for-backward-compatibili.patch gcc-4.9.4/gcc/config/nds32/0001-Add-option-m16bit-mno-16bit-for-backward-compatibili.patch --- gcc-4.9.4.orig/gcc/config/nds32/0001-Add-option-m16bit-mno-16bit-for-backward-compatibili.patch 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/0001-Add-option-m16bit-mno-16bit-for-backward-compatibili.patch 2016-08-08 20:37:45.494269627 +0200 @@ -0,0 +1,26 @@ +From c8f442699258adea1df44e6a11906b6e98dbb793 Mon Sep 17 00:00:00 2001 +From: Kito Cheng +Date: Mon, 7 Dec 2015 17:50:51 +0800 +Subject: [PATCH 1/2] Add option -m16bit/-mno-16bit for backward compatibility + +--- + gcc/config/nds32/nds32.opt | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/gcc/config/nds32/nds32.opt b/gcc/config/nds32/nds32.opt +index ed3ccb9..78119a3 100644 +--- a/gcc/config/nds32/nds32.opt ++++ b/gcc/config/nds32/nds32.opt +@@ -129,6 +129,9 @@ m16-bit + Target Report Mask(16_BIT) + Generate 16-bit instructions. + ++m16bit ++Target Alias(m16-bit) Undocumented ++ + mrelax-hint + Target Report Mask(RELAX_HINT) + Insert relax hint for linker to do relaxation. 
+-- +2.4.3 + diff -Nur gcc-4.9.4.orig/gcc/config/nds32/0002-Use-default-crt-begin-end-.o-which-provide-by-gcc-in.patch gcc-4.9.4/gcc/config/nds32/0002-Use-default-crt-begin-end-.o-which-provide-by-gcc-in.patch --- gcc-4.9.4.orig/gcc/config/nds32/0002-Use-default-crt-begin-end-.o-which-provide-by-gcc-in.patch 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/0002-Use-default-crt-begin-end-.o-which-provide-by-gcc-in.patch 2016-08-08 20:37:45.494269627 +0200 @@ -0,0 +1,142 @@ +From 8079ff97a5ea42ac56765bce2b4855d24dcc7b10 Mon Sep 17 00:00:00 2001 +From: Kito Cheng +Date: Mon, 7 Dec 2015 10:25:03 +0800 +Subject: [PATCH 2/2] Use default crt[begin|end]*.o which provide by gcc in + linux toolchain + +--- + gcc/config/nds32/elf.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ + gcc/config/nds32/nds32.h | 46 ---------------------------------------------- + libgcc/config.host | 5 ++--- + 3 files changed, 48 insertions(+), 49 deletions(-) + +diff --git a/gcc/config/nds32/elf.h b/gcc/config/nds32/elf.h +index 808fd44..67e5b0e 100644 +--- a/gcc/config/nds32/elf.h ++++ b/gcc/config/nds32/elf.h +@@ -34,3 +34,49 @@ + NDS32_RELAX_SPEC \ + NDS32_IFC_SPEC \ + NDS32_EX9_SPEC ++ ++#define LIB_SPEC \ ++ " -lc -lgloss" ++ ++#define LIBGCC_SPEC \ ++ " -lgcc" ++ ++/* The option -mno-ctor-dtor can disable constructor/destructor feature ++ by applying different crt stuff. In the convention, crt0.o is the ++ startup file without constructor/destructor; ++ crt1.o, crti.o, crtbegin.o, crtend.o, and crtn.o are the ++ startup files with constructor/destructor. ++ Note that crt0.o, crt1.o, crti.o, and crtn.o are provided ++ by newlib/mculib/glibc/ublic, while crtbegin.o and crtend.o are ++ currently provided by GCC for nds32 target. ++ ++ For nds32 target so far: ++ If -mno-ctor-dtor, we are going to link ++ "crt0.o [user objects]". ++ If -mctor-dtor, we are going to link ++ "crt1.o crtbegin1.o [user objects] crtend1.o". ++ ++ Note that the TARGET_DEFAULT_CTOR_DTOR would effect the ++ default behavior. Check gcc/config.gcc for more information. */ ++#ifdef TARGET_DEFAULT_CTOR_DTOR ++ #define STARTFILE_SPEC \ ++ " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \ ++ " %{!mno-ctor-dtor:crtbegin1.o%s}" \ ++ " %{mcrt-arg:crtarg.o%s}" ++ #define ENDFILE_SPEC \ ++ " %{!mno-ctor-dtor:crtend1.o%s}" ++#else ++ #define STARTFILE_SPEC \ ++ " %{mctor-dtor|coverage:crt1.o%s;:crt0.o%s}" \ ++ " %{mctor-dtor|coverage:crtbegin1.o%s}" \ ++ " %{mcrt-arg:crtarg.o%s}" ++ #define ENDFILE_SPEC \ ++ " %{mctor-dtor|coverage:crtend1.o%s}" ++#endif ++ ++#define STARTFILE_CXX_SPEC \ ++ " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \ ++ " %{!mno-ctor-dtor:crtbegin1.o%s}" \ ++ " %{mcrt-arg:crtarg.o%s}" ++#define ENDFILE_CXX_SPEC \ ++ " %{!mno-ctor-dtor:crtend1.o%s}" +diff --git a/gcc/config/nds32/nds32.h b/gcc/config/nds32/nds32.h +index 954f54f..19978a0 100644 +--- a/gcc/config/nds32/nds32.h ++++ b/gcc/config/nds32/nds32.h +@@ -984,52 +984,6 @@ enum nds32_builtins + " %{mext-zol:-mzol-ext}" \ + " %{O|O1|O2|O3|Ofast:-O1;:-Os}" + +-#define LIB_SPEC \ +- " -lc -lgloss" +- +-#define LIBGCC_SPEC \ +- " -lgcc" +- +-/* The option -mno-ctor-dtor can disable constructor/destructor feature +- by applying different crt stuff. In the convention, crt0.o is the +- startup file without constructor/destructor; +- crt1.o, crti.o, crtbegin.o, crtend.o, and crtn.o are the +- startup files with constructor/destructor. 
+- Note that crt0.o, crt1.o, crti.o, and crtn.o are provided +- by newlib/mculib/glibc/ublic, while crtbegin.o and crtend.o are +- currently provided by GCC for nds32 target. +- +- For nds32 target so far: +- If -mno-ctor-dtor, we are going to link +- "crt0.o [user objects]". +- If -mctor-dtor, we are going to link +- "crt1.o crtbegin1.o [user objects] crtend1.o". +- +- Note that the TARGET_DEFAULT_CTOR_DTOR would effect the +- default behavior. Check gcc/config.gcc for more information. */ +-#ifdef TARGET_DEFAULT_CTOR_DTOR +- #define STARTFILE_SPEC \ +- " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \ +- " %{!mno-ctor-dtor:crtbegin1.o%s}" \ +- " %{mcrt-arg:crtarg.o%s}" +- #define ENDFILE_SPEC \ +- " %{!mno-ctor-dtor:crtend1.o%s}" +-#else +- #define STARTFILE_SPEC \ +- " %{mctor-dtor|coverage:crt1.o%s;:crt0.o%s}" \ +- " %{mctor-dtor|coverage:crtbegin1.o%s}" \ +- " %{mcrt-arg:crtarg.o%s}" +- #define ENDFILE_SPEC \ +- " %{mctor-dtor|coverage:crtend1.o%s}" +-#endif +- +-#define STARTFILE_CXX_SPEC \ +- " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \ +- " %{!mno-ctor-dtor:crtbegin1.o%s}" \ +- " %{mcrt-arg:crtarg.o%s}" +-#define ENDFILE_CXX_SPEC \ +- " %{!mno-ctor-dtor:crtend1.o%s}" +- + /* The TARGET_BIG_ENDIAN_DEFAULT is defined if we + configure gcc with --target=nds32be-* setting. + Check gcc/config.gcc for more information. */ +diff --git a/libgcc/config.host b/libgcc/config.host +index d980d8a..3710504 100644 +--- a/libgcc/config.host ++++ b/libgcc/config.host +@@ -882,9 +882,8 @@ msp430*-*-elf) + nds32*-linux*) + # Basic makefile fragment and extra_parts for crt stuff. + # We also append c-isr library implementation. +- tmake_file="${tmake_file} nds32/t-nds32 t-slibgcc-libgcc" +- extra_parts="crtbegin1.o crtend1.o crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o " +- tmake_file="${tmake_file} nds32/t-nds32-glibc t-softfp-sfdf t-softfp" ++ tmake_file="${tmake_file} t-slibgcc-libgcc" ++ tmake_file="${tmake_file} nds32/t-nds32-glibc nds32/t-crtstuff t-softfp-sfdf t-softfp" + # Append library definition makefile fragment according to --with-nds32-lib=X setting. + case "${with_nds32_lib}" in + "" ) +-- +2.4.3 + diff -Nur gcc-4.9.4.orig/gcc/config/nds32/constants.md gcc-4.9.4/gcc/config/nds32/constants.md --- gcc-4.9.4.orig/gcc/config/nds32/constants.md 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/constants.md 2016-08-08 20:37:45.494269627 +0200 @@ -1,5 +1,5 @@ ;; Constant defintions of Andes NDS32 cpu for GNU compiler -;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; Copyright (C) 2012-2015 Free Software Foundation, Inc. ;; Contributed by Andes Technology Corporation. ;; ;; This file is part of GCC. @@ -23,24 +23,191 @@ (define_constants [(R8_REGNUM 8) (TA_REGNUM 15) + (TP_REGNUM 25) (FP_REGNUM 28) (GP_REGNUM 29) (LP_REGNUM 30) (SP_REGNUM 31) + (LB_REGNUM 98) + (LE_REGNUM 99) + (LC_REGNUM 100) ]) +;; The unpec operation index. 
+(define_c_enum "unspec_element" [ + UNSPEC_COPYSIGN + UNSPEC_FCPYNSD + UNSPEC_FCPYNSS + UNSPEC_FCPYSD + UNSPEC_FCPYSS + UNSPEC_AVE + UNSPEC_BCLR + UNSPEC_BSET + UNSPEC_BTGL + UNSPEC_BTST + UNSPEC_CLIP + UNSPEC_CLIPS + UNSPEC_CLZ + UNSPEC_CLO + UNSPEC_ABS + UNSPEC_MAX + UNSPEC_MIN + UNSPEC_PBSAD + UNSPEC_PBSADA + UNSPEC_BSE + UNSPEC_BSE_2 + UNSPEC_BSP + UNSPEC_BSP_2 + UNSPEC_FFB + UNSPEC_FFMISM + UNSPEC_FLMISM + UNSPEC_KADDW + UNSPEC_KSUBW + UNSPEC_KADDH + UNSPEC_KSUBH + UNSPEC_KDMBB + UNSPEC_KDMBT + UNSPEC_KDMTB + UNSPEC_KDMTT + UNSPEC_KHMBB + UNSPEC_KHMBT + UNSPEC_KHMTB + UNSPEC_KHMTT + UNSPEC_KSLRAW + UNSPEC_KSLRAWU + UNSPEC_RDOV + UNSPEC_CLROV + UNSPEC_SVA + UNSPEC_SVS + UNSPEC_WSBH + UNSPEC_LWUP + UNSPEC_LBUP + UNSPEC_SWUP + UNSPEC_SBUP + UNSPEC_LMWZB + UNSPEC_SMWZB + UNSPEC_UALOAD_HW + UNSPEC_UALOAD_W + UNSPEC_UALOAD_DW + UNSPEC_UASTORE_HW + UNSPEC_UASTORE_W + UNSPEC_UASTORE_DW + UNSPEC_GOTINIT + UNSPEC_GOT + UNSPEC_GOTOFF + UNSPEC_PLT + UNSPEC_TLSGD + UNSPEC_TLSLD + UNSPEC_TLSIE + UNSPEC_TLSLE + UNSPEC_ROUND + UNSPEC_VEC_COMPARE + UNSPEC_KHM + UNSPEC_KHMX + UNSPEC_CLIP_OV + UNSPEC_CLIPS_OV + UNSPEC_BITREV + UNSPEC_KABS + UNSPEC_LOOP_END + UNSPEC_TLS_DESC + UNSPEC_TLS_IE +]) + + ;; The unspec_volatile operation index. (define_c_enum "unspec_volatile_element" [ - UNSPEC_VOLATILE_FUNC_RETURN + UNSPEC_VOLATILE_EH_RETURN UNSPEC_VOLATILE_ISYNC UNSPEC_VOLATILE_ISB + UNSPEC_VOLATILE_DSB + UNSPEC_VOLATILE_MSYNC + UNSPEC_VOLATILE_MSYNC_ALL + UNSPEC_VOLATILE_MSYNC_STORE UNSPEC_VOLATILE_MFSR UNSPEC_VOLATILE_MFUSR UNSPEC_VOLATILE_MTSR UNSPEC_VOLATILE_MTUSR UNSPEC_VOLATILE_SETGIE_EN UNSPEC_VOLATILE_SETGIE_DIS + UNSPEC_VOLATILE_FMFCSR + UNSPEC_VOLATILE_FMTCSR + UNSPEC_VOLATILE_FMFCFG + UNSPEC_VOLATILE_JR_ITOFF + UNSPEC_VOLATILE_JR_TOFF + UNSPEC_VOLATILE_JRAL_ITON + UNSPEC_VOLATILE_JRAL_TON + UNSPEC_VOLATILE_RET_ITOFF + UNSPEC_VOLATILE_RET_TOFF + UNSPEC_VOLATILE_STANDBY_NO_WAKE_GRANT + UNSPEC_VOLATILE_STANDBY_WAKE_GRANT + UNSPEC_VOLATILE_STANDBY_WAKE_DONE + UNSPEC_VOLATILE_TEQZ + UNSPEC_VOLATILE_TNEZ + UNSPEC_VOLATILE_TRAP + UNSPEC_VOLATILE_SETEND_BIG + UNSPEC_VOLATILE_SETEND_LITTLE + UNSPEC_VOLATILE_BREAK + UNSPEC_VOLATILE_SYSCALL + UNSPEC_VOLATILE_NOP + UNSPEC_VOLATILE_RES_DEP + UNSPEC_VOLATILE_DATA_DEP + UNSPEC_VOLATILE_GET_CURRENT_SP + UNSPEC_VOLATILE_SET_CURRENT_SP + UNSPEC_VOLATILE_LLW + UNSPEC_VOLATILE_SCW + UNSPEC_VOLATILE_CCTL_L1D_INVALALL + UNSPEC_VOLATILE_CCTL_L1D_WBALL_ALVL + UNSPEC_VOLATILE_CCTL_L1D_WBALL_ONE_LVL + UNSPEC_VOLATILE_CCTL_IDX_WRITE + UNSPEC_VOLATILE_CCTL_IDX_READ + UNSPEC_VOLATILE_CCTL_VA_WBINVAL_L1 + UNSPEC_VOLATILE_CCTL_VA_WBINVAL_LA + UNSPEC_VOLATILE_CCTL_IDX_WBINVAL + UNSPEC_VOLATILE_CCTL_VA_LCK + UNSPEC_VOLATILE_DPREF_QW + UNSPEC_VOLATILE_DPREF_HW + UNSPEC_VOLATILE_DPREF_W + UNSPEC_VOLATILE_DPREF_DW + UNSPEC_VOLATILE_TLBOP_TRD + UNSPEC_VOLATILE_TLBOP_TWR + UNSPEC_VOLATILE_TLBOP_RWR + UNSPEC_VOLATILE_TLBOP_RWLK + UNSPEC_VOLATILE_TLBOP_UNLK + UNSPEC_VOLATILE_TLBOP_PB + UNSPEC_VOLATILE_TLBOP_INV + UNSPEC_VOLATILE_TLBOP_FLUA + UNSPEC_VOLATILE_ENABLE_INT + UNSPEC_VOLATILE_DISABLE_INT + UNSPEC_VOLATILE_SET_PENDING_SWINT + UNSPEC_VOLATILE_CLR_PENDING_SWINT + UNSPEC_VOLATILE_CLR_PENDING_HWINT + UNSPEC_VOLATILE_GET_ALL_PENDING_INT + UNSPEC_VOLATILE_GET_PENDING_INT + UNSPEC_VOLATILE_SET_INT_PRIORITY + UNSPEC_VOLATILE_GET_INT_PRIORITY + UNSPEC_VOLATILE_SET_TRIG_LEVEL + UNSPEC_VOLATILE_SET_TRIG_EDGE + UNSPEC_VOLATILE_GET_TRIG_TYPE + UNSPEC_VOLATILE_RELAX_GROUP + UNSPEC_VOLATILE_INNERMOST_LOOP_BEGIN + UNSPEC_VOLATILE_INNERMOST_LOOP_END + UNSPEC_VOLATILE_MAYBE_ALIGN + 
UNSPEC_VOLATILE_OMIT_FP_BEGIN + UNSPEC_VOLATILE_OMIT_FP_END + UNSPEC_VOLATILE_RETURN_ADDRESS + UNSPEC_VOLATILE_POP25_RETURN + UNSPEC_VOLATILE_UPDATE_GP + UNSPEC_VOLATILE_SIGNATURE_BEGIN + UNSPEC_VOLATILE_SIGNATURE_END + UNSPEC_VOLATILE_NO_HWLOOP + UNSPEC_VOLATILE_NO_IFC_BEGIN + UNSPEC_VOLATILE_NO_IFC_END + UNSPEC_VOLATILE_NO_EX9_BEGIN + UNSPEC_VOLATILE_NO_EX9_END + UNSPEC_VOLATILE_UNALIGNED_FEATURE + UNSPEC_VOLATILE_ENABLE_UNALIGNED + UNSPEC_VOLATILE_DISABLE_UNALIGNED ]) ;; ------------------------------------------------------------------------ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/constraints.md gcc-4.9.4/gcc/config/nds32/constraints.md --- gcc-4.9.4.orig/gcc/config/nds32/constraints.md 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/constraints.md 2016-08-08 20:37:45.498269782 +0200 @@ -1,5 +1,5 @@ ;; Constraint definitions of Andes NDS32 cpu for GNU compiler -;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; Copyright (C) 2012-2015 Free Software Foundation, Inc. ;; Contributed by Andes Technology Corporation. ;; ;; This file is part of GCC. @@ -25,9 +25,6 @@ ;; Machine-dependent floating: G H -(define_register_constraint "w" "(TARGET_ISA_V3 || TARGET_ISA_V3M) ? LOW_REGS : NO_REGS" - "LOW register class $r0 ~ $r7 constraint for V3/V3M ISA") - (define_register_constraint "l" "LOW_REGS" "LOW register class $r0 ~ $r7") @@ -41,9 +38,59 @@ (define_register_constraint "t" "R15_TA_REG" "Temporary Assist register $ta (i.e. $r15)") +(define_register_constraint "e" "R8_REG" + "Function Entry register $r8)") + (define_register_constraint "k" "STACK_REG" "Stack register $sp") +(define_register_constraint "v" "R5_REG" + "Register $r5") + +(define_register_constraint "x" "FRAME_POINTER_REG" + "Frame pointer register $fp") + +(define_register_constraint "f" + "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) ? 
FP_REGS : NO_REGS" + "The Floating point registers $fs0 ~ $fs31") + +(define_register_constraint "A" "LOOP_REGS" + "Loop register class") + +(define_constraint "Iv00" + "Constant value 0" + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "Iv01" + "Constant value 1" + (and (match_code "const_int") + (match_test "ival == 1"))) + +(define_constraint "Iv02" + "Constant value 2" + (and (match_code "const_int") + (match_test "ival == 2"))) + +(define_constraint "Iv04" + "Constant value 4" + (and (match_code "const_int") + (match_test "ival == 4"))) + +(define_constraint "Iv08" + "Constant value 8" + (and (match_code "const_int") + (match_test "ival == 8"))) + +(define_constraint "Iu01" + "Unsigned immediate 1-bit value" + (and (match_code "const_int") + (match_test "ival == 1 || ival == 0"))) + +(define_constraint "Iu02" + "Unsigned immediate 2-bit value" + (and (match_code "const_int") + (match_test "ival < (1 << 2) && ival >= 0"))) (define_constraint "Iu03" "Unsigned immediate 3-bit value" @@ -65,6 +112,11 @@ (and (match_code "const_int") (match_test "ival < (1 << 4) && ival >= -(1 << 4)"))) +(define_constraint "Cs05" + "Signed immediate 5-bit value" + (and (match_code "const_double") + (match_test "nds32_const_double_range_ok_p (op, SFmode, -(1 << 4), (1 << 4))"))) + (define_constraint "Iu05" "Unsigned immediate 5-bit value" (and (match_code "const_int") @@ -75,6 +127,11 @@ (and (match_code "const_int") (match_test "IN_RANGE (ival, -31, 0)"))) +(define_constraint "Iu06" + "Unsigned immediate 6-bit value" + (and (match_code "const_int") + (match_test "ival < (1 << 6) && ival >= 0"))) + ;; Ip05 is special and dedicated for v3 movpi45 instruction. ;; movpi45 has imm5u field but the range is 16 ~ 47. (define_constraint "Ip05" @@ -84,10 +141,10 @@ && ival >= (0 + 16) && (TARGET_ISA_V3 || TARGET_ISA_V3M)"))) -(define_constraint "Iu06" +(define_constraint "IU06" "Unsigned immediate 6-bit value constraint for addri36.sp instruction" (and (match_code "const_int") - (match_test "ival < (1 << 6) + (match_test "ival < (1 << 8) && ival >= 0 && (ival % 4 == 0) && (TARGET_ISA_V3 || TARGET_ISA_V3M)"))) @@ -103,6 +160,11 @@ (match_test "ival < (1 << 9) && ival >= 0"))) +(define_constraint "Is08" + "Signed immediate 8-bit value" + (and (match_code "const_int") + (match_test "ival < (1 << 7) && ival >= -(1 << 7)"))) + (define_constraint "Is10" "Signed immediate 10-bit value" (and (match_code "const_int") @@ -113,6 +175,10 @@ (and (match_code "const_int") (match_test "ival < (1 << 10) && ival >= -(1 << 10)"))) +(define_constraint "Is14" + "Signed immediate 14-bit value" + (and (match_code "const_int") + (match_test "ival < (1 << 13) && ival >= -(1 << 13)"))) (define_constraint "Is15" "Signed immediate 15-bit value" @@ -194,12 +260,21 @@ (and (match_code "const_int") (match_test "ival < (1 << 19) && ival >= -(1 << 19)"))) +(define_constraint "Cs20" + "Signed immediate 20-bit value" + (and (match_code "const_double") + (match_test "nds32_const_double_range_ok_p (op, SFmode, -(1 << 19), (1 << 19))"))) (define_constraint "Ihig" "The immediate value that can be simply set high 20-bit" (and (match_code "const_int") (match_test "(ival != 0) && ((ival & 0xfff) == 0)"))) +(define_constraint "Chig" + "The immediate value that can be simply set high 20-bit" + (and (match_code "high") + (match_test "GET_CODE (XEXP (op, 0)) == CONST_DOUBLE"))) + (define_constraint "Izeb" "The immediate value 0xff" (and (match_code "const_int") @@ -213,12 +288,12 @@ (define_constraint "Ixls" "The immediate 
value 0x01" (and (match_code "const_int") - (match_test "TARGET_PERF_EXT && (ival == 0x1)"))) + (match_test "TARGET_EXT_PERF && (ival == 0x1)"))) (define_constraint "Ix11" "The immediate value 0x7ff" (and (match_code "const_int") - (match_test "TARGET_PERF_EXT && (ival == 0x7ff)"))) + (match_test "TARGET_EXT_PERF && (ival == 0x7ff)"))) (define_constraint "Ibms" "The immediate value with power of 2" @@ -232,23 +307,70 @@ (match_test "(TARGET_ISA_V3 || TARGET_ISA_V3M) && (IN_RANGE (exact_log2 (ival + 1), 1, 8))"))) +(define_constraint "CVp5" + "Unsigned immediate 5-bit value for movpi45 instruction with range 16-47" + (and (match_code "const_vector") + (match_test "nds32_valid_CVp5_p (op)"))) + +(define_constraint "CVs5" + "Signed immediate 5-bit value" + (and (match_code "const_vector") + (match_test "nds32_valid_CVs5_p (op)"))) + +(define_constraint "CVs2" + "Signed immediate 20-bit value" + (and (match_code "const_vector") + (match_test "nds32_valid_CVs2_p (op)"))) + +(define_constraint "CVhi" + "The immediate value that can be simply set high 20-bit" + (and (match_code "const_vector") + (match_test "nds32_valid_CVhi_p (op)"))) (define_memory_constraint "U33" "Memory constraint for 333 format" (and (match_code "mem") - (match_test "nds32_mem_format (op) == ADDRESS_LO_REG_IMM3U"))) + (match_test "nds32_mem_format (op) == ADDRESS_POST_INC_LO_REG_IMM3U + || nds32_mem_format (op) == ADDRESS_POST_MODIFY_LO_REG_IMM3U + || nds32_mem_format (op) == ADDRESS_LO_REG_IMM3U"))) (define_memory_constraint "U45" "Memory constraint for 45 format" (and (match_code "mem") (match_test "(nds32_mem_format (op) == ADDRESS_REG) - && (GET_MODE (op) == SImode)"))) + && ((GET_MODE (op) == SImode) + || (GET_MODE (op) == SFmode))"))) + +(define_memory_constraint "Ufe" + "Memory constraint for fe format" + (and (match_code "mem") + (match_test "nds32_mem_format (op) == ADDRESS_R8_IMM7U + && (GET_MODE (op) == SImode + || GET_MODE (op) == SFmode)"))) (define_memory_constraint "U37" "Memory constraint for 37 format" (and (match_code "mem") (match_test "(nds32_mem_format (op) == ADDRESS_SP_IMM7U || nds32_mem_format (op) == ADDRESS_FP_IMM7U) - && (GET_MODE (op) == SImode)"))) + && (GET_MODE (op) == SImode + || GET_MODE (op) == SFmode)"))) + +(define_memory_constraint "Umw" + "Memory constraint for lwm/smw" + (and (match_code "mem") + (match_test "nds32_valid_smw_lwm_base_p (op)"))) + +(define_memory_constraint "Da" + "Memory constraint for non-offset loads/stores" + (and (match_code "mem") + (match_test "REG_P (XEXP (op, 0)) + || (GET_CODE (XEXP (op, 0)) == POST_INC)"))) + +(define_memory_constraint "Q" + "Memory constraint for no symbol_ref and const" + (and (match_code "mem") + (match_test "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) + && nds32_float_mem_operand_p (op)"))) ;; ------------------------------------------------------------------------ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/elf.h gcc-4.9.4/gcc/config/nds32/elf.h --- gcc-4.9.4.orig/gcc/config/nds32/elf.h 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/elf.h 2016-08-08 20:37:45.498269782 +0200 @@ -0,0 +1,82 @@ +/* Definitions of target machine of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + +/* ------------------------------------------------------------------------ */ + +#define TARGET_LINUX_ABI 0 + +/* In the configure stage we may use options --enable-default-relax, + --enable-Os-default-ifc and --enable-Os-default-ex9. They effect + the default spec of passing --relax, --mifc, and --mex9 to linker. + We use NDS32_RELAX_SPEC, NDS32_IFC_SPEC, and NDS32_EX9_SPEC + so that we can customize them conveniently. */ +#define LINK_SPEC \ + " %{G*}" \ + " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ + NDS32_RELAX_SPEC \ + NDS32_IFC_SPEC \ + NDS32_EX9_SPEC + +#define LIB_SPEC \ + " -lc -lgloss" + +#define LIBGCC_SPEC \ + " -lgcc" + +/* The option -mno-ctor-dtor can disable constructor/destructor feature + by applying different crt stuff. In the convention, crt0.o is the + startup file without constructor/destructor; + crt1.o, crti.o, crtbegin.o, crtend.o, and crtn.o are the + startup files with constructor/destructor. + Note that crt0.o, crt1.o, crti.o, and crtn.o are provided + by newlib/mculib/glibc/ublic, while crtbegin.o and crtend.o are + currently provided by GCC for nds32 target. + + For nds32 target so far: + If -mno-ctor-dtor, we are going to link + "crt0.o [user objects]". + If -mctor-dtor, we are going to link + "crt1.o crtbegin1.o [user objects] crtend1.o". + + Note that the TARGET_DEFAULT_CTOR_DTOR would effect the + default behavior. Check gcc/config.gcc for more information. */ +#ifdef TARGET_DEFAULT_CTOR_DTOR + #define STARTFILE_SPEC \ + " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \ + " %{!mno-ctor-dtor:crtbegin1.o%s}" \ + " %{mcrt-arg:crtarg.o%s}" + #define ENDFILE_SPEC \ + " %{!mno-ctor-dtor:crtend1.o%s}" +#else + #define STARTFILE_SPEC \ + " %{mctor-dtor|coverage:crt1.o%s;:crt0.o%s}" \ + " %{mctor-dtor|coverage:crtbegin1.o%s}" \ + " %{mcrt-arg:crtarg.o%s}" + #define ENDFILE_SPEC \ + " %{mctor-dtor|coverage:crtend1.o%s}" +#endif + +#define STARTFILE_CXX_SPEC \ + " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \ + " %{!mno-ctor-dtor:crtbegin1.o%s}" \ + " %{mcrt-arg:crtarg.o%s}" +#define ENDFILE_CXX_SPEC \ + " %{!mno-ctor-dtor:crtend1.o%s}" diff -Nur gcc-4.9.4.orig/gcc/config/nds32/iterators.md gcc-4.9.4/gcc/config/nds32/iterators.md --- gcc-4.9.4.orig/gcc/config/nds32/iterators.md 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/iterators.md 2016-08-08 20:37:45.498269782 +0200 @@ -1,6 +1,6 @@ ;; Code and mode itertator and attribute definitions ;; of Andes NDS32 cpu for GNU compiler -;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; Copyright (C) 2012-2015 Free Software Foundation, Inc. ;; Contributed by Andes Technology Corporation. ;; ;; This file is part of GCC. @@ -26,30 +26,99 @@ ;; A list of integer modes that are up to one word long. (define_mode_iterator QIHISI [QI HI SI]) +;; A list of integer modes for one word and double word. +(define_mode_iterator SIDI [SI DI]) + ;; A list of integer modes that are up to one half-word long. (define_mode_iterator QIHI [QI HI]) ;; A list of the modes that are up to double-word long. (define_mode_iterator DIDF [DI DF]) +;; A list of the modes that are up to one word long vector. 
+(define_mode_iterator VQIHI [V4QI V2HI]) + +;; A list of the modes that are up to one word long vector and scalar. +(define_mode_iterator VSQIHI [V4QI V2HI QI HI]) + +(define_mode_iterator VSQIHIDI [V4QI V2HI QI HI DI]) + +(define_mode_iterator VQIHIDI [V4QI V2HI DI]) + +;; A list of the modes that are up to one word long vector +;; and scalar for HImode. +(define_mode_iterator VSHI [V2HI HI]) + +;; A list of the modes that are up to double-word long. +(define_mode_iterator ANYF [(SF "TARGET_FPU_SINGLE") + (DF "TARGET_FPU_DOUBLE")]) ;;---------------------------------------------------------------------------- ;; Mode attributes. ;;---------------------------------------------------------------------------- -(define_mode_attr size [(QI "b") (HI "h") (SI "w")]) +(define_mode_attr size [(QI "b") (HI "h") (SI "w") (SF "s") (DF "d")]) -(define_mode_attr byte [(QI "1") (HI "2") (SI "4")]) +(define_mode_attr byte [(QI "1") (HI "2") (SI "4") (V4QI "4") (V2HI "4")]) +(define_mode_attr bits [(V4QI "8") (QI "8") (V2HI "16") (HI "16") (DI "64")]) + +(define_mode_attr VELT [(V4QI "QI") (V2HI "HI")]) ;;---------------------------------------------------------------------------- ;; Code iterators. ;;---------------------------------------------------------------------------- +;; shifts +(define_code_iterator shift_rotate [ashift ashiftrt lshiftrt rotatert]) + +(define_code_iterator shifts [ashift ashiftrt lshiftrt]) + +(define_code_iterator shiftrt [ashiftrt lshiftrt]) + +(define_code_iterator sat_plus [ss_plus us_plus]) + +(define_code_iterator all_plus [plus ss_plus us_plus]) + +(define_code_iterator sat_minus [ss_minus us_minus]) + +(define_code_iterator all_minus [minus ss_minus us_minus]) + +(define_code_iterator plus_minus [plus minus]) + +(define_code_iterator extend [sign_extend zero_extend]) + +(define_code_iterator sumax [smax umax]) + +(define_code_iterator sumin [smin umin]) + +(define_code_iterator sumin_max [smax umax smin umin]) ;;---------------------------------------------------------------------------- ;; Code attributes. ;;---------------------------------------------------------------------------- +;; shifts +(define_code_attr shift + [(ashift "ashl") (ashiftrt "ashr") (lshiftrt "lshr") (rotatert "rotr")]) + +(define_code_attr su + [(ashiftrt "") (lshiftrt "u") (sign_extend "s") (zero_extend "u")]) + +(define_code_attr zs + [(sign_extend "s") (zero_extend "z")]) + +(define_code_attr uk + [(plus "") (ss_plus "k") (us_plus "uk") + (minus "") (ss_minus "k") (us_minus "uk")]) + +(define_code_attr opcode + [(plus "add") (minus "sub") (smax "smax") (umax "umax") (smin "smin") (umin "umin")]) + +(define_code_attr add_rsub + [(plus "a") (minus "rs")]) + +(define_code_attr add_sub + [(plus "a") (minus "s")]) ;;---------------------------------------------------------------------------- diff -Nur gcc-4.9.4.orig/gcc/config/nds32/linux.h gcc-4.9.4/gcc/config/nds32/linux.h --- gcc-4.9.4.orig/gcc/config/nds32/linux.h 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/linux.h 2016-08-08 20:37:45.498269782 +0200 @@ -0,0 +1,70 @@ +/* Definitions of target machine of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + +/* ------------------------------------------------------------------------ */ + +#define TARGET_LINUX_ABI 1 + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + GNU_USER_TARGET_OS_CPP_BUILTINS(); \ + } \ + while (0) + +#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1" + +/* In the configure stage we may use options --enable-default-relax, + --enable-Os-default-ifc and --enable-Os-default-ex9. They effect + the default spec of passing --relax, --mifc, and --mex9 to linker. + We use NDS32_RELAX_SPEC, NDS32_IFC_SPEC, and NDS32_EX9_SPEC + so that we can customize them conveniently. */ +#define LINK_SPEC \ + " %{G*}" \ + " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ + "%{shared:-shared} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " GNU_USER_DYNAMIC_LINKER "} \ + %{static:-static}}" \ + NDS32_RELAX_SPEC \ + NDS32_IFC_SPEC \ + NDS32_EX9_SPEC + +#define LINK_PIE_SPEC "%{pie:%{!fno-pie:%{!fno-PIE:%{!static:-pie}}}} " + + +/* The SYNC operations are implemented as library functions, not + INSN patterns. As a result, the HAVE defines for the patterns are + not defined. We need to define them to generate the corresponding + __GCC_HAVE_SYNC_COMPARE_AND_SWAP_* and __GCC_ATOMIC_*_LOCK_FREE + defines. + Ref: https://sourceware.org/ml/libc-alpha/2014-09/msg00322.html */ +#define HAVE_sync_compare_and_swapqi 1 +#define HAVE_sync_compare_and_swaphi 1 +#define HAVE_sync_compare_and_swapsi 1 diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32.c gcc-4.9.4/gcc/config/nds32/nds32.c --- gcc-4.9.4.orig/gcc/config/nds32/nds32.c 2014-02-14 07:02:16.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32.c 2016-08-08 20:37:45.586273189 +0200 @@ -1,5 +1,5 @@ /* Subroutines used for code generation of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -18,13 +18,14 @@ along with GCC; see the file COPYING3. If not see . */ - +/* ------------------------------------------------------------------------ */ #include "config.h" #include "system.h" #include "coretypes.h" #include "tm.h" #include "tree.h" +#include "stringpool.h" #include "stor-layout.h" #include "varasm.h" #include "calls.h" @@ -50,34 +51,58 @@ #include "target-def.h" #include "langhooks.h" /* For add_builtin_function(). */ #include "ggc.h" +#include "tree-pass.h" +#include "basic-block.h" +#include "cfgloop.h" +#include "context.h" +#include "params.h" +#include "cpplib.h" +#include "hw-doloop.h" /* ------------------------------------------------------------------------ */ -/* This file is divided into five parts: +/* This file is divided into six parts: - PART 1: Auxiliary static variable definitions and - target hook static variable definitions. + PART 1: Auxiliary external function and variable declarations. - PART 2: Auxiliary static function definitions. + PART 2: Auxiliary static variable definitions and + target hook static variable definitions. - PART 3: Implement target hook stuff definitions. 
+ PART 3: Auxiliary static function definitions. - PART 4: Implemet extern function definitions, - the prototype is in nds32-protos.h. + PART 4: Implement target hook stuff definitions. - PART 5: Initialize target hook structure and definitions. */ + PART 5: Implemet extern function definitions, + the prototype is in nds32-protos.h. + + PART 6: Initialize target hook structure and definitions. */ /* ------------------------------------------------------------------------ */ -/* PART 1: Auxiliary static variable definitions and - target hook static variable definitions. */ +/* PART 1: Auxiliary function and variable declarations. */ + +namespace nds32 { +namespace scheduling { + +extern unsigned int nds32_print_stalls (void); +rtl_opt_pass *make_pass_nds32_print_stalls (gcc::context *); + +} // namespace scheduling +} // namespace nds32 + +rtl_opt_pass *make_pass_nds32_fp_as_gp (gcc::context *); +rtl_opt_pass *make_pass_nds32_load_store_opt (gcc::context *); +rtl_opt_pass *make_pass_nds32_soft_fp_arith_comm_opt(gcc::context *); +rtl_opt_pass *make_pass_nds32_regrename_opt (gcc::context *); +rtl_opt_pass *make_pass_nds32_gcse_opt (gcc::context *); +rtl_opt_pass *make_pass_nds32_relax_opt (gcc::context *); +rtl_opt_pass *make_pass_nds32_hwloop1_opt (gcc::context *); +rtl_opt_pass *make_pass_nds32_hwloop2_opt (gcc::context *); + +/* ------------------------------------------------------------------------ */ -/* Refer to nds32.h, there are maximum 73 isr vectors in nds32 architecture. - 0 for reset handler with __attribute__((reset())), - 1-8 for exception handler with __attribute__((exception(1,...,8))), - and 9-72 for interrupt handler with __attribute__((interrupt(0,...,63))). - We use an array to record essential information for each vector. */ -static struct nds32_isr_info nds32_isr_vectors[NDS32_N_ISR_VECTORS]; +/* PART 2: Auxiliary static variable definitions and + target hook static variable definitions. */ /* Define intrinsic register names. Please refer to nds32_intrinsic.h file, the index is corresponding to @@ -85,14 +110,210 @@ NOTE that the base value starting from 1024. 
*/ static const char * const nds32_intrinsic_register_names[] = { - "$PSW", "$IPSW", "$ITYPE", "$IPC" + "$CPU_VER", + "$ICM_CFG", + "$DCM_CFG", + "$MMU_CFG", + "$MSC_CFG", + "$MSC_CFG2", + "$CORE_ID", + "$FUCOP_EXIST", + + "$PSW", + "$IPSW", + "$P_IPSW", + "$IVB", + "$EVA", + "$P_EVA", + "$ITYPE", + "$P_ITYPE", + + "$MERR", + "$IPC", + "$P_IPC", + "$OIPC", + "$P_P0", + "$P_P1", + + "$INT_MASK", + "$INT_MASK2", + "$INT_PEND", + "$INT_PEND2", + "$SP_USR", + "$SP_PRIV", + "$INT_PRI", + "$INT_PRI2", + "$INT_CTRL", + "$INT_TRIGGER", + "$INT_GPR_PUSH_DIS", + + "$MMU_CTL", + "$L1_PPTB", + "$TLB_VPN", + "$TLB_DATA", + "$TLB_MISC", + "$VLPT_IDX", + "$ILMB", + "$DLMB", + + "$CACHE_CTL", + "$HSMP_SADDR", + "$HSMP_EADDR", + "$SDZ_CTL", + "$N12MISC_CTL", + "$MISC_CTL", + "$ECC_MISC", + + "$BPC0", + "$BPC1", + "$BPC2", + "$BPC3", + "$BPC4", + "$BPC5", + "$BPC6", + "$BPC7", + + "$BPA0", + "$BPA1", + "$BPA2", + "$BPA3", + "$BPA4", + "$BPA5", + "$BPA6", + "$BPA7", + + "$BPAM0", + "$BPAM1", + "$BPAM2", + "$BPAM3", + "$BPAM4", + "$BPAM5", + "$BPAM6", + "$BPAM7", + + "$BPV0", + "$BPV1", + "$BPV2", + "$BPV3", + "$BPV4", + "$BPV5", + "$BPV6", + "$BPV7", + + "$BPCID0", + "$BPCID1", + "$BPCID2", + "$BPCID3", + "$BPCID4", + "$BPCID5", + "$BPCID6", + "$BPCID7", + + "$EDM_CFG", + "$EDMSW", + "$EDM_CTL", + "$EDM_DTR", + "$BPMTC", + "$DIMBR", + + "$TECR0", + "$TECR1", + "$PFMC0", + "$PFMC1", + "$PFMC2", + "$PFM_CTL", + "$PFT_CTL", + "$HSP_CTL", + "$SP_BOUND", + "$SP_BOUND_PRIV", + "$FUCOP_CTL", + "$PRUSR_ACC_CTL", + + "$DMA_CFG", + "$DMA_GCSW", + "$DMA_CHNSEL", + "$DMA_ACT", + "$DMA_SETUP", + "$DMA_ISADDR", + "$DMA_ESADDR", + "$DMA_TCNT", + "$DMA_STATUS", + "$DMA_2DSET", + "$DMA_2DSCTL", + "$DMA_RCNT", + "$DMA_HSTATUS", + + "$PC", + "$SP_USR1", + "$SP_USR2", + "$SP_USR3", + "$SP_PRIV1", + "$SP_PRIV2", + "$SP_PRIV3", + "$BG_REGION", + "$SFCR", + "$SIGN", + "$ISIGN", + "$P_ISIGN", + "$IFC_LP", + "$ITB" +}; + +/* Define instrinsic cctl names. */ +static const char * const nds32_cctl_names[] = +{ + "L1D_VA_FILLCK", + "L1D_VA_ULCK", + "L1I_VA_FILLCK", + "L1I_VA_ULCK", + + "L1D_IX_WBINVAL", + "L1D_IX_INVAL", + "L1D_IX_WB", + "L1I_IX_INVAL", + + "L1D_VA_INVAL", + "L1D_VA_WB", + "L1D_VA_WBINVAL", + "L1I_VA_INVAL", + + "L1D_IX_RTAG", + "L1D_IX_RWD", + "L1I_IX_RTAG", + "L1I_IX_RWD", + + "L1D_IX_WTAG", + "L1D_IX_WWD", + "L1I_IX_WTAG", + "L1I_IX_WWD" +}; + +static const char * const nds32_dpref_names[] = +{ + "SRD", + "MRD", + "SWR", + "MWR", + "PTE", + "CLWR" +}; + +/* Defining register allocation order for performance. + We want to allocate callee-saved registers after others. + It may be used by nds32_adjust_reg_alloc_order(). */ +static const int nds32_reg_alloc_order_for_speed[] = +{ + 0, 1, 2, 3, 4, 5, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, + 26, 27, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15 }; /* Defining target-specific uses of __attribute__. */ static const struct attribute_spec nds32_attribute_table[] = { /* Syntax: { name, min_len, max_len, decl_required, type_required, - function_type_required, handler, affects_type_identity } */ + function_type_required, handler, affects_type_identity } */ /* The interrupt vid: [0-63]+ (actual vector number starts from 9 to 72). 
*/ { "interrupt", 1, 64, false, false, false, NULL, false }, @@ -105,6 +326,7 @@ { "nested", 0, 0, false, false, false, NULL, false }, { "not_nested", 0, 0, false, false, false, NULL, false }, { "nested_ready", 0, 0, false, false, false, NULL, false }, + { "critical", 0, 0, false, false, false, NULL, false }, /* The attributes describing isr register save scheme. */ { "save_all", 0, 0, false, false, false, NULL, false }, @@ -117,14 +339,26 @@ /* The attribute telling no prologue/epilogue. */ { "naked", 0, 0, false, false, false, NULL, false }, + /* The attribute is used to set signature. */ + { "signature", 0, 0, false, false, false, NULL, false }, + + /* The attribute is used to tell this function to be ROM patch. */ + { "indirect_call",0, 0, false, false, false, NULL, false }, + + /* FOR BACKWARD COMPATIBILITY, + this attribute also tells no prologue/epilogue. */ + { "no_prologue", 0, 0, false, false, false, NULL, false }, + + /* The attribute turn off hwloop optimization. */ + { "no_ext_zol", 0, 0, false, false, false, NULL, false}, + /* The last attribute spec is set to be NULL. */ { NULL, 0, 0, false, false, false, NULL, false } }; - /* ------------------------------------------------------------------------ */ -/* PART 2: Auxiliary static function definitions. */ +/* PART 3: Auxiliary static function definitions. */ /* Function to save and restore machine-specific function data. */ static struct machine_function * @@ -133,12 +367,22 @@ struct machine_function *machine; machine = ggc_alloc_cleared_machine_function (); + /* Initially assume this function does not use __builtin_eh_return. */ + machine->use_eh_return_p = 0; + /* Initially assume this function needs prologue/epilogue. */ machine->naked_p = 0; /* Initially assume this function does NOT use fp_as_gp optimization. */ machine->fp_as_gp_p = 0; + /* Initially this function is not under strictly aligned situation. */ + machine->strict_aligned_p = 0; + + /* Initially this function has no naked and no_prologue attributes. */ + machine->attr_naked_p = 0; + machine->attr_no_prologue_p = 0; + return machine; } @@ -149,23 +393,77 @@ { int r; int block_size; + bool v3pushpop_p; /* Because nds32_compute_stack_frame() will be called from different place, everytime we enter this function, we have to assume this function needs prologue/epilogue. */ cfun->machine->naked_p = 0; + /* We need to mark whether this function has naked and no_prologue + attribute so that we can distinguish the difference if users applies + -mret-in-naked-func option. */ + cfun->machine->attr_naked_p + = lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) + ? 1 : 0; + cfun->machine->attr_no_prologue_p + = lookup_attribute ("no_prologue", DECL_ATTRIBUTES (current_function_decl)) + ? 1 : 0; + + /* If __builtin_eh_return is used, we better have frame pointer needed + so that we can easily locate the stack slot of return address. */ + if (crtl->calls_eh_return) + { + frame_pointer_needed = 1; + + /* We need to mark eh data registers that need to be saved + in the stack. 
*/ + cfun->machine->eh_return_data_first_regno = EH_RETURN_DATA_REGNO (0); + for (r = 0; EH_RETURN_DATA_REGNO (r) != INVALID_REGNUM; r++) + cfun->machine->eh_return_data_last_regno = r; + + cfun->machine->eh_return_data_regs_size + = 4 * (cfun->machine->eh_return_data_last_regno + - cfun->machine->eh_return_data_first_regno + + 1); + cfun->machine->use_eh_return_p = 1; + } + else + { + /* Assigning SP_REGNUM to eh_first_regno and eh_last_regno means we + do not need to handle __builtin_eh_return case in this function. */ + cfun->machine->eh_return_data_first_regno = SP_REGNUM; + cfun->machine->eh_return_data_last_regno = SP_REGNUM; + + cfun->machine->eh_return_data_regs_size = 0; + cfun->machine->use_eh_return_p = 0; + } + /* Get variadic arguments size to prepare pretend arguments and - push them into stack at prologue. - Currently, we do not push variadic arguments by ourself. - We have GCC handle all the works. - The caller will push all corresponding nameless arguments into stack, - and the callee is able to retrieve them without problems. - These variables are still preserved in case one day - we would like caller passing arguments with registers. */ - cfun->machine->va_args_size = 0; - cfun->machine->va_args_first_regno = SP_REGNUM; - cfun->machine->va_args_last_regno = SP_REGNUM; + we will push them into stack at prologue by ourself. */ + cfun->machine->va_args_size = crtl->args.pretend_args_size; + if (cfun->machine->va_args_size != 0) + { + cfun->machine->va_args_first_regno + = NDS32_GPR_ARG_FIRST_REGNUM + + NDS32_MAX_GPR_REGS_FOR_ARGS + - (crtl->args.pretend_args_size / UNITS_PER_WORD); + cfun->machine->va_args_last_regno + = NDS32_GPR_ARG_FIRST_REGNUM + NDS32_MAX_GPR_REGS_FOR_ARGS - 1; + } + else + { + cfun->machine->va_args_first_regno = SP_REGNUM; + cfun->machine->va_args_last_regno = SP_REGNUM; + } + + /* Important: We need to make sure that varargs area is 8-byte alignment. */ + block_size = cfun->machine->va_args_size; + if (!NDS32_DOUBLE_WORD_ALIGN_P (block_size)) + { + cfun->machine->va_args_area_padding_bytes + = NDS32_ROUND_UP_DOUBLE_WORD (block_size) - block_size; + } /* Get local variables, incoming variables, and temporary variables size. Note that we need to make sure it is 8-byte alignment because @@ -181,19 +479,25 @@ /* If $gp value is required to be saved on stack, it needs 4 bytes space. Check whether we are using PIC code genration. */ - cfun->machine->gp_size = (flag_pic) ? 4 : 0; + cfun->machine->gp_size = + (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) ? 4 : 0; /* If $lp value is required to be saved on stack, it needs 4 bytes space. Check whether $lp is ever live. */ - cfun->machine->lp_size = (df_regs_ever_live_p (LP_REGNUM)) ? 4 : 0; + cfun->machine->lp_size + = (flag_always_save_lp || df_regs_ever_live_p (LP_REGNUM)) ? 4 : 0; /* Initially there is no padding bytes. */ - cfun->machine->callee_saved_area_padding_bytes = 0; + cfun->machine->callee_saved_area_gpr_padding_bytes = 0; /* Calculate the bytes of saving callee-saved registers on stack. 
*/ - cfun->machine->callee_saved_regs_size = 0; - cfun->machine->callee_saved_regs_first_regno = SP_REGNUM; - cfun->machine->callee_saved_regs_last_regno = SP_REGNUM; + cfun->machine->callee_saved_gpr_regs_size = 0; + cfun->machine->callee_saved_first_gpr_regno = SP_REGNUM; + cfun->machine->callee_saved_last_gpr_regno = SP_REGNUM; + cfun->machine->callee_saved_fpr_regs_size = 0; + cfun->machine->callee_saved_first_fpr_regno = SP_REGNUM; + cfun->machine->callee_saved_last_fpr_regno = SP_REGNUM; + /* Currently, there is no need to check $r28~$r31 because we will save them in another way. */ for (r = 0; r < 28; r++) @@ -204,46 +508,83 @@ (only need to set it once). If first regno == SP_REGNUM, we can tell that it is the first time to be here. */ - if (cfun->machine->callee_saved_regs_first_regno == SP_REGNUM) - cfun->machine->callee_saved_regs_first_regno = r; + if (cfun->machine->callee_saved_first_gpr_regno == SP_REGNUM) + cfun->machine->callee_saved_first_gpr_regno = r; /* Mark the last required callee-saved register. */ - cfun->machine->callee_saved_regs_last_regno = r; + cfun->machine->callee_saved_last_gpr_regno = r; + } + } + + /* Recording fpu callee-saved register. */ + if (TARGET_HARD_FLOAT) + { + for (r = NDS32_FIRST_FPR_REGNUM; r < NDS32_LAST_FPR_REGNUM; r++) + { + if (NDS32_REQUIRED_CALLEE_SAVED_P (r)) + { + /* Mark the first required callee-saved register. */ + if (cfun->machine->callee_saved_first_fpr_regno == SP_REGNUM) + { + /* Make first callee-saved number is even, + bacause we use doubleword access, and this way + promise 8-byte alignemt. */ + if (!NDS32_FPR_REGNO_OK_FOR_DOUBLE (r)) + cfun->machine->callee_saved_first_fpr_regno = r - 1; + else + cfun->machine->callee_saved_first_fpr_regno = r; + } + cfun->machine->callee_saved_last_fpr_regno = r; + } } + + /* Make last callee-saved register number is odd, + we hope callee-saved register is even. */ + int last_fpr = cfun->machine->callee_saved_last_fpr_regno; + if (NDS32_FPR_REGNO_OK_FOR_DOUBLE (last_fpr)) + cfun->machine->callee_saved_last_fpr_regno++; } /* Check if this function can omit prologue/epilogue code fragment. - If there is 'naked' attribute in this function, + If there is 'no_prologue'/'naked' attribute in this function, we can set 'naked_p' flag to indicate that we do not have to generate prologue/epilogue. Or, if all the following conditions succeed, we can set this function 'naked_p' as well: condition 1: first_regno == last_regno == SP_REGNUM, - which means we do not have to save - any callee-saved registers. + which means we do not have to save + any callee-saved registers. condition 2: Both $lp and $fp are NOT live in this function, - which means we do not need to save them. + which means we do not need to save them and there + is no outgoing size. condition 3: There is no local_size, which means - we do not need to adjust $sp. */ - if (lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) - || (cfun->machine->callee_saved_regs_first_regno == SP_REGNUM - && cfun->machine->callee_saved_regs_last_regno == SP_REGNUM + we do not need to adjust $sp. 
*/ + if (lookup_attribute ("no_prologue", DECL_ATTRIBUTES (current_function_decl)) + || lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) + || (cfun->machine->callee_saved_first_gpr_regno == SP_REGNUM + && cfun->machine->callee_saved_last_gpr_regno == SP_REGNUM + && cfun->machine->callee_saved_first_fpr_regno == SP_REGNUM + && cfun->machine->callee_saved_last_fpr_regno == SP_REGNUM && !df_regs_ever_live_p (FP_REGNUM) && !df_regs_ever_live_p (LP_REGNUM) - && cfun->machine->local_size == 0)) + && cfun->machine->local_size == 0 + && !flag_pic)) { - /* Set this function 'naked_p' and - other functions can check this flag. */ + /* Set this function 'naked_p' and other functions can check this flag. + Note that in nds32 port, the 'naked_p = 1' JUST means there is no + callee-saved, local size, and outgoing size. + The varargs space and ret instruction may still present in + the prologue/epilogue expanding. */ cfun->machine->naked_p = 1; /* No need to save $fp, $gp, and $lp. - We should set these value to be zero - so that nds32_initial_elimination_offset() can work properly. */ + We should set these value to be zero + so that nds32_initial_elimination_offset() can work properly. */ cfun->machine->fp_size = 0; cfun->machine->gp_size = 0; cfun->machine->lp_size = 0; /* If stack usage computation is required, - we need to provide the static stack size. */ + we need to provide the static stack size. */ if (flag_stack_usage_info) current_function_static_stack_size = 0; @@ -251,20 +592,23 @@ return; } + v3pushpop_p = NDS32_V3PUSH_AVAILABLE_P; + /* Adjustment for v3push instructions: If we are using v3push (push25/pop25) instructions, we need to make sure Rb is $r6 and Re is located on $r6, $r8, $r10, or $r14. Some results above will be discarded and recomputed. - Note that it is only available under V3/V3M ISA. */ - if (TARGET_V3PUSH) + Note that it is only available under V3/V3M ISA and we + DO NOT setup following stuff for isr or variadic function. */ + if (v3pushpop_p) { /* Recompute: - cfun->machine->fp_size - cfun->machine->gp_size - cfun->machine->lp_size - cfun->machine->callee_saved_regs_first_regno - cfun->machine->callee_saved_regs_last_regno */ + cfun->machine->fp_size + cfun->machine->gp_size + cfun->machine->lp_size + cfun->machine->callee_saved_first_gpr_regno + cfun->machine->callee_saved_last_gpr_regno */ /* For v3push instructions, $fp, $gp, and $lp are always saved. */ cfun->machine->fp_size = 4; @@ -272,33 +616,33 @@ cfun->machine->lp_size = 4; /* Remember to set Rb = $r6. 
*/ - cfun->machine->callee_saved_regs_first_regno = 6; + cfun->machine->callee_saved_first_gpr_regno = 6; - if (cfun->machine->callee_saved_regs_last_regno <= 6) + if (cfun->machine->callee_saved_last_gpr_regno <= 6) { /* Re = $r6 */ - cfun->machine->callee_saved_regs_last_regno = 6; + cfun->machine->callee_saved_last_gpr_regno = 6; } - else if (cfun->machine->callee_saved_regs_last_regno <= 8) + else if (cfun->machine->callee_saved_last_gpr_regno <= 8) { /* Re = $r8 */ - cfun->machine->callee_saved_regs_last_regno = 8; + cfun->machine->callee_saved_last_gpr_regno = 8; } - else if (cfun->machine->callee_saved_regs_last_regno <= 10) + else if (cfun->machine->callee_saved_last_gpr_regno <= 10) { /* Re = $r10 */ - cfun->machine->callee_saved_regs_last_regno = 10; + cfun->machine->callee_saved_last_gpr_regno = 10; } - else if (cfun->machine->callee_saved_regs_last_regno <= 14) + else if (cfun->machine->callee_saved_last_gpr_regno <= 14) { /* Re = $r14 */ - cfun->machine->callee_saved_regs_last_regno = 14; + cfun->machine->callee_saved_last_gpr_regno = 14; } - else if (cfun->machine->callee_saved_regs_last_regno == SP_REGNUM) + else if (cfun->machine->callee_saved_last_gpr_regno == SP_REGNUM) { /* If last_regno is SP_REGNUM, which means it is never changed, so set it to Re = $r6. */ - cfun->machine->callee_saved_regs_last_regno = 6; + cfun->machine->callee_saved_last_gpr_regno = 6; } else { @@ -307,33 +651,78 @@ } } - /* We have correctly set callee_saved_regs_first_regno - and callee_saved_regs_last_regno. - Initially, the callee_saved_regs_size is supposed to be 0. - As long as callee_saved_regs_last_regno is not SP_REGNUM, - we can update callee_saved_regs_size with new size. */ - if (cfun->machine->callee_saved_regs_last_regno != SP_REGNUM) + int sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + + if (!v3pushpop_p + && nds32_memory_model_option == MEMORY_MODEL_FAST + && sp_adjust == 0 + && !frame_pointer_needed) + { + block_size = cfun->machine->fp_size + + cfun->machine->gp_size + + cfun->machine->lp_size + + (4 * (cfun->machine->callee_saved_last_gpr_regno + - cfun->machine->callee_saved_first_gpr_regno + + 1)); + + if (!NDS32_DOUBLE_WORD_ALIGN_P (block_size)) + { + /* $r14 is last callee save register. */ + if (cfun->machine->callee_saved_last_gpr_regno + < NDS32_LAST_CALLEE_SAVE_GPR_REGNUM) + { + cfun->machine->callee_saved_last_gpr_regno++; + } + else if (cfun->machine->callee_saved_first_gpr_regno == SP_REGNUM) + { + cfun->machine->callee_saved_first_gpr_regno + = NDS32_FIRST_CALLEE_SAVE_GPR_REGNUM; + cfun->machine->callee_saved_last_gpr_regno + = NDS32_FIRST_CALLEE_SAVE_GPR_REGNUM; + } + } + } + + /* We have correctly set callee_saved_first_gpr_regno + and callee_saved_last_gpr_regno. + Initially, the callee_saved_gpr_regs_size is supposed to be 0. + As long as callee_saved_last_gpr_regno is not SP_REGNUM, + we can update callee_saved_gpr_regs_size with new size. */ + if (cfun->machine->callee_saved_last_gpr_regno != SP_REGNUM) { /* Compute pushed size of callee-saved registers. */ - cfun->machine->callee_saved_regs_size - = 4 * (cfun->machine->callee_saved_regs_last_regno - - cfun->machine->callee_saved_regs_first_regno + cfun->machine->callee_saved_gpr_regs_size + = 4 * (cfun->machine->callee_saved_last_gpr_regno + - cfun->machine->callee_saved_first_gpr_regno + 1); } + if (TARGET_HARD_FLOAT) + { + /* Compute size of callee svaed floating-point registers. 
*/ + if (cfun->machine->callee_saved_last_fpr_regno != SP_REGNUM) + { + cfun->machine->callee_saved_fpr_regs_size + = 4 * (cfun->machine->callee_saved_last_fpr_regno + - cfun->machine->callee_saved_first_fpr_regno + + 1); + } + } + /* Important: We need to make sure that - (va_args_size + fp_size + gp_size - + lp_size + callee_saved_regs_size) - is 8-byte alignment. - If it is not, calculate the padding bytes. */ - block_size = cfun->machine->va_args_size - + cfun->machine->fp_size + (fp_size + gp_size + lp_size + callee_saved_gpr_regs_size) + is 8-byte alignment. + If it is not, calculate the padding bytes. */ + block_size = cfun->machine->fp_size + cfun->machine->gp_size + cfun->machine->lp_size - + cfun->machine->callee_saved_regs_size; + + cfun->machine->callee_saved_gpr_regs_size; if (!NDS32_DOUBLE_WORD_ALIGN_P (block_size)) { - cfun->machine->callee_saved_area_padding_bytes + cfun->machine->callee_saved_area_gpr_padding_bytes = NDS32_ROUND_UP_DOUBLE_WORD (block_size) - block_size; } @@ -353,11 +742,12 @@ The overall concept are: "push registers to memory", "adjust stack pointer". */ -static rtx -nds32_gen_stack_push_multiple (rtx Rb, rtx Re, - rtx En4 ATTRIBUTE_UNUSED) +static void +nds32_emit_stack_push_multiple (unsigned Rb, unsigned Re, + bool save_fp_p, bool save_gp_p, bool save_lp_p, + bool vaarg_p) { - int regno; + unsigned regno; int extra_count; int num_use_regs; int par_index; @@ -368,39 +758,40 @@ rtx push_rtx; rtx adjust_sp_rtx; rtx parallel_insn; + rtx dwarf; /* We need to provide a customized rtx which contains necessary information for data analysis, so we create a parallel rtx like this: (parallel [(set (mem (plus (reg:SI SP_REGNUM) (const_int -32))) - (reg:SI Rb)) - (set (mem (plus (reg:SI SP_REGNUM) (const_int -28))) - (reg:SI Rb+1)) - ... - (set (mem (plus (reg:SI SP_REGNUM) (const_int -16))) - (reg:SI Re)) - (set (mem (plus (reg:SI SP_REGNUM) (const_int -12))) - (reg:SI FP_REGNUM)) - (set (mem (plus (reg:SI SP_REGNUM) (const_int -8))) - (reg:SI GP_REGNUM)) - (set (mem (plus (reg:SI SP_REGNUM) (const_int -4))) - (reg:SI LP_REGNUM)) - (set (reg:SI SP_REGNUM) - (plus (reg:SI SP_REGNUM) (const_int -32)))]) */ + (reg:SI Rb)) + (set (mem (plus (reg:SI SP_REGNUM) (const_int -28))) + (reg:SI Rb+1)) + ... + (set (mem (plus (reg:SI SP_REGNUM) (const_int -16))) + (reg:SI Re)) + (set (mem (plus (reg:SI SP_REGNUM) (const_int -12))) + (reg:SI FP_REGNUM)) + (set (mem (plus (reg:SI SP_REGNUM) (const_int -8))) + (reg:SI GP_REGNUM)) + (set (mem (plus (reg:SI SP_REGNUM) (const_int -4))) + (reg:SI LP_REGNUM)) + (set (reg:SI SP_REGNUM) + (plus (reg:SI SP_REGNUM) (const_int -32)))]) */ /* Calculate the number of registers that will be pushed. */ extra_count = 0; - if (cfun->machine->fp_size) + if (save_fp_p) extra_count++; - if (cfun->machine->gp_size) + if (save_gp_p) extra_count++; - if (cfun->machine->lp_size) + if (save_lp_p) extra_count++; /* Note that Rb and Re may be SP_REGNUM. DO NOT count it in. */ - if (REGNO (Rb) == SP_REGNUM && REGNO (Re) == SP_REGNUM) + if (Rb == SP_REGNUM && Re == SP_REGNUM) num_use_regs = extra_count; else - num_use_regs = REGNO (Re) - REGNO (Rb) + 1 + extra_count; + num_use_regs = Re - Rb + 1 + extra_count; /* In addition to used registers, we need one more space for (set sp sp-x) rtx. */ @@ -412,12 +803,12 @@ offset = -(num_use_regs * 4); /* Create (set mem regX) from Rb, Rb+1 up to Re. */ - for (regno = REGNO (Rb); regno <= (int) REGNO (Re); regno++) + for (regno = Rb; regno <= Re; regno++) { /* Rb and Re may be SP_REGNUM. 
- We need to break this loop immediately. */ + We need to break this loop immediately. */ if (regno == SP_REGNUM) - break; + break; reg = gen_rtx_REG (SImode, regno); mem = gen_frame_mem (SImode, plus_constant (Pmode, @@ -431,7 +822,7 @@ } /* Create (set mem fp), (set mem gp), and (set mem lp) if necessary. */ - if (cfun->machine->fp_size) + if (save_fp_p) { reg = gen_rtx_REG (SImode, FP_REGNUM); mem = gen_frame_mem (SImode, plus_constant (Pmode, @@ -443,7 +834,7 @@ offset = offset + 4; par_index++; } - if (cfun->machine->gp_size) + if (save_gp_p) { reg = gen_rtx_REG (SImode, GP_REGNUM); mem = gen_frame_mem (SImode, plus_constant (Pmode, @@ -455,7 +846,7 @@ offset = offset + 4; par_index++; } - if (cfun->machine->lp_size) + if (save_lp_p) { reg = gen_rtx_REG (SImode, LP_REGNUM); mem = gen_frame_mem (SImode, plus_constant (Pmode, @@ -479,7 +870,21 @@ XVECEXP (parallel_insn, 0, par_index) = adjust_sp_rtx; RTX_FRAME_RELATED_P (adjust_sp_rtx) = 1; - return parallel_insn; + parallel_insn = emit_insn (parallel_insn); + + /* The insn rtx 'parallel_insn' will change frame layout. + We need to use RTX_FRAME_RELATED_P so that GCC is able to + generate CFI (Call Frame Information) stuff. */ + RTX_FRAME_RELATED_P (parallel_insn) = 1; + + /* Don't use GCC's logic for CFI info if we are generate a push for VAARG + since we will not restore those register at epilogue. */ + if (vaarg_p) + { + dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, + copy_rtx (adjust_sp_rtx), NULL_RTX); + REG_NOTES (parallel_insn) = dwarf; + } } /* Function to create a parallel rtx pattern @@ -487,11 +892,11 @@ The overall concept are: "pop registers from memory", "adjust stack pointer". */ -static rtx -nds32_gen_stack_pop_multiple (rtx Rb, rtx Re, - rtx En4 ATTRIBUTE_UNUSED) +static void +nds32_emit_stack_pop_multiple (unsigned Rb, unsigned Re, + bool save_fp_p, bool save_gp_p, bool save_lp_p) { - int regno; + unsigned regno; int extra_count; int num_use_regs; int par_index; @@ -502,39 +907,40 @@ rtx pop_rtx; rtx adjust_sp_rtx; rtx parallel_insn; + rtx dwarf = NULL_RTX; /* We need to provide a customized rtx which contains necessary information for data analysis, so we create a parallel rtx like this: (parallel [(set (reg:SI Rb) - (mem (reg:SI SP_REGNUM))) - (set (reg:SI Rb+1) - (mem (plus (reg:SI SP_REGNUM) (const_int 4)))) - ... - (set (reg:SI Re) - (mem (plus (reg:SI SP_REGNUM) (const_int 16)))) - (set (reg:SI FP_REGNUM) - (mem (plus (reg:SI SP_REGNUM) (const_int 20)))) - (set (reg:SI GP_REGNUM) - (mem (plus (reg:SI SP_REGNUM) (const_int 24)))) - (set (reg:SI LP_REGNUM) - (mem (plus (reg:SI SP_REGNUM) (const_int 28)))) - (set (reg:SI SP_REGNUM) - (plus (reg:SI SP_REGNUM) (const_int 32)))]) */ + (mem (reg:SI SP_REGNUM))) + (set (reg:SI Rb+1) + (mem (plus (reg:SI SP_REGNUM) (const_int 4)))) + ... + (set (reg:SI Re) + (mem (plus (reg:SI SP_REGNUM) (const_int 16)))) + (set (reg:SI FP_REGNUM) + (mem (plus (reg:SI SP_REGNUM) (const_int 20)))) + (set (reg:SI GP_REGNUM) + (mem (plus (reg:SI SP_REGNUM) (const_int 24)))) + (set (reg:SI LP_REGNUM) + (mem (plus (reg:SI SP_REGNUM) (const_int 28)))) + (set (reg:SI SP_REGNUM) + (plus (reg:SI SP_REGNUM) (const_int 32)))]) */ /* Calculate the number of registers that will be poped. */ extra_count = 0; - if (cfun->machine->fp_size) + if (save_fp_p) extra_count++; - if (cfun->machine->gp_size) + if (save_gp_p) extra_count++; - if (cfun->machine->lp_size) + if (save_lp_p) extra_count++; /* Note that Rb and Re may be SP_REGNUM. DO NOT count it in. 
*/ - if (REGNO (Rb) == SP_REGNUM && REGNO (Re) == SP_REGNUM) + if (Rb == SP_REGNUM && Re == SP_REGNUM) num_use_regs = extra_count; else - num_use_regs = REGNO (Re) - REGNO (Rb) + 1 + extra_count; + num_use_regs = Re - Rb + 1 + extra_count; /* In addition to used registers, we need one more space for (set sp sp+x) rtx. */ @@ -546,12 +952,12 @@ offset = 0; /* Create (set regX mem) from Rb, Rb+1 up to Re. */ - for (regno = REGNO (Rb); regno <= (int) REGNO (Re); regno++) + for (regno = Rb; regno <= Re; regno++) { /* Rb and Re may be SP_REGNUM. - We need to break this loop immediately. */ + We need to break this loop immediately. */ if (regno == SP_REGNUM) - break; + break; reg = gen_rtx_REG (SImode, regno); mem = gen_frame_mem (SImode, plus_constant (Pmode, @@ -562,10 +968,12 @@ RTX_FRAME_RELATED_P (pop_rtx) = 1; offset = offset + 4; par_index++; + + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); } /* Create (set fp mem), (set gp mem), and (set lp mem) if necessary. */ - if (cfun->machine->fp_size) + if (save_fp_p) { reg = gen_rtx_REG (SImode, FP_REGNUM); mem = gen_frame_mem (SImode, plus_constant (Pmode, @@ -576,8 +984,10 @@ RTX_FRAME_RELATED_P (pop_rtx) = 1; offset = offset + 4; par_index++; + + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); } - if (cfun->machine->gp_size) + if (save_gp_p) { reg = gen_rtx_REG (SImode, GP_REGNUM); mem = gen_frame_mem (SImode, plus_constant (Pmode, @@ -588,8 +998,10 @@ RTX_FRAME_RELATED_P (pop_rtx) = 1; offset = offset + 4; par_index++; + + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); } - if (cfun->machine->lp_size) + if (save_lp_p) { reg = gen_rtx_REG (SImode, LP_REGNUM); mem = gen_frame_mem (SImode, plus_constant (Pmode, @@ -600,6 +1012,8 @@ RTX_FRAME_RELATED_P (pop_rtx) = 1; offset = offset + 4; par_index++; + + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); } /* Create (set sp sp+x). */ @@ -610,9 +1024,19 @@ stack_pointer_rtx, plus_constant (Pmode, stack_pointer_rtx, offset)); XVECEXP (parallel_insn, 0, par_index) = adjust_sp_rtx; - RTX_FRAME_RELATED_P (adjust_sp_rtx) = 1; - return parallel_insn; + /* Tell gcc we adjust SP in this insn. */ + dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, copy_rtx (adjust_sp_rtx), dwarf); + + parallel_insn = emit_insn (parallel_insn); + + /* The insn rtx 'parallel_insn' will change frame layout. + We need to use RTX_FRAME_RELATED_P so that GCC is able to + generate CFI (Call Frame Information) stuff. */ + RTX_FRAME_RELATED_P (parallel_insn) = 1; + + /* Add CFI info by manual. */ + REG_NOTES (parallel_insn) = dwarf; } /* Function to create a parallel rtx pattern @@ -620,13 +1044,12 @@ The overall concept are: "push registers to memory", "adjust stack pointer". */ -static rtx -nds32_gen_stack_v3push (rtx Rb, - rtx Re, - rtx En4 ATTRIBUTE_UNUSED, - rtx imm8u) +static void +nds32_emit_stack_v3push (unsigned Rb, + unsigned Re, + unsigned imm8u) { - int regno; + unsigned regno; int num_use_regs; int par_index; int offset; @@ -640,29 +1063,28 @@ /* We need to provide a customized rtx which contains necessary information for data analysis, so we create a parallel rtx like this: - (parallel [ - (set (mem (plus (reg:SI SP_REGNUM) (const_int -32))) - (reg:SI Rb)) - (set (mem (plus (reg:SI SP_REGNUM) (const_int -28))) - (reg:SI Rb+1)) - ... 
- (set (mem (plus (reg:SI SP_REGNUM) (const_int -16))) - (reg:SI Re)) - (set (mem (plus (reg:SI SP_REGNUM) (const_int -12))) - (reg:SI FP_REGNUM)) - (set (mem (plus (reg:SI SP_REGNUM) (const_int -8))) - (reg:SI GP_REGNUM)) - (set (mem (plus (reg:SI SP_REGNUM) (const_int -4))) - (reg:SI LP_REGNUM)) - (set (reg:SI SP_REGNUM) - (plus (reg:SI SP_REGNUM) (const_int -32-imm8u)))]) */ + (parallel [(set (mem (plus (reg:SI SP_REGNUM) (const_int -32))) + (reg:SI Rb)) + (set (mem (plus (reg:SI SP_REGNUM) (const_int -28))) + (reg:SI Rb+1)) + ... + (set (mem (plus (reg:SI SP_REGNUM) (const_int -16))) + (reg:SI Re)) + (set (mem (plus (reg:SI SP_REGNUM) (const_int -12))) + (reg:SI FP_REGNUM)) + (set (mem (plus (reg:SI SP_REGNUM) (const_int -8))) + (reg:SI GP_REGNUM)) + (set (mem (plus (reg:SI SP_REGNUM) (const_int -4))) + (reg:SI LP_REGNUM)) + (set (reg:SI SP_REGNUM) + (plus (reg:SI SP_REGNUM) (const_int -32-imm8u)))]) */ /* Calculate the number of registers that will be pushed. Since $fp, $gp, and $lp is always pushed with v3push instruction, we need to count these three registers. Under v3push, Rb is $r6, while Re is $r6, $r8, $r10, or $r14. So there is no need to worry about Rb=Re=SP_REGNUM case. */ - num_use_regs = REGNO (Re) - REGNO (Rb) + 1 + 3; + num_use_regs = Re - Rb + 1 + 3; /* In addition to used registers, we need one more space for (set sp sp-x-imm8u) rtx. */ @@ -676,7 +1098,7 @@ /* Create (set mem regX) from Rb, Rb+1 up to Re. Under v3push, Rb is $r6, while Re is $r6, $r8, $r10, or $r14. So there is no need to worry about Rb=Re=SP_REGNUM case. */ - for (regno = REGNO (Rb); regno <= (int) REGNO (Re); regno++) + for (regno = Rb; regno <= Re; regno++) { reg = gen_rtx_REG (SImode, regno); mem = gen_frame_mem (SImode, plus_constant (Pmode, @@ -729,11 +1151,16 @@ stack_pointer_rtx, plus_constant (Pmode, stack_pointer_rtx, - offset - INTVAL (imm8u))); + offset - imm8u)); XVECEXP (parallel_insn, 0, par_index) = adjust_sp_rtx; RTX_FRAME_RELATED_P (adjust_sp_rtx) = 1; - return parallel_insn; + parallel_insn = emit_insn (parallel_insn); + + /* The insn rtx 'parallel_insn' will change frame layout. + We need to use RTX_FRAME_RELATED_P so that GCC is able to + generate CFI (Call Frame Information) stuff. */ + RTX_FRAME_RELATED_P (parallel_insn) = 1; } /* Function to create a parallel rtx pattern @@ -741,13 +1168,12 @@ The overall concept are: "pop registers from memory", "adjust stack pointer". */ -static rtx -nds32_gen_stack_v3pop (rtx Rb, - rtx Re, - rtx En4 ATTRIBUTE_UNUSED, - rtx imm8u) +static void +nds32_emit_stack_v3pop (unsigned Rb, + unsigned Re, + unsigned imm8u) { - int regno; + unsigned regno; int num_use_regs; int par_index; int offset; @@ -757,32 +1183,33 @@ rtx pop_rtx; rtx adjust_sp_rtx; rtx parallel_insn; + rtx dwarf = NULL_RTX; /* We need to provide a customized rtx which contains necessary information for data analysis, so we create a parallel rtx like this: (parallel [(set (reg:SI Rb) - (mem (reg:SI SP_REGNUM))) - (set (reg:SI Rb+1) - (mem (plus (reg:SI SP_REGNUM) (const_int 4)))) - ... - (set (reg:SI Re) - (mem (plus (reg:SI SP_REGNUM) (const_int 16)))) - (set (reg:SI FP_REGNUM) - (mem (plus (reg:SI SP_REGNUM) (const_int 20)))) - (set (reg:SI GP_REGNUM) - (mem (plus (reg:SI SP_REGNUM) (const_int 24)))) - (set (reg:SI LP_REGNUM) - (mem (plus (reg:SI SP_REGNUM) (const_int 28)))) - (set (reg:SI SP_REGNUM) - (plus (reg:SI SP_REGNUM) (const_int 32+imm8u)))]) */ + (mem (reg:SI SP_REGNUM))) + (set (reg:SI Rb+1) + (mem (plus (reg:SI SP_REGNUM) (const_int 4)))) + ... 
+ (set (reg:SI Re) + (mem (plus (reg:SI SP_REGNUM) (const_int 16)))) + (set (reg:SI FP_REGNUM) + (mem (plus (reg:SI SP_REGNUM) (const_int 20)))) + (set (reg:SI GP_REGNUM) + (mem (plus (reg:SI SP_REGNUM) (const_int 24)))) + (set (reg:SI LP_REGNUM) + (mem (plus (reg:SI SP_REGNUM) (const_int 28)))) + (set (reg:SI SP_REGNUM) + (plus (reg:SI SP_REGNUM) (const_int 32+imm8u)))]) */ /* Calculate the number of registers that will be poped. Since $fp, $gp, and $lp is always poped with v3pop instruction, we need to count these three registers. Under v3push, Rb is $r6, while Re is $r6, $r8, $r10, or $r14. So there is no need to worry about Rb=Re=SP_REGNUM case. */ - num_use_regs = REGNO (Re) - REGNO (Rb) + 1 + 3; + num_use_regs = Re - Rb + 1 + 3; /* In addition to used registers, we need one more space for (set sp sp+x+imm8u) rtx. */ @@ -796,7 +1223,7 @@ /* Create (set regX mem) from Rb, Rb+1 up to Re. Under v3pop, Rb is $r6, while Re is $r6, $r8, $r10, or $r14. So there is no need to worry about Rb=Re=SP_REGNUM case. */ - for (regno = REGNO (Rb); regno <= (int) REGNO (Re); regno++) + for (regno = Rb; regno <= Re; regno++) { reg = gen_rtx_REG (SImode, regno); mem = gen_frame_mem (SImode, plus_constant (Pmode, @@ -807,6 +1234,8 @@ RTX_FRAME_RELATED_P (pop_rtx) = 1; offset = offset + 4; par_index++; + + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); } /* Create (set fp mem). */ @@ -819,6 +1248,8 @@ RTX_FRAME_RELATED_P (pop_rtx) = 1; offset = offset + 4; par_index++; + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + /* Create (set gp mem). */ reg = gen_rtx_REG (SImode, GP_REGNUM); mem = gen_frame_mem (SImode, plus_constant (Pmode, @@ -829,6 +1260,8 @@ RTX_FRAME_RELATED_P (pop_rtx) = 1; offset = offset + 4; par_index++; + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + /* Create (set lp mem ). */ reg = gen_rtx_REG (SImode, LP_REGNUM); mem = gen_frame_mem (SImode, plus_constant (Pmode, @@ -839,6 +1272,7 @@ RTX_FRAME_RELATED_P (pop_rtx) = 1; offset = offset + 4; par_index++; + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); /* Create (set sp sp+x+imm8u). */ @@ -848,509 +1282,60 @@ stack_pointer_rtx, plus_constant (Pmode, stack_pointer_rtx, - offset + INTVAL (imm8u))); + offset + imm8u)); XVECEXP (parallel_insn, 0, par_index) = adjust_sp_rtx; - RTX_FRAME_RELATED_P (adjust_sp_rtx) = 1; - - return parallel_insn; -} - -/* A subroutine that checks multiple load and store - using consecutive registers. - OP is a parallel rtx we would like to check. - LOAD_P indicates whether we are checking load operation. - PAR_INDEX is starting element of parallel rtx. - FIRST_ELT_REGNO is used to tell starting register number. - COUNT helps us to check consecutive register numbers. */ -static bool -nds32_consecutive_registers_load_store_p (rtx op, - bool load_p, - int par_index, - int first_elt_regno, - int count) -{ - int i; - int check_regno; - rtx elt; - rtx elt_reg; - rtx elt_mem; - - for (i = 0; i < count; i++) - { - /* Pick up each element from parallel rtx. */ - elt = XVECEXP (op, 0, i + par_index); - - /* If this element is not a 'set' rtx, return false immediately. */ - if (GET_CODE (elt) != SET) - return false; - - /* Pick up reg and mem of this element. */ - elt_reg = load_p ? SET_DEST (elt) : SET_SRC (elt); - elt_mem = load_p ? SET_SRC (elt) : SET_DEST (elt); - - /* If elt_reg is not a expected reg rtx, return false. */ - if (GET_CODE (elt_reg) != REG || GET_MODE (elt_reg) != SImode) - return false; - /* If elt_mem is not a expected mem rtx, return false. 
*/ - if (GET_CODE (elt_mem) != MEM || GET_MODE (elt_mem) != SImode) - return false; - - /* The consecutive registers should be in (Rb,Rb+1...Re) order. */ - check_regno = first_elt_regno + i; - - /* If the register number is not continuous, return false. */ - if (REGNO (elt_reg) != (unsigned int) check_regno) - return false; - } - - return true; -} - -/* A helper function to emit section head template. */ -static void -nds32_emit_section_head_template (char section_name[], - char symbol_name[], - int align_value, - bool object_p) -{ - const char *flags_str; - const char *type_str; - - flags_str = (object_p) ? "\"a\"" : "\"ax\""; - type_str = (object_p) ? "@object" : "@function"; - - fprintf (asm_out_file, "\t.section\t%s, %s\n", section_name, flags_str); - fprintf (asm_out_file, "\t.align\t%d\n", align_value); - fprintf (asm_out_file, "\t.global\t%s\n", symbol_name); - fprintf (asm_out_file, "\t.type\t%s, %s\n", symbol_name, type_str); - fprintf (asm_out_file, "%s:\n", symbol_name); -} - -/* A helper function to emit section tail template. */ -static void -nds32_emit_section_tail_template (char symbol_name[]) -{ - fprintf (asm_out_file, "\t.size\t%s, .-%s\n", symbol_name, symbol_name); -} - -/* Function to emit isr jump table section. */ -static void -nds32_emit_isr_jmptbl_section (int vector_id) -{ - char section_name[100]; - char symbol_name[100]; - /* Prepare jmptbl section and symbol name. */ - snprintf (section_name, sizeof (section_name), - ".nds32_jmptbl.%02d", vector_id); - snprintf (symbol_name, sizeof (symbol_name), - "_nds32_jmptbl_%02d", vector_id); - - nds32_emit_section_head_template (section_name, symbol_name, 2, true); - fprintf (asm_out_file, "\t.word\t%s\n", - nds32_isr_vectors[vector_id].func_name); - nds32_emit_section_tail_template (symbol_name); -} - -/* Function to emit isr vector section. */ -static void -nds32_emit_isr_vector_section (int vector_id) -{ - unsigned int vector_number_offset = 0; - const char *c_str = "CATEGORY"; - const char *sr_str = "SR"; - const char *nt_str = "NT"; - const char *vs_str = "VS"; - char first_level_handler_name[100]; - char section_name[100]; - char symbol_name[100]; - - /* Set the vector number offset so that we can calculate - the value that user specifies in the attribute. - We also prepare the category string for first level handler name. */ - switch (nds32_isr_vectors[vector_id].category) - { - case NDS32_ISR_INTERRUPT: - vector_number_offset = 9; - c_str = "i"; - break; - case NDS32_ISR_EXCEPTION: - vector_number_offset = 0; - c_str = "e"; - break; - case NDS32_ISR_NONE: - case NDS32_ISR_RESET: - /* Normally it should not be here. */ - gcc_unreachable (); - break; - } - - /* Prepare save reg string for first level handler name. */ - switch (nds32_isr_vectors[vector_id].save_reg) - { - case NDS32_SAVE_ALL: - sr_str = "sa"; - break; - case NDS32_PARTIAL_SAVE: - sr_str = "ps"; - break; - } - - /* Prepare nested type string for first level handler name. */ - switch (nds32_isr_vectors[vector_id].nested_type) + if (frame_pointer_needed) { - case NDS32_NESTED: - nt_str = "ns"; - break; - case NDS32_NOT_NESTED: - nt_str = "nn"; - break; - case NDS32_NESTED_READY: - nt_str = "nr"; - break; - } - - /* Currently we have 4-byte or 16-byte size for each vector. - If it is 4-byte, the first level handler name has suffix string "_4b". */ - vs_str = (nds32_isr_vector_size == 4) ? "_4b" : ""; - - /* Now we can create first level handler name. 
*/ - snprintf (first_level_handler_name, sizeof (first_level_handler_name), - "_nds32_%s_%s_%s%s", c_str, sr_str, nt_str, vs_str); - - /* Prepare vector section and symbol name. */ - snprintf (section_name, sizeof (section_name), - ".nds32_vector.%02d", vector_id); - snprintf (symbol_name, sizeof (symbol_name), - "_nds32_vector_%02d%s", vector_id, vs_str); - - - /* Everything is ready. We can start emit vector section content. */ - nds32_emit_section_head_template (section_name, symbol_name, - floor_log2 (nds32_isr_vector_size), false); - - /* According to the vector size, the instructions in the - vector section may be different. */ - if (nds32_isr_vector_size == 4) - { - /* This block is for 4-byte vector size. - Hardware $VID support is necessary and only one instruction - is needed in vector section. */ - fprintf (asm_out_file, "\tj\t%s ! jump to first level handler\n", - first_level_handler_name); + /* (expr_list:REG_CFA_DEF_CFA (plus:SI (reg/f:SI $sp) + (const_int 0)) + mean reset frame pointer to $sp and reset to offset 0. */ + rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx, + const0_rtx); + dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf); } else { - /* This block is for 16-byte vector size. - There is NO hardware $VID so that we need several instructions - such as pushing GPRs and preparing software vid at vector section. - For pushing GPRs, there are four variations for - 16-byte vector content and we have to handle each combination. - For preparing software vid, note that the vid need to - be substracted vector_number_offset. */ - if (TARGET_REDUCED_REGS) - { - if (nds32_isr_vectors[vector_id].save_reg == NDS32_SAVE_ALL) - { - /* Case of reduced set registers and save_all attribute. */ - fprintf (asm_out_file, "\t! reduced set regs + save_all\n"); - fprintf (asm_out_file, "\tsmw.adm\t$r15, [$sp], $r15, 0xf\n"); - fprintf (asm_out_file, "\tsmw.adm\t$r0, [$sp], $r10, 0x0\n"); + /* Tell gcc we adjust SP in this insn. */ + dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, + copy_rtx (adjust_sp_rtx), dwarf); + } - } - else - { - /* Case of reduced set registers and partial_save attribute. */ - fprintf (asm_out_file, "\t! reduced set regs + partial_save\n"); - fprintf (asm_out_file, "\tsmw.adm\t$r15, [$sp], $r15, 0x2\n"); - fprintf (asm_out_file, "\tsmw.adm\t$r0, [$sp], $r5, 0x0\n"); - } - } - else - { - if (nds32_isr_vectors[vector_id].save_reg == NDS32_SAVE_ALL) - { - /* Case of full set registers and save_all attribute. */ - fprintf (asm_out_file, "\t! full set regs + save_all\n"); - fprintf (asm_out_file, "\tsmw.adm\t$r0, [$sp], $r27, 0xf\n"); - } - else - { - /* Case of full set registers and partial_save attribute. */ - fprintf (asm_out_file, "\t! full set regs + partial_save\n"); - fprintf (asm_out_file, "\tsmw.adm\t$r15, [$sp], $r27, 0x2\n"); - fprintf (asm_out_file, "\tsmw.adm\t$r0, [$sp], $r5, 0x0\n"); - } - } + parallel_insn = emit_insn (parallel_insn); - fprintf (asm_out_file, "\tmovi\t$r0, %d ! preparing software vid\n", - vector_id - vector_number_offset); - fprintf (asm_out_file, "\tj\t%s ! jump to first level handler\n", - first_level_handler_name); - } + /* The insn rtx 'parallel_insn' will change frame layout. + We need to use RTX_FRAME_RELATED_P so that GCC is able to + generate CFI (Call Frame Information) stuff. */ + RTX_FRAME_RELATED_P (parallel_insn) = 1; - nds32_emit_section_tail_template (symbol_name); + /* Add CFI info by manual. */ + REG_NOTES (parallel_insn) = dwarf; } -/* Function to emit isr reset handler content. 
- Including all jmptbl/vector references, jmptbl section, - vector section, nmi handler section, and warm handler section. */ -static void -nds32_emit_isr_reset_content (void) -{ - unsigned int i; - unsigned int total_n_vectors; - const char *vs_str; - char reset_handler_name[100]; - char section_name[100]; - char symbol_name[100]; - - total_n_vectors = nds32_isr_vectors[0].total_n_vectors; - vs_str = (nds32_isr_vector_size == 4) ? "_4b" : ""; - - fprintf (asm_out_file, "\t! RESET HANDLER CONTENT - BEGIN !\n"); - - /* Create references in .rodata according to total number of vectors. */ - fprintf (asm_out_file, "\t.section\t.rodata\n"); - fprintf (asm_out_file, "\t.align\t2\n"); - - /* Emit jmptbl references. */ - fprintf (asm_out_file, "\t ! references to jmptbl section entries\n"); - for (i = 0; i < total_n_vectors; i++) - fprintf (asm_out_file, "\t.word\t_nds32_jmptbl_%02d\n", i); - - /* Emit vector references. */ - fprintf (asm_out_file, "\t ! references to vector section entries\n"); - for (i = 0; i < total_n_vectors; i++) - fprintf (asm_out_file, "\t.word\t_nds32_vector_%02d%s\n", i, vs_str); - - /* Emit jmptbl_00 section. */ - snprintf (section_name, sizeof (section_name), ".nds32_jmptbl.00"); - snprintf (symbol_name, sizeof (symbol_name), "_nds32_jmptbl_00"); - - fprintf (asm_out_file, "\t! ....................................\n"); - nds32_emit_section_head_template (section_name, symbol_name, 2, true); - fprintf (asm_out_file, "\t.word\t%s\n", - nds32_isr_vectors[0].func_name); - nds32_emit_section_tail_template (symbol_name); - - /* Emit vector_00 section. */ - snprintf (section_name, sizeof (section_name), ".nds32_vector.00"); - snprintf (symbol_name, sizeof (symbol_name), "_nds32_vector_00%s", vs_str); - snprintf (reset_handler_name, sizeof (reset_handler_name), - "_nds32_reset%s", vs_str); - - fprintf (asm_out_file, "\t! ....................................\n"); - nds32_emit_section_head_template (section_name, symbol_name, - floor_log2 (nds32_isr_vector_size), false); - fprintf (asm_out_file, "\tj\t%s ! jump to reset handler\n", - reset_handler_name); - nds32_emit_section_tail_template (symbol_name); - - /* Emit nmi handler section. */ - snprintf (section_name, sizeof (section_name), ".nds32_nmih"); - snprintf (symbol_name, sizeof (symbol_name), "_nds32_nmih"); - - fprintf (asm_out_file, "\t! ....................................\n"); - nds32_emit_section_head_template (section_name, symbol_name, 2, true); - fprintf (asm_out_file, "\t.word\t%s\n", - (strlen (nds32_isr_vectors[0].nmi_name) == 0) - ? "0" - : nds32_isr_vectors[0].nmi_name); - nds32_emit_section_tail_template (symbol_name); - - /* Emit warm handler section. */ - snprintf (section_name, sizeof (section_name), ".nds32_wrh"); - snprintf (symbol_name, sizeof (symbol_name), "_nds32_wrh"); - - fprintf (asm_out_file, "\t! ....................................\n"); - nds32_emit_section_head_template (section_name, symbol_name, 2, true); - fprintf (asm_out_file, "\t.word\t%s\n", - (strlen (nds32_isr_vectors[0].warm_name) == 0) - ? "0" - : nds32_isr_vectors[0].warm_name); - nds32_emit_section_tail_template (symbol_name); - - fprintf (asm_out_file, "\t! RESET HANDLER CONTENT - END !\n"); -} - -/* Function for nds32_merge_decl_attributes() and nds32_insert_attributes() - to check if there are any conflict isr-specific attributes being set. - We need to check: - 1. Only 'save_all' or 'partial_save' in the attributes. - 2. Only 'nested', 'not_nested', or 'nested_ready' in the attributes. - 3. 
Only 'interrupt', 'exception', or 'reset' in the attributes. */ -static void -nds32_check_isr_attrs_conflict (tree func_decl, tree func_attrs) -{ - int save_all_p, partial_save_p; - int nested_p, not_nested_p, nested_ready_p; - int intr_p, excp_p, reset_p; - - /* Initialize variables. */ - save_all_p = partial_save_p = 0; - nested_p = not_nested_p = nested_ready_p = 0; - intr_p = excp_p = reset_p = 0; - - /* We must check at MOST one attribute to set save-reg. */ - if (lookup_attribute ("save_all", func_attrs)) - save_all_p = 1; - if (lookup_attribute ("partial_save", func_attrs)) - partial_save_p = 1; - - if ((save_all_p + partial_save_p) > 1) - error ("multiple save reg attributes to function %qD", func_decl); - - /* We must check at MOST one attribute to set nested-type. */ - if (lookup_attribute ("nested", func_attrs)) - nested_p = 1; - if (lookup_attribute ("not_nested", func_attrs)) - not_nested_p = 1; - if (lookup_attribute ("nested_ready", func_attrs)) - nested_ready_p = 1; - - if ((nested_p + not_nested_p + nested_ready_p) > 1) - error ("multiple nested types attributes to function %qD", func_decl); - - /* We must check at MOST one attribute to - set interrupt/exception/reset. */ - if (lookup_attribute ("interrupt", func_attrs)) - intr_p = 1; - if (lookup_attribute ("exception", func_attrs)) - excp_p = 1; - if (lookup_attribute ("reset", func_attrs)) - reset_p = 1; - - if ((intr_p + excp_p + reset_p) > 1) - error ("multiple interrupt attributes to function %qD", func_decl); -} - -/* Function to construct isr vectors information array. - We DO NOT HAVE TO check if the attributes are valid - because those works are supposed to be done on - nds32_merge_decl_attributes() and nds32_insert_attributes(). */ static void -nds32_construct_isr_vectors_information (tree func_attrs, - const char *func_name) +nds32_emit_load_gp (void) { - tree save_all, partial_save; - tree nested, not_nested, nested_ready; - tree intr, excp, reset; - - save_all = lookup_attribute ("save_all", func_attrs); - partial_save = lookup_attribute ("partial_save", func_attrs); - - nested = lookup_attribute ("nested", func_attrs); - not_nested = lookup_attribute ("not_nested", func_attrs); - nested_ready = lookup_attribute ("nested_ready", func_attrs); - - intr = lookup_attribute ("interrupt", func_attrs); - excp = lookup_attribute ("exception", func_attrs); - reset = lookup_attribute ("reset", func_attrs); - - /* If there is no interrupt/exception/reset, we can return immediately. */ - if (!intr && !excp && !reset) - return; - - /* If we are here, either we have interrupt/exception, - or reset attribute. */ - if (intr || excp) - { - tree id_list; - - /* Prepare id list so that we can traverse and set vector id. */ - id_list = (intr) ? (TREE_VALUE (intr)) : (TREE_VALUE (excp)); - - while (id_list) - { - tree id; - int vector_id; - unsigned int vector_number_offset; - - /* The way to handle interrupt or exception is the same, - we just need to take care of actual vector number. - For interrupt(0..63), the actual vector number is (9..72). - For exception(1..8), the actual vector number is (1..8). */ - vector_number_offset = (intr) ? (9) : (0); - - /* Pick up each vector id value. */ - id = TREE_VALUE (id_list); - /* Add vector_number_offset to get actual vector number. */ - vector_id = TREE_INT_CST_LOW (id) + vector_number_offset; - - /* Enable corresponding vector and set function name. */ - nds32_isr_vectors[vector_id].category = (intr) - ? 
(NDS32_ISR_INTERRUPT) - : (NDS32_ISR_EXCEPTION); - strcpy (nds32_isr_vectors[vector_id].func_name, func_name); - - /* Set register saving scheme. */ - if (save_all) - nds32_isr_vectors[vector_id].save_reg = NDS32_SAVE_ALL; - else if (partial_save) - nds32_isr_vectors[vector_id].save_reg = NDS32_PARTIAL_SAVE; - - /* Set nested type. */ - if (nested) - nds32_isr_vectors[vector_id].nested_type = NDS32_NESTED; - else if (not_nested) - nds32_isr_vectors[vector_id].nested_type = NDS32_NOT_NESTED; - else if (nested_ready) - nds32_isr_vectors[vector_id].nested_type = NDS32_NESTED_READY; - - /* Advance to next id. */ - id_list = TREE_CHAIN (id_list); - } - } - else - { - tree id_list; - tree id; - tree nmi, warm; + rtx got_symbol, pat; - /* Deal with reset attribute. Its vector number is always 0. */ - nds32_isr_vectors[0].category = NDS32_ISR_RESET; - - /* Prepare id_list and identify id value so that - we can set total number of vectors. */ - id_list = TREE_VALUE (reset); - id = TREE_VALUE (id_list); - - /* The total vectors = interrupt + exception numbers + reset. - There are 8 exception and 1 reset in nds32 architecture. */ - nds32_isr_vectors[0].total_n_vectors = TREE_INT_CST_LOW (id) + 8 + 1; - strcpy (nds32_isr_vectors[0].func_name, func_name); - - /* Retrieve nmi and warm function. */ - nmi = lookup_attribute ("nmi", func_attrs); - warm = lookup_attribute ("warm", func_attrs); - - if (nmi != NULL_TREE) - { - tree nmi_func_list; - tree nmi_func; - - nmi_func_list = TREE_VALUE (nmi); - nmi_func = TREE_VALUE (nmi_func_list); - - /* Record nmi function name. */ - strcpy (nds32_isr_vectors[0].nmi_name, - IDENTIFIER_POINTER (nmi_func)); - } + /* Initial GLOBAL OFFSET TABLE don't do the scheduling. */ + emit_insn (gen_blockage ()); - if (warm != NULL_TREE) - { - tree warm_func_list; - tree warm_func; + got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); + /* sethi $gp, _GLOBAL_OFFSET_TABLE_ -8 */ + pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, got_symbol), UNSPEC_GOTINIT); + pat = gen_rtx_CONST (SImode, gen_rtx_PLUS (Pmode, pat, GEN_INT (-8))); + emit_insn (gen_sethi (pic_offset_table_rtx,pat)); + + /* ori $gp, $gp, _GLOBAL_OFFSET_TABLE_ -4 */ + pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, got_symbol), UNSPEC_GOTINIT); + pat = gen_rtx_CONST (SImode, gen_rtx_PLUS (Pmode, pat, GEN_INT (-4))); + emit_insn (gen_lo_sum (pic_offset_table_rtx, pic_offset_table_rtx, pat)); - warm_func_list = TREE_VALUE (warm); - warm_func = TREE_VALUE (warm_func_list); + /* add5.pc $gp */ + emit_insn (gen_add_pc (pic_offset_table_rtx, pic_offset_table_rtx)); - /* Record warm function name. */ - strcpy (nds32_isr_vectors[0].warm_name, - IDENTIFIER_POINTER (warm_func)); - } - } + /* Initial GLOBAL OFFSET TABLE don't do the scheduling. */ + emit_insn (gen_blockage ()); } /* Function that may creates more instructions @@ -1362,74 +1347,65 @@ the adjustment value is not able to be fit in the 'addi' instruction. One solution is to move value into a register and then use 'add' instruction. - In practice, we use TA_REGNUM ($r15) to accomplish this purpose. - Also, we need to return zero for sp adjustment so that - proglogue/epilogue knows there is no need to create 'addi' instruction. */ -static int -nds32_force_addi_stack_int (int full_value) + In practice, we use TA_REGNUM ($r15) to accomplish this purpose. 
*/ +static void +nds32_emit_adjust_frame (rtx to_reg, rtx from_reg, int adjust_value) { - int adjust_value; - rtx tmp_reg; - rtx sp_adjust_insn; + rtx frame_adjust_insn; + rtx adjust_value_rtx = GEN_INT (adjust_value); - if (!satisfies_constraint_Is15 (GEN_INT (full_value))) + if (adjust_value == 0) + return; + + if (!satisfies_constraint_Is15 (adjust_value_rtx)) { /* The value is not able to fit in single addi instruction. - Create more instructions of moving value into a register - and then add stack pointer with it. */ + Create more instructions of moving value into a register + and then add stack pointer with it. */ /* $r15 is going to be temporary register to hold the value. */ tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); /* Create one more instruction to move value - into the temporary register. */ - emit_move_insn (tmp_reg, GEN_INT (full_value)); + into the temporary register. */ + emit_move_insn (tmp_reg, adjust_value_rtx); /* Create new 'add' rtx. */ - sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, - stack_pointer_rtx, - tmp_reg); + frame_adjust_insn = gen_addsi3 (to_reg, + from_reg, + tmp_reg); /* Emit rtx into insn list and receive its transformed insn rtx. */ - sp_adjust_insn = emit_insn (sp_adjust_insn); + frame_adjust_insn = emit_insn (frame_adjust_insn); - /* At prologue, we need to tell GCC that this is frame related insn, - so that we can consider this instruction to output debug information. - If full_value is NEGATIVE, it means this function - is invoked by expand_prologue. */ - if (full_value < 0) - { - /* Because (tmp_reg <- full_value) may be split into two - rtl patterns, we can not set its RTX_FRAME_RELATED_P. - We need to construct another (sp <- sp + full_value) - and then insert it into sp_adjust_insn's reg note to - represent a frame related expression. - GCC knows how to refer it and output debug information. */ - - rtx plus_rtx; - rtx set_rtx; - - plus_rtx = plus_constant (Pmode, stack_pointer_rtx, full_value); - set_rtx = gen_rtx_SET (VOIDmode, stack_pointer_rtx, plus_rtx); - add_reg_note (sp_adjust_insn, REG_FRAME_RELATED_EXPR, set_rtx); - - RTX_FRAME_RELATED_P (sp_adjust_insn) = 1; - } - - /* We have used alternative way to adjust stack pointer value. - Return zero so that prologue/epilogue - will not generate other instructions. */ - return 0; + /* Because (tmp_reg <- full_value) may be split into two + rtl patterns, we can not set its RTX_FRAME_RELATED_P. + We need to construct another (sp <- sp + full_value) + and then insert it into sp_adjust_insn's reg note to + represent a frame related expression. + GCC knows how to refer it and output debug information. */ + + rtx plus_rtx; + rtx set_rtx; + + plus_rtx = plus_constant (Pmode, from_reg, adjust_value); + set_rtx = gen_rtx_SET (VOIDmode, to_reg, plus_rtx); + add_reg_note (frame_adjust_insn, REG_FRAME_RELATED_EXPR, set_rtx); } else { - /* The value is able to fit in addi instruction. - However, remember to make it to be positive value - because we want to return 'adjustment' result. */ - adjust_value = (full_value < 0) ? (-full_value) : (full_value); - - return adjust_value; + /* Generate sp adjustment instruction if and only if sp_adjust != 0. */ + frame_adjust_insn = gen_addsi3 (to_reg, + from_reg, + adjust_value_rtx); + /* Emit rtx into instructions list and receive INSN rtx form. */ + frame_adjust_insn = emit_insn (frame_adjust_insn); } + + /* The insn rtx 'sp_adjust_insn' will change frame layout. 
+ We need to use RTX_FRAME_RELATED_P so that GCC is able to + generate CFI (Call Frame Information) stuff. */ + RTX_FRAME_RELATED_P (frame_adjust_insn) = 1; } /* Return true if MODE/TYPE need double word alignment. */ @@ -1444,18 +1420,25 @@ return (align > PARM_BOUNDARY); } -/* Return true if FUNC is a naked function. */ -static bool +bool nds32_naked_function_p (tree func) { - tree t; + /* FOR BACKWARD COMPATIBILITY, + we need to support 'no_prologue' attribute as well. */ + tree t_naked; + tree t_no_prologue; if (TREE_CODE (func) != FUNCTION_DECL) abort (); - t = lookup_attribute ("naked", DECL_ATTRIBUTES (func)); + /* We have to use lookup_attribute() to check attributes. + Because attr_naked_p and attr_no_prologue_p are set in + nds32_compute_stack_frame() and the function has not been + invoked yet. */ + t_naked = lookup_attribute ("naked", DECL_ATTRIBUTES (func)); + t_no_prologue = lookup_attribute ("no_prologue", DECL_ATTRIBUTES (func)); - return (t != NULL_TREE); + return ((t_naked != NULL_TREE) || (t_no_prologue != NULL_TREE)); } /* Function that check if 'X' is a valid address register. @@ -1464,7 +1447,7 @@ STRICT : true => We are in reload pass or after reload pass. - The register number should be strictly limited in general registers. + The register number should be strictly limited in general registers. STRICT : false => Before reload pass, we are free to use any register number. */ @@ -1487,7 +1470,7 @@ /* Function that check if 'INDEX' is valid to be a index rtx for address. OUTER_MODE : Machine mode of outer address rtx. - INDEX : Check if this rtx is valid to be a index for address. + INDEX : Check if this rtx is valid to be a index for address. STRICT : If it is true, we are in reload pass or after reload pass. */ static bool nds32_legitimate_index_p (enum machine_mode outer_mode, @@ -1503,7 +1486,7 @@ case REG: regno = REGNO (index); /* If we are in reload pass or after reload pass, - we need to limit it to general register. */ + we need to limit it to general register. */ if (strict) return REGNO_OK_FOR_INDEX_P (regno); else @@ -1511,45 +1494,73 @@ case CONST_INT: /* The alignment of the integer value is determined by 'outer_mode'. */ - if (GET_MODE_SIZE (outer_mode) == 1) + switch (GET_MODE_SIZE (outer_mode)) { + case 1: /* Further check if the value is legal for the 'outer_mode'. */ - if (!satisfies_constraint_Is15 (index)) - return false; + if (satisfies_constraint_Is15 (index)) + return true; + break; - /* Pass all test, the value is valid, return true. */ - return true; - } - if (GET_MODE_SIZE (outer_mode) == 2 - && NDS32_HALF_WORD_ALIGN_P (INTVAL (index))) - { + case 2: /* Further check if the value is legal for the 'outer_mode'. */ - if (!satisfies_constraint_Is16 (index)) - return false; + if (satisfies_constraint_Is16 (index)) + { + /* If it is not under strictly aligned situation, + we can return true without checking alignment. */ + if (!cfun->machine->strict_aligned_p) + return true; + /* Make sure address is half word alignment. */ + else if (NDS32_HALF_WORD_ALIGN_P (INTVAL (index))) + return true; + } + break; - /* Pass all test, the value is valid, return true. */ - return true; - } - if (GET_MODE_SIZE (outer_mode) == 4 - && NDS32_SINGLE_WORD_ALIGN_P (INTVAL (index))) - { + case 4: /* Further check if the value is legal for the 'outer_mode'. 
*/ - if (!satisfies_constraint_Is17 (index)) - return false; + if (satisfies_constraint_Is17 (index)) + { + if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)) + { + if (!satisfies_constraint_Is14 (index)) + return false; + } + + /* If it is not under strictly aligned situation, + we can return true without checking alignment. */ + if (!cfun->machine->strict_aligned_p) + return true; + /* Make sure address is word alignment. */ + else if (NDS32_SINGLE_WORD_ALIGN_P (INTVAL (index))) + return true; + } + break; - /* Pass all test, the value is valid, return true. */ - return true; - } - if (GET_MODE_SIZE (outer_mode) == 8 - && NDS32_SINGLE_WORD_ALIGN_P (INTVAL (index))) - { - /* Further check if the value is legal for the 'outer_mode'. */ - if (!satisfies_constraint_Is17 (gen_int_mode (INTVAL (index) + 4, - SImode))) - return false; + case 8: + if (satisfies_constraint_Is17 (gen_int_mode (INTVAL (index) + 4, + SImode))) + { + if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)) + { + if (!satisfies_constraint_Is14 (index)) + return false; + } + + /* If it is not under strictly aligned situation, + we can return true without checking alignment. */ + if (!cfun->machine->strict_aligned_p) + return true; + /* Make sure address is word alignment. + Currently we do not have 64-bit load/store yet, + so we will use two 32-bit load/store instructions to do + memory access and they are single word alignment. */ + else if (NDS32_SINGLE_WORD_ALIGN_P (INTVAL (index))) + return true; + } + break; - /* Pass all test, the value is valid, return true. */ - return true; + default: + return false; } return false; @@ -1563,9 +1574,10 @@ int multiplier; multiplier = INTVAL (op1); - /* We only allow (mult reg const_int_1) - or (mult reg const_int_2) or (mult reg const_int_4). */ - if (multiplier != 1 && multiplier != 2 && multiplier != 4) + /* We only allow (mult reg const_int_1), (mult reg const_int_2), + (mult reg const_int_4) or (mult reg const_int_8). */ + if (multiplier != 1 && multiplier != 2 + && multiplier != 4 && multiplier != 8) return false; regno = REGNO (op0); @@ -1590,8 +1602,9 @@ sv = INTVAL (op1); /* We only allow (ashift reg const_int_0) - or (ashift reg const_int_1) or (ashift reg const_int_2). */ - if (sv != 0 && sv != 1 && sv !=2) + or (ashift reg const_int_1) or (ashift reg const_int_2) or + (ashift reg const_int_3). */ + if (sv != 0 && sv != 1 && sv !=2 && sv != 3) return false; regno = REGNO (op0); @@ -1610,135 +1623,265 @@ } } -/* Function to expand builtin function for - '[(unspec_volatile [(reg)])]'. */ -static rtx -nds32_expand_builtin_null_ftype_reg (enum insn_code icode, - tree exp, rtx target) +static void +nds32_insert_innermost_loop (void) { - /* Mapping: - ops[0] <--> value0 <--> arg0 */ - struct expand_operand ops[1]; - tree arg0; - rtx value0; + struct loop *loop; + basic_block *bbs, bb; - /* Grab the incoming arguments and extract its rtx. */ - arg0 = CALL_EXPR_ARG (exp, 0); - value0 = expand_normal (arg0); + compute_bb_for_insn (); + /* initial loop structure */ + loop_optimizer_init (0); - /* Create operands. */ - create_input_operand (&ops[0], value0, TYPE_MODE (TREE_TYPE (arg0))); - - /* Emit new instruction. */ - if (!maybe_expand_insn (icode, 1, ops)) - error ("invalid argument to built-in function"); + /* Scan all inner most loops. 
*/ + FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) + { + bbs = get_loop_body (loop); + bb = *bbs; + free (bbs); - return target; -} + emit_insn_before (gen_innermost_loop_begin (), + BB_HEAD (bb)); -/* Function to expand builtin function for - '[(set (reg) (unspec_volatile [(imm)]))]'. */ -static rtx -nds32_expand_builtin_reg_ftype_imm (enum insn_code icode, - tree exp, rtx target) -{ - /* Mapping: - ops[0] <--> target <--> exp - ops[1] <--> value0 <--> arg0 */ - struct expand_operand ops[2]; - tree arg0; - rtx value0; + /* Find the final basic block in the loop. */ + while (bb) + { + if (bb->next_bb == NULL) + break; - /* Grab the incoming arguments and extract its rtx. */ - arg0 = CALL_EXPR_ARG (exp, 0); - value0 = expand_normal (arg0); + if (bb->next_bb->loop_father != loop) + break; - /* Create operands. */ - create_output_operand (&ops[0], target, TYPE_MODE (TREE_TYPE (exp))); - create_input_operand (&ops[1], value0, TYPE_MODE (TREE_TYPE (arg0))); + bb = bb->next_bb; + } - /* Emit new instruction. */ - if (!maybe_expand_insn (icode, 2, ops)) - error ("invalid argument to built-in function"); + emit_insn_before (gen_innermost_loop_end (), + BB_END (bb)); + } - return target; + /* release loop structre */ + loop_optimizer_finalize (); } -/* Function to expand builtin function for - '[(unspec_volatile [(reg) (imm)])]' pattern. */ -static rtx -nds32_expand_builtin_null_ftype_reg_imm (enum insn_code icode, - tree exp, rtx target) +/* Insert isps for function with signature attribute. */ +static void +nds32_insert_isps (void) { - /* Mapping: - ops[0] <--> value0 <--> arg0 - ops[1] <--> value1 <--> arg1 */ - struct expand_operand ops[2]; - tree arg0, arg1; - rtx value0, value1; - - /* Grab the incoming arguments and extract its rtx. */ - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - value0 = expand_normal (arg0); - value1 = expand_normal (arg1); - - /* Create operands. */ - create_input_operand (&ops[0], value0, TYPE_MODE (TREE_TYPE (arg0))); - create_input_operand (&ops[1], value1, TYPE_MODE (TREE_TYPE (arg1))); - - /* Emit new instruction. */ - if (!maybe_expand_insn (icode, 2, ops)) - error ("invalid argument to built-in function"); - - return target; -} - -/* A helper function to return character based on byte size. */ -static char -nds32_byte_to_size (int byte) -{ - switch (byte) - { - case 4: - return 'w'; - case 2: - return 'h'; - case 1: - return 'b'; - default: - /* Normally it should not be here. */ - gcc_unreachable (); + rtx insn; + unsigned first = 0; + + if (!lookup_attribute ("signature", DECL_ATTRIBUTES (current_function_decl))) + return; + + insn = get_insns (); + while (insn) + { + /* In order to ensure protect whole function, emit the first + isps here rather than in prologue.*/ + if (!first && INSN_P (insn)) + { + emit_insn_before (gen_unspec_signature_begin (), insn); + first = 1; + } + + if (LABEL_P (insn) || CALL_P (insn) || any_condjump_p (insn) + || (INSN_P (insn) && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE + && (XINT (PATTERN (insn), 1) == UNSPEC_VOLATILE_SYSCALL + || XINT (PATTERN (insn), 1) == UNSPEC_VOLATILE_TRAP + || XINT (PATTERN (insn), 1) == UNSPEC_VOLATILE_TEQZ + || XINT (PATTERN (insn), 1) == UNSPEC_VOLATILE_TNEZ))) + { + emit_insn_after (gen_unspec_signature_begin (), insn); + } + insn = NEXT_INSN (insn); } } -/* A helper function to check if this function should contain prologue. 
*/ -static int -nds32_have_prologue_p (void) +static void +nds32_register_pass ( + rtl_opt_pass *(*make_pass_func) (gcc::context *), + enum pass_positioning_ops pass_pos, + const char *ref_pass_name) { - int i; + opt_pass *new_opt_pass = make_pass_func (g); + + struct register_pass_info insert_pass = + { + new_opt_pass, /* pass */ + ref_pass_name, /* reference_pass_name */ + 1, /* ref_pass_instance_number */ + pass_pos /* po_op */ + }; + + register_pass (&insert_pass); +} - for (i = 0; i < 28; i++) - if (NDS32_REQUIRED_CALLEE_SAVED_P (i)) - return 1; - - return (flag_pic - || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM) - || NDS32_REQUIRED_CALLEE_SAVED_P (LP_REGNUM)); +/* This function is called from nds32_option_override (). + All new passes should be registered here. */ +static void +nds32_register_passes (void) +{ + nds32_register_pass ( + make_pass_nds32_fp_as_gp, + PASS_POS_INSERT_BEFORE, + "ira"); + + nds32_register_pass ( + make_pass_nds32_relax_opt, + PASS_POS_INSERT_AFTER, + "mach"); + + nds32_register_pass ( + make_pass_nds32_hwloop2_opt, + PASS_POS_INSERT_BEFORE, + "mach"); + + nds32_register_pass ( + make_pass_nds32_load_store_opt, + PASS_POS_INSERT_AFTER, + "mach"); + + nds32_register_pass ( + make_pass_nds32_soft_fp_arith_comm_opt, + PASS_POS_INSERT_BEFORE, + "ira"); + + nds32_register_pass ( + make_pass_nds32_regrename_opt, + PASS_POS_INSERT_AFTER, + "mach"); + + nds32_register_pass ( + make_pass_nds32_gcse_opt, + PASS_POS_INSERT_BEFORE, + "cprop_hardreg"); + + nds32_register_pass ( + make_pass_cprop_hardreg, + PASS_POS_INSERT_AFTER, + "mach"); + + nds32_register_pass ( + make_pass_nds32_hwloop1_opt, + PASS_POS_INSERT_BEFORE, + "ira"); + + if (TARGET_PRINT_STALLS) + nds32_register_pass ( + nds32::scheduling::make_pass_nds32_print_stalls, + PASS_POS_INSERT_BEFORE, + "final"); } /* ------------------------------------------------------------------------ */ -/* PART 3: Implement target hook stuff definitions. */ +/* PART 4: Implement target hook stuff definitions. */ + + +/* Computing the Length of an Insn. + Modifies the length assigned to instruction INSN. + LEN is the initially computed length of the insn. */ +int +nds32_adjust_insn_length (rtx insn, int length) +{ + int adjust_value = 0; + switch (recog_memoized (insn)) + { + case CODE_FOR_call_immediate_align: + case CODE_FOR_call_value_immediate_align: + case CODE_FOR_call_register_align: + case CODE_FOR_call_value_register_align: + { + rtx next_insn = next_active_insn (insn); + if (next_insn && get_attr_length (next_insn) != 2) + adjust_value += 2; + } + /* FALLTHRU */ + case CODE_FOR_call_immediate: + case CODE_FOR_call_value_immediate: + case CODE_FOR_call_register: + case CODE_FOR_call_value_register: + { + /* We need insert a nop after a noretun function call + to prevent software breakpoint corrupt the next function. */ + if (find_reg_note (insn, REG_NORETURN, NULL_RTX)) + { + if (TARGET_16_BIT) + adjust_value += 2; + else + adjust_value += 4; + } + } + return length + adjust_value; + + default: + return length; + } +} + +/* Storage Layout. */ + +/* This function will be called just before expansion into rtl. */ +static void +nds32_expand_to_rtl_hook (void) +{ + /* We need to set strictly aligned situation. + After that, the memory address checking in nds32_legitimate_address_p() + will take alignment offset into consideration so that it will not create + unaligned [base + offset] access during the rtl optimization. */ + cfun->machine->strict_aligned_p = 1; +} + + +/* Register Usage. 
*/ + +static void +nds32_conditional_register_usage (void) +{ + int regno; + + if (TARGET_LINUX_ABI) + fixed_regs[TP_REGNUM] = 1; + + if (TARGET_HARD_FLOAT) + { + for (regno = NDS32_FIRST_FPR_REGNUM; + regno <= NDS32_LAST_FPR_REGNUM; regno++) + { + fixed_regs[regno] = 0; + if (regno < NDS32_FIRST_FPR_REGNUM + NDS32_MAX_FPR_REGS_FOR_ARGS) + call_used_regs[regno] = 1; + else if (regno >= NDS32_FIRST_FPR_REGNUM + 22 + && regno < NDS32_FIRST_FPR_REGNUM + 48) + call_used_regs[regno] = 1; + else + call_used_regs[regno] = 0; + } + } + else if (TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) + { + for (regno = NDS32_FIRST_FPR_REGNUM; + regno <= NDS32_LAST_FPR_REGNUM; + regno++) + fixed_regs[regno] = 0; + } +} + /* Register Classes. */ +static reg_class_t +nds32_preferred_rename_class (reg_class_t rclass) +{ + return nds32_preferred_rename_class_impl (rclass); +} + static unsigned char nds32_class_max_nregs (reg_class_t rclass ATTRIBUTE_UNUSED, enum machine_mode mode) { /* Return the maximum number of consecutive registers - needed to represent "mode" in a register of "rclass". */ + needed to represent MODE in a register of RCLASS. */ return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD); } @@ -1746,9 +1889,24 @@ nds32_register_priority (int hard_regno) { /* Encourage to use r0-r7 for LRA when optimize for size. */ - if (optimize_size && hard_regno < 8) - return 4; - return 3; + if (optimize_size) + { + if (hard_regno < 8) + return 4; + else if (hard_regno < 16) + return 3; + else if (hard_regno < 28) + return 2; + else + return 1; + } + else + { + if (hard_regno > 27) + return 1; + else + return 4; + } } @@ -1768,8 +1926,8 @@ 2. return address 3. callee-saved registers 4. (we will calculte in nds32_compute_stack_frame() - and save it at - cfun->machine->callee_saved_area_padding_bytes) + and save it at + cfun->machine->callee_saved_area_padding_bytes) [Block B] 1. local variables @@ -1787,36 +1945,37 @@ By applying the basic frame/stack/argument pointers concept, the layout of a stack frame shoule be like this: - | | + | | old stack pointer -> ---- - | | \ - | | saved arguments for - | | vararg functions - | | / + | | \ + | | saved arguments for + | | vararg functions + | | / hard frame pointer -> -- & argument pointer | | \ - | | previous hardware frame pointer - | | return address - | | callee-saved registers - | | / - frame pointer -> -- - | | \ - | | local variables - | | and incoming arguments - | | / - -- - | | \ - | | outgoing - | | arguments - | | / - stack pointer -> ---- + | | previous hardware frame pointer + | | return address + | | callee-saved registers + | | / + frame pointer -> -- + | | \ + | | local variables + | | and incoming arguments + | | / + -- + | | \ + | | outgoing + | | arguments + | | / + stack pointer -> ---- $SFP and $AP are used to represent frame pointer and arguments pointer, which will be both eliminated as hard frame pointer. */ /* -- Eliminating Frame Pointer and Arg Pointer. */ -static bool nds32_can_eliminate (const int from_reg, const int to_reg) +static bool +nds32_can_eliminate (const int from_reg, const int to_reg) { if (from_reg == ARG_POINTER_REGNUM && to_reg == STACK_POINTER_REGNUM) return true; @@ -1839,6 +1998,7 @@ nds32_function_arg (cumulative_args_t ca, enum machine_mode mode, const_tree type, bool named) { + unsigned int regno; CUMULATIVE_ARGS *cum = get_cumulative_args (ca); /* The last time this hook is called, @@ -1846,25 +2006,131 @@ if (mode == VOIDmode) return NULL_RTX; - /* For nameless arguments, they are passed on the stack. 
*/ + /* For nameless arguments, we need to take care it individually. */ if (!named) - return NULL_RTX; - - /* If there are still registers available, return it. */ - if (NDS32_ARG_PASS_IN_REG_P (cum->reg_offset, mode, type)) { - /* Pick up the next available register number. */ - unsigned int regno; + /* If we are under hard float abi, we have arguments passed on the + stack and all situation can be handled by GCC itself. */ + if (TARGET_HARD_FLOAT) + return NULL_RTX; + + if (NDS32_ARG_PARTIAL_IN_GPR_REG_P (cum->gpr_offset, mode, type)) + { + /* If we still have enough registers to pass argument, pick up + next available register number. */ + regno + = NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (cum->gpr_offset, mode, type); + return gen_rtx_REG (mode, regno); + } + + /* No register available, return NULL_RTX. + The compiler will use stack to pass argument instead. */ + return NULL_RTX; + } - regno = NDS32_AVAILABLE_REGNUM_FOR_ARG (cum->reg_offset, mode, type); - return gen_rtx_REG (mode, regno); + /* The following is to handle named argument. + Note that the strategies of TARGET_HARD_FLOAT and !TARGET_HARD_FLOAT + are different. */ + if (TARGET_HARD_FLOAT) + { + /* For TARGET_HARD_FLOAT calling convention, we use GPR and FPR + to pass argument. We have to further check TYPE and MODE so + that we can determine which kind of register we shall use. */ + + /* Note that we need to pass argument entirely in registers under + hard float abi. */ + if (GET_MODE_CLASS (mode) == MODE_FLOAT + && NDS32_ARG_ENTIRE_IN_FPR_REG_P (cum->fpr_offset, mode, type)) + { + /* Pick up the next available FPR register number. */ + regno + = NDS32_AVAILABLE_REGNUM_FOR_FPR_ARG (cum->fpr_offset, mode, type); + return gen_rtx_REG (mode, regno); + } + else if (GET_MODE_CLASS (mode) != MODE_FLOAT + && NDS32_ARG_ENTIRE_IN_GPR_REG_P (cum->gpr_offset, mode, type)) + { + /* Pick up the next available GPR register number. */ + regno + = NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (cum->gpr_offset, mode, type); + return gen_rtx_REG (mode, regno); + } } else { - /* No register available, return NULL_RTX. - The compiler will use stack to pass argument instead. */ - return NULL_RTX; + /* For !TARGET_HARD_FLOAT calling convention, we always use GPR to pass + argument. Since we allow to pass argument partially in registers, + we can just return it if there are still registers available. */ + if (NDS32_ARG_PARTIAL_IN_GPR_REG_P (cum->gpr_offset, mode, type)) + { + /* Pick up the next available register number. */ + regno + = NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (cum->gpr_offset, mode, type); + return gen_rtx_REG (mode, regno); + } + } + + /* No register available, return NULL_RTX. + The compiler will use stack to pass argument instead. */ + return NULL_RTX; +} + +static bool +nds32_must_pass_in_stack (enum machine_mode mode, const_tree type) +{ + /* Return true if a type must be passed in memory. + If it is NOT using hard float abi, small aggregates can be + passed in a register even we are calling a variadic function. + So there is no need to take padding into consideration. */ + if (TARGET_HARD_FLOAT) + return must_pass_in_stack_var_size_or_pad (mode, type); + else + return must_pass_in_stack_var_size (mode, type); +} + +static int +nds32_arg_partial_bytes (cumulative_args_t ca, enum machine_mode mode, + tree type, bool named ATTRIBUTE_UNUSED) +{ + /* Returns the number of bytes at the beginning of an argument that + must be put in registers. 
The value must be zero for arguments that are
+     passed entirely in registers or that are entirely pushed on the stack.
+     Besides, TARGET_FUNCTION_ARG for these arguments should return the
+     first register to be used by the caller for this argument.  */
+  unsigned int needed_reg_count;
+  unsigned int remaining_reg_count;
+  CUMULATIVE_ARGS *cum;
+
+  cum = get_cumulative_args (ca);
+
+  /* Under hard float abi, we had better have the argument passed entirely
+     in registers or pushed on the stack so that we can reduce the complexity
+     of dealing with cum->gpr_offset and cum->fpr_offset.  */
+  if (TARGET_HARD_FLOAT)
+    return 0;
+
+  /* If we have already run out of argument registers, return zero
+     so that the argument will be entirely pushed on the stack.  */
+  if (NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (cum->gpr_offset, mode, type)
+      >= NDS32_GPR_ARG_FIRST_REGNUM + NDS32_MAX_GPR_REGS_FOR_ARGS)
+    return 0;
+
+  /* Calculate how many registers we need for this argument.  */
+  needed_reg_count = NDS32_NEED_N_REGS_FOR_ARG (mode, type);
+
+  /* Calculate how many argument registers are left for passing this argument.
+     Note that we should count from the next available register number.  */
+  remaining_reg_count
+    = NDS32_MAX_GPR_REGS_FOR_ARGS
+      - (NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (cum->gpr_offset, mode, type)
+         - NDS32_GPR_ARG_FIRST_REGNUM);
+
+  /* Note that we have to return the number of bytes, not the register count.  */
+  if (needed_reg_count > remaining_reg_count)
+    return remaining_reg_count * UNITS_PER_WORD;
+
+  return 0;
 }
 
 static void
@@ -1873,14 +2139,40 @@
 {
   CUMULATIVE_ARGS *cum = get_cumulative_args (ca);
 
-  /* Advance next register for use.
-     Only named argument could be advanced.  */
   if (named)
     {
-      cum->reg_offset
-        = NDS32_AVAILABLE_REGNUM_FOR_ARG (cum->reg_offset, mode, type)
-          - NDS32_GPR_ARG_FIRST_REGNUM
-          + NDS32_NEED_N_REGS_FOR_ARG (mode, type);
+      /* We need to further check TYPE and MODE so that we can determine
+         which kind of register we shall advance.  */
+
+      /* Under hard float abi, we may advance FPR registers.  */
+      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
+        {
+          cum->fpr_offset
+            = NDS32_AVAILABLE_REGNUM_FOR_FPR_ARG (cum->fpr_offset, mode, type)
+              - NDS32_FPR_ARG_FIRST_REGNUM
+              + NDS32_NEED_N_REGS_FOR_ARG (mode, type);
+        }
+      else
+        {
+          cum->gpr_offset
+            = NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (cum->gpr_offset, mode, type)
+              - NDS32_GPR_ARG_FIRST_REGNUM
+              + NDS32_NEED_N_REGS_FOR_ARG (mode, type);
+        }
     }
+  else
+    {
+      /* If this nameless argument is NOT under TARGET_HARD_FLOAT,
+         we can advance to the next register as well, so that the caller
+         is able to pass arguments in registers and the callee is in
+         charge of pushing all of them onto the stack.  */
+      if (!TARGET_HARD_FLOAT)
+        {
+          cum->gpr_offset
+            = NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (cum->gpr_offset, mode, type)
+              - NDS32_GPR_ARG_FIRST_REGNUM
+              + NDS32_NEED_N_REGS_FOR_ARG (mode, type);
+        }
+    }
 }
 
@@ -1892,6 +2184,16 @@
       : PARM_BOUNDARY);
 }
 
+bool
+nds32_vector_mode_supported_p (enum machine_mode mode)
+{
+  if (mode == V4QImode
+      || mode == V2HImode)
+    return NDS32_EXT_DSP_P ();
+
+  return false;
+}
+
 /* -- How Scalar Function Values Are Returned.
*/ static rtx @@ -1905,22 +2207,62 @@ mode = TYPE_MODE (ret_type); unsignedp = TYPE_UNSIGNED (ret_type); - mode = promote_mode (ret_type, mode, &unsignedp); + if (INTEGRAL_TYPE_P (ret_type)) + mode = promote_mode (ret_type, mode, &unsignedp); - return gen_rtx_REG (mode, NDS32_GPR_RET_FIRST_REGNUM); + if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) + return gen_rtx_REG (mode, NDS32_FPR_RET_FIRST_REGNUM); + else + return gen_rtx_REG (mode, NDS32_GPR_RET_FIRST_REGNUM); } static rtx nds32_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) { + if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) + return gen_rtx_REG (mode, NDS32_FPR_RET_FIRST_REGNUM); + return gen_rtx_REG (mode, NDS32_GPR_RET_FIRST_REGNUM); } static bool nds32_function_value_regno_p (const unsigned int regno) { - return (regno == NDS32_GPR_RET_FIRST_REGNUM); + if (regno == NDS32_GPR_RET_FIRST_REGNUM + || (TARGET_HARD_FLOAT + && regno == NDS32_FPR_RET_FIRST_REGNUM)) + return true; + + return false; +} + +/* -- How Large Values Are Returned. */ + +static bool +nds32_return_in_memory (const_tree type, + const_tree fntype ATTRIBUTE_UNUSED) +{ + /* Note that int_size_in_bytes can return -1 if the size can vary + or is larger than an integer. */ + HOST_WIDE_INT size = int_size_in_bytes (type); + + /* For COMPLEX_TYPE, if the total size cannot be hold within two registers, + the return value is supposed to be in memory. We need to be aware of + that the size may be -1. */ + if (TREE_CODE (type) == COMPLEX_TYPE) + if (size < 0 || size > 2 * UNITS_PER_WORD) + return true; + + /* If it is BLKmode and the total size cannot be hold within two registers, + the return value is supposed to be in memory. We need to be aware of + that the size may be -1. */ + if (TYPE_MODE (type) == BLKmode) + if (size < 0 || size > 2 * UNITS_PER_WORD) + return true; + + /* For other cases, having result in memory is unnecessary. */ + return false; } /* -- Function Entry and Exit. */ @@ -1951,7 +2293,7 @@ /* Use df_regs_ever_live_p() to detect if the register is ever used in the current function. */ fprintf (file, "\t! registers ever_live: "); - for (r = 0; r < 32; r++) + for (r = 0; r < 65; r++) { if (df_regs_ever_live_p (r)) fprintf (file, "%s, ", reg_names[r]); @@ -1983,6 +2325,10 @@ attrs = TREE_CHAIN (attrs); } fputc ('\n', file); + + /* If there is any critical isr in this file, disable linker ifc. */ + if (nds32_isr_function_critical_p (current_function_decl)) + fprintf (file, "\t.no_relax ifc\n"); } /* After rtl prologue has been expanded, this function is used. */ @@ -1990,56 +2336,12 @@ nds32_asm_function_end_prologue (FILE *file) { fprintf (file, "\t! END PROLOGUE\n"); - - /* If frame pointer is NOT needed and -mfp-as-gp is issued, - we can generate special directive: ".omit_fp_begin" - to guide linker doing fp-as-gp optimization. - However, for a naked function, which means - it should not have prologue/epilogue, - using fp-as-gp still requires saving $fp by push/pop behavior and - there is no benefit to use fp-as-gp on such small function. - So we need to make sure this function is NOT naked as well. */ - if (!frame_pointer_needed - && !cfun->machine->naked_p - && cfun->machine->fp_as_gp_p) - { - fprintf (file, "\t! ----------------------------------------\n"); - fprintf (file, "\t! Guide linker to do " - "link time optimization: fp-as-gp\n"); - fprintf (file, "\t! We add one more instruction to " - "initialize $fp near to $gp location.\n"); - fprintf (file, "\t! 
If linker fails to use fp-as-gp transformation,\n"); - fprintf (file, "\t! this extra instruction should be " - "eliminated at link stage.\n"); - fprintf (file, "\t.omit_fp_begin\n"); - fprintf (file, "\tla\t$fp,_FP_BASE_\n"); - fprintf (file, "\t! ----------------------------------------\n"); - } } /* Before rtl epilogue has been expanded, this function is used. */ static void nds32_asm_function_begin_epilogue (FILE *file) { - /* If frame pointer is NOT needed and -mfp-as-gp is issued, - we can generate special directive: ".omit_fp_end" - to claim fp-as-gp optimization range. - However, for a naked function, - which means it should not have prologue/epilogue, - using fp-as-gp still requires saving $fp by push/pop behavior and - there is no benefit to use fp-as-gp on such small function. - So we need to make sure this function is NOT naked as well. */ - if (!frame_pointer_needed - && !cfun->machine->naked_p - && cfun->machine->fp_as_gp_p) - { - fprintf (file, "\t! ----------------------------------------\n"); - fprintf (file, "\t! Claim the range of fp-as-gp " - "link time optimization\n"); - fprintf (file, "\t.omit_fp_end\n"); - fprintf (file, "\t! ----------------------------------------\n"); - } - fprintf (file, "\t! BEGIN EPILOGUE\n"); } @@ -2067,53 +2369,157 @@ ? 1 : 0); + if (flag_pic) + { + fprintf (file, "\tsmw.adm\t$r31, [$r31], $r31, 4\n"); + fprintf (file, "\tsethi\t%s, hi20(_GLOBAL_OFFSET_TABLE_-8)\n", + reg_names [PIC_OFFSET_TABLE_REGNUM]); + fprintf (file, "\tori\t%s, %s, lo12(_GLOBAL_OFFSET_TABLE_-4)\n", + reg_names [PIC_OFFSET_TABLE_REGNUM], + reg_names [PIC_OFFSET_TABLE_REGNUM]); + + if (TARGET_ISA_V3) + fprintf (file, "\tadd5.pc\t$gp\n"); + else + { + fprintf (file, "\tmfusr\t$ta, $pc\n"); + fprintf (file, "\tadd\t%s, $ta, %s\n", + reg_names [PIC_OFFSET_TABLE_REGNUM], + reg_names [PIC_OFFSET_TABLE_REGNUM]); + } + } + if (delta != 0) { if (satisfies_constraint_Is15 (GEN_INT (delta))) { - fprintf (file, "\taddi\t$r%d, $r%d, %ld\n", + fprintf (file, "\taddi\t$r%d, $r%d, " HOST_WIDE_INT_PRINT_DEC "\n", this_regno, this_regno, delta); } else if (satisfies_constraint_Is20 (GEN_INT (delta))) { - fprintf (file, "\tmovi\t$ta, %ld\n", delta); + fprintf (file, "\tmovi\t$ta, " HOST_WIDE_INT_PRINT_DEC "\n", delta); fprintf (file, "\tadd\t$r%d, $r%d, $ta\n", this_regno, this_regno); } else { - fprintf (file, "\tsethi\t$ta, hi20(%ld)\n", delta); - fprintf (file, "\tori\t$ta, $ta, lo12(%ld)\n", delta); + fprintf (file, + "\tsethi\t$ta, hi20(" HOST_WIDE_INT_PRINT_DEC ")\n", + delta); + fprintf (file, + "\tori\t$ta, $ta, lo12(" HOST_WIDE_INT_PRINT_DEC ")\n", + delta); fprintf (file, "\tadd\t$r%d, $r%d, $ta\n", this_regno, this_regno); } } - fprintf (file, "\tb\t"); - assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); - fprintf (file, "\n"); + if (flag_pic) + { + fprintf (file, "\tla\t$ta, "); + assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); + fprintf (file, "@PLT\n"); + fprintf (file, "\t! epilogue\n"); + fprintf (file, "\tlwi.bi\t%s, [%s], 4\n", + reg_names[PIC_OFFSET_TABLE_REGNUM], + reg_names[STACK_POINTER_REGNUM]); + fprintf (file, "\tbr\t$ta\n"); + } + else + { + fprintf (file, "\tb\t"); + assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); + fprintf (file, "\n"); + } final_end_function (); } /* -- Permitting tail calls. */ +/* Return true if it is ok to do sibling call optimization. */ +static bool +nds32_function_ok_for_sibcall (tree decl, + tree exp ATTRIBUTE_UNUSED) +{ + /* The DECL is NULL if it is an indirect call. */ + + /* 1. 
Do not apply sibling call if -mv3push is enabled,
+        because the pop25 instruction also represents return behavior.
+     2. If this function is an isr function, do not apply sibling call
+        because it may perform behavior that the user does not expect.
+     3. If this function is a variadic function, do not apply sibling call
+        because the stack layout may be a mess.
+     4. We don't want to apply sibling call optimization for an indirect
+        sibcall, because the pop behavior in the epilogue may pollute the
+        content of a caller-saved register when that register is used for
+        the indirect sibcall.
+     5. In pic mode, it may use some registers for PLT calls.  */
+  return (!TARGET_V3PUSH
+          && !nds32_isr_function_p (current_function_decl)
+          && (cfun->machine->va_args_size == 0)
+          && decl
+          && !flag_pic);
+}
+
 /* Determine whether we need to enable warning for function return check.  */
 static bool
 nds32_warn_func_return (tree decl)
 {
-/* Naked functions are implemented entirely in assembly, including the
-   return sequence, so suppress warnings about this.  */
+  /* Naked functions are implemented entirely in assembly, including the
+     return sequence, so suppress warnings about this.  */
   return !nds32_naked_function_p (decl);
 }
 
 
 /* Implementing the Varargs Macros.  */
 
+static void
+nds32_setup_incoming_varargs (cumulative_args_t ca,
+                              enum machine_mode mode,
+                              tree type,
+                              int *pretend_args_size,
+                              int second_time ATTRIBUTE_UNUSED)
+{
+  unsigned int total_args_regs;
+  unsigned int num_of_used_regs;
+  unsigned int remaining_reg_count;
+  CUMULATIVE_ARGS *cum;
+
+  /* If we are under hard float abi, we do not need to set *pretend_args_size.
+     That way all nameless arguments are pushed by the caller and the whole
+     situation can be handled by GCC itself.  */
+  if (TARGET_HARD_FLOAT)
+    return;
+
+  /* We are using NDS32_MAX_GPR_REGS_FOR_ARGS registers,
+     counting from NDS32_GPR_ARG_FIRST_REGNUM, for saving incoming arguments.
+     However, for nameless (anonymous) arguments, we should push them on the
+     stack so that all the nameless arguments appear to have been passed
+     consecutively in memory for accessing.  Hence, we need to check and
+     exclude the registers that are used for named arguments.  */
+
+  cum = get_cumulative_args (ca);
+
+  /* The MODE and TYPE describe the last named argument.
+     We need that information to determine the remaining registers
+     for varargs.  */
+  total_args_regs
+    = NDS32_MAX_GPR_REGS_FOR_ARGS + NDS32_GPR_ARG_FIRST_REGNUM;
+  num_of_used_regs
+    = NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (cum->gpr_offset, mode, type)
+      + NDS32_NEED_N_REGS_FOR_ARG (mode, type);
+
+  remaining_reg_count = total_args_regs - num_of_used_regs;
+  *pretend_args_size = remaining_reg_count * UNITS_PER_WORD;
+
+  return;
+}
+
 static bool
 nds32_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
 {
-  /* Return true so that all the named arguments for FUNCTION_ARG have named=1.
-     If return false, for the variadic function, all named arguments EXCEPT
-     the last are treated as named.  */
+  /* If this hook returns true, the named argument given to FUNCTION_ARG is
+     always true for named arguments, and false for unnamed arguments.  */
   return true;
 }
 
@@ -2183,7 +2589,7 @@
       sorry ("a nested function is not supported for reduced registers");
 
   /* STEP 1: Copy trampoline code template into stack,
-     fill up essential data into stack.  */
+             fill up essential data into stack.  */
 
   /* Extract nested function address rtx.
*/ fnaddr = XEXP (DECL_RTL (fndecl), 0); @@ -2219,8 +2625,8 @@ && (tramp_align_in_bytes % nds32_cache_block_size) == 0) { /* Under this condition, the starting address of trampoline - must be aligned to the starting address of each cache block - and we do not have to worry about cross-boundary issue. */ + must be aligned to the starting address of each cache block + and we do not have to worry about cross-boundary issue. */ for (i = 0; i < (TRAMPOLINE_SIZE + nds32_cache_block_size - 1) / nds32_cache_block_size; @@ -2235,10 +2641,10 @@ else if (TRAMPOLINE_SIZE > nds32_cache_block_size) { /* The starting address of trampoline code - may not be aligned to the cache block, - so the trampoline code may be across two cache block. - We need to sync the last element, which is 4-byte size, - of trampoline template. */ + may not be aligned to the cache block, + so the trampoline code may be across two cache block. + We need to sync the last element, which is 4-byte size, + of trampoline template. */ for (i = 0; i < (TRAMPOLINE_SIZE + nds32_cache_block_size - 1) / nds32_cache_block_size; @@ -2259,16 +2665,16 @@ else { /* This is the simplest case. - Because TRAMPOLINE_SIZE is less than or - equal to nds32_cache_block_size, - we can just sync start address and - the last element of trampoline code. */ + Because TRAMPOLINE_SIZE is less than or + equal to nds32_cache_block_size, + we can just sync start address and + the last element of trampoline code. */ /* Sync starting address of tampoline code. */ emit_move_insn (tmp_reg, sync_cache_addr); emit_insn (isync_insn); /* Sync the last element, which is 4-byte size, - of trampoline template. */ + of trampoline template. */ emit_move_insn (tmp_reg, plus_constant (Pmode, sync_cache_addr, TRAMPOLINE_SIZE - 4)); @@ -2286,9 +2692,50 @@ static bool nds32_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) { + if (TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) + { + /* When using floating-point instructions, + we don't allow 'addr' to be [symbol_ref], [CONST] pattern. */ + if (mode == DFmode + && (GET_CODE (x) == SYMBOL_REF + || GET_CODE(x) == CONST)) + return false; + + /* Allow [post_modify] addressing mode, when using FPU instructions. */ + if (GET_CODE (x) == POST_MODIFY + && mode == DFmode) + { + if (GET_CODE (XEXP (x, 0)) == REG + && GET_CODE (XEXP (x, 1)) == PLUS) + { + rtx plus_op = XEXP (x, 1); + rtx op0 = XEXP (plus_op, 0); + rtx op1 = XEXP (plus_op, 1); + + if (nds32_address_register_rtx_p (op0, strict) + && CONST_INT_P (op1)) + { + if (satisfies_constraint_Is14 (op1)) + { + /* If it is not under strictly aligned situation, + we can return true without checking alignment. */ + if (!cfun->machine->strict_aligned_p) + return true; + /* Make sure address is word alignment. + Currently we do not have 64-bit load/store yet, + so we will use two 32-bit load/store instructions to do + memory access and they are single word alignment. */ + else if (NDS32_SINGLE_WORD_ALIGN_P (INTVAL (op1))) + return true; + } + } + } + } + } + /* For (mem:DI addr) or (mem:DF addr) case, we only allow 'addr' to be [reg], [symbol_ref], - [const], or [reg + const_int] pattern. */ + [const], or [reg + const_int] pattern. */ if (mode == DImode || mode == DFmode) { /* Allow [Reg + const_int] addressing mode. 
*/ @@ -2298,13 +2745,19 @@ && nds32_legitimate_index_p (mode, XEXP (x, 1), strict) && CONST_INT_P (XEXP (x, 1))) return true; - else if (nds32_address_register_rtx_p (XEXP (x, 1), strict) && nds32_legitimate_index_p (mode, XEXP (x, 0), strict) && CONST_INT_P (XEXP (x, 0))) return true; } + /* Allow [post_inc] and [post_dec] addressing mode. */ + if (GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC) + { + if (nds32_address_register_rtx_p (XEXP (x, 0), strict)) + return true; + } + /* Now check [reg], [symbol_ref], and [const]. */ if (GET_CODE (x) != REG && GET_CODE (x) != SYMBOL_REF @@ -2320,26 +2773,34 @@ return nds32_address_register_rtx_p (x, strict); case SYMBOL_REF: + /* (mem (symbol_ref A)) => [symbol_ref] */ + + if (flag_pic || SYMBOL_REF_TLS_MODEL (x)) + return false; - if (!TARGET_GP_DIRECT + /* If -mcmodel=large, the 'symbol_ref' is not a valid address + during or after LRA/reload phase. */ + if (TARGET_CMODEL_LARGE && (reload_completed || reload_in_progress || lra_in_progress)) return false; - - /* (mem (symbol_ref A)) => [symbol_ref] */ - return !currently_expanding_to_rtl; - - case CONST: - - if (!TARGET_GP_DIRECT + /* If -mcmodel=medium and the symbol references to rodata section, + the 'symbol_ref' is not a valid address during or after + LRA/reload phase. */ + if (TARGET_CMODEL_MEDIUM + && (NDS32_SYMBOL_REF_RODATA_P (x) + || CONSTANT_POOL_ADDRESS_P (x)) && (reload_completed || reload_in_progress || lra_in_progress)) return false; + return true; + + case CONST: /* (mem (const (...))) - => [ + const_addr ], where const_addr = symbol_ref + const_int */ + => [ + const_addr ], where const_addr = symbol_ref + const_int */ if (GET_CODE (XEXP (x, 0)) == PLUS) { rtx plus_op = XEXP (x, 0); @@ -2348,18 +2809,43 @@ rtx op1 = XEXP (plus_op, 1); if (GET_CODE (op0) == SYMBOL_REF && CONST_INT_P (op1)) - return true; - else - return false; + { + /* Now we see the [ + const_addr ] pattern, but we need + some further checking. */ + + if (flag_pic) + return false; + + /* If -mcmodel=large, the 'const_addr' is not a valid address + during or after LRA/reload phase. */ + if (TARGET_CMODEL_LARGE + && (reload_completed + || reload_in_progress + || lra_in_progress)) + return false; + /* If -mcmodel=medium and the symbol references to rodata section, + the 'const_addr' is not a valid address during or after + LRA/reload phase. */ + if (TARGET_CMODEL_MEDIUM + && NDS32_SYMBOL_REF_RODATA_P (op0) + && (reload_completed + || reload_in_progress + || lra_in_progress)) + return false; + + /* At this point we can make sure 'const_addr' is a + valid address. */ + return true; + } } return false; case POST_MODIFY: /* (mem (post_modify (reg) (plus (reg) (reg)))) - => [Ra], Rb */ + => [Ra], Rb */ /* (mem (post_modify (reg) (plus (reg) (const_int)))) - => [Ra], const_int */ + => [Ra], const_int */ if (GET_CODE (XEXP (x, 0)) == REG && GET_CODE (XEXP (x, 1)) == PLUS) { @@ -2382,7 +2868,7 @@ /* (mem (post_inc reg)) => [Ra], 1/2/4 */ /* (mem (post_dec reg)) => [Ra], -1/-2/-4 */ /* The 1/2/4 or -1/-2/-4 have been displayed in nds32.md. - We only need to deal with register Ra. */ + We only need to deal with register Ra. 
*/ if (nds32_address_register_rtx_p (XEXP (x, 0), strict)) return true; else @@ -2390,11 +2876,11 @@ case PLUS: /* (mem (plus reg const_int)) - => [Ra + imm] */ + => [Ra + imm] */ /* (mem (plus reg reg)) - => [Ra + Rb] */ + => [Ra + Rb] */ /* (mem (plus (mult reg const_int) reg)) - => [Ra + Rb << sv] */ + => [Ra + Rb << sv] */ if (nds32_address_register_rtx_p (XEXP (x, 0), strict) && nds32_legitimate_index_p (mode, XEXP (x, 1), strict)) return true; @@ -2405,245 +2891,450 @@ return false; case LO_SUM: - if (!TARGET_GP_DIRECT) - return true; + /* (mem (lo_sum (reg) (symbol_ref))) */ + /* (mem (lo_sum (reg) (const (plus (symbol_ref) (reg)))) */ + /* TLS case: (mem (lo_sum (reg) (const (unspec symbol_ref X)))) */ + /* The LO_SUM is a valid address if and only if we would like to + generate 32-bit full address memory access with any of following + circumstance: + 1. -mcmodel=large. + 2. -mcmodel=medium and the symbol_ref references to rodata. */ + { + rtx sym = NULL_RTX; + + if (flag_pic) + return false; + + if (!REG_P (XEXP (x, 0))) + return false; + + if (GET_CODE (XEXP (x, 1)) == SYMBOL_REF) + sym = XEXP (x, 1); + else if (GET_CODE (XEXP (x, 1)) == CONST) + { + rtx plus = XEXP(XEXP (x, 1), 0); + if (GET_CODE (plus) == PLUS) + sym = XEXP (plus, 0); + else if (GET_CODE (plus) == UNSPEC) + sym = XEXP(XEXP (plus, 0), 0); + } + else + return false; + + gcc_assert (GET_CODE (sym) == SYMBOL_REF); + + if (TARGET_CMODEL_LARGE) + return true; + else if (TARGET_CMODEL_MEDIUM + && NDS32_SYMBOL_REF_RODATA_P (sym)) + return true; + else + return false; + } default: return false; } } - -/* Describing Relative Costs of Operations. */ +/* Convert a non-PIC address in `x' to a PIC address using @GOT or + @GOTOFF. -static int nds32_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, - reg_class_t from, - reg_class_t to) + Example for @GOTOFF: + lw $r0, symbol@GOTOFF + -> sethi $ta, hi20(symbol@GOTOFF) + ori $ta, $ta, lo12(symbol@GOTOFF) + lw $r0, [$ta + $gp] + + Example for @GOT: + lw $r0, symbol@GOT + -> sethi $ta, hi20(symbol@GOT) + ori $ta, $ta, lo12(symbol@GOT) + lw $ta, [$ta + $gp] + lw $r0, [$ta] */ +static rtx +nds32_legitimize_pic_address (rtx x) { - if (from == HIGH_REGS || to == HIGH_REGS) - return 6; + rtx addr = x; + rtx reg = gen_reg_rtx (Pmode); - return 2; + if (GET_CODE (x) == LABEL_REF + || (GET_CODE (x) == SYMBOL_REF + && (CONSTANT_POOL_ADDRESS_P (x) + || SYMBOL_REF_LOCAL_P (x)))) + { + addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_GOTOFF); + addr = gen_rtx_CONST (SImode, addr); + emit_insn (gen_sethi (reg, addr)); + emit_insn (gen_lo_sum (reg, reg, addr)); + x = gen_rtx_PLUS (SImode, pic_offset_table_rtx, reg); + } + else if (GET_CODE (x) == SYMBOL_REF) + { + addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_GOT); + addr = gen_rtx_CONST (SImode, addr); + emit_insn (gen_sethi (reg, addr)); + emit_insn (gen_lo_sum (reg, reg, addr)); + + /* lw $ta, [$ta + $gp] */ + rtx got_addr = gen_frame_mem (SImode, gen_rtx_PLUS (Pmode, + pic_offset_table_rtx, + reg)); + emit_move_insn (reg, got_addr); + x = reg; + } + else if (GET_CODE (x) == CONST) + { + addr = XEXP (x, 0); + gcc_assert (GET_CODE (addr) == PLUS); + + rtx op0 = XEXP (addr, 0); + rtx op1 = XEXP (addr, 1); + + if ((GET_CODE (op0) == LABEL_REF + || (GET_CODE (op0) == SYMBOL_REF + && (CONSTANT_POOL_ADDRESS_P (op0) + || SYMBOL_REF_LOCAL_P (op0)))) + && GET_CODE (op1) == CONST_INT) + { + addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), UNSPEC_GOTOFF); + addr = gen_rtx_CONST (Pmode, addr); + emit_insn (gen_sethi (reg, 
addr)); + emit_insn (gen_lo_sum (reg, reg, addr)); + emit_insn (gen_addsi3 (reg, reg, pic_offset_table_rtx)); + emit_insn (gen_addsi3 (reg, reg, op1)); + x = reg; + } + else if (GET_CODE (op0) == SYMBOL_REF + && GET_CODE (op1) == CONST_INT) + { + /* This is a constant offset from a @GOT symbol reference. */ + addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, op0), UNSPEC_GOT); + addr = gen_rtx_CONST (SImode, addr); + emit_insn (gen_sethi (reg, addr)); + emit_insn (gen_lo_sum (reg, reg, addr)); + + /* lw $ta, [$ta + $gp] */ + rtx got_addr = gen_frame_mem (SImode, + gen_rtx_PLUS (Pmode, + pic_offset_table_rtx, + reg)); + emit_move_insn (reg, got_addr); + emit_insn (gen_addsi3 (reg, reg, op1)); + x = reg; + } + else + { + /* Don't handle this pattern. */ + debug_rtx (x); + gcc_unreachable (); + } + } + return x; } -static int nds32_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, - reg_class_t rclass ATTRIBUTE_UNUSED, - bool in ATTRIBUTE_UNUSED) -{ - return 8; +static rtx +nds32_legitimize_address (rtx x, + rtx oldx ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED) +{ + if (nds32_tls_referenced_p (x)) + x = nds32_legitimize_tls_address (x); + else if (flag_pic && SYMBOLIC_CONST_P (x)) + x = nds32_legitimize_pic_address (x); + + return x; } -/* This target hook describes the relative costs of RTL expressions. - Return 'true' when all subexpressions of x have been processed. - Return 'false' to sum the costs of sub-rtx, plus cost of this operation. - Refer to gcc/rtlanal.c for more information. */ static bool -nds32_rtx_costs (rtx x, - int code, - int outer_code, - int opno ATTRIBUTE_UNUSED, - int *total, - bool speed) +nds32_legitimate_constant_p (enum machine_mode mode, rtx x) { - /* According to 'speed', goto suitable cost model section. */ - if (speed) - goto performance_cost; - else - goto size_cost; - - -performance_cost: - /* This is section for performance cost model. */ - - /* In gcc/rtl.h, the default value of COSTS_N_INSNS(N) is N*4. - We treat it as 4-cycle cost for each instruction - under performance consideration. */ - switch (code) + switch (GET_CODE (x)) { - case SET: - /* For 'SET' rtx, we need to return false - so that it can recursively calculate costs. */ - return false; - - case USE: - /* Used in combine.c as a marker. */ - *total = 0; + case CONST_DOUBLE: + if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) + && (mode == DFmode || mode == SFmode)) + return false; break; + case CONST: + x = XEXP (x, 0); - case MULT: - *total = COSTS_N_INSNS (1); - break; + if (GET_CODE (x) == PLUS) + { + if (! CONST_INT_P (XEXP (x, 1))) + return false; + x = XEXP (x, 0); + } - case DIV: - case UDIV: - case MOD: - case UMOD: - *total = COSTS_N_INSNS (7); + if (GET_CODE (x) == UNSPEC) + { + switch (XINT (x, 1)) + { + case UNSPEC_GOT: + case UNSPEC_GOTOFF: + case UNSPEC_PLT: + case UNSPEC_TLSGD: + case UNSPEC_TLSLD: + case UNSPEC_TLSIE: + case UNSPEC_TLSLE: + return false; + default: + return true; + } + } break; - - default: - *total = COSTS_N_INSNS (1); + case SYMBOL_REF: + /* TLS symbols need a call to resolve in + precompute_register_parameters. */ + if (SYMBOL_REF_TLS_MODEL (x)) + return false; break; + default: + return true; } return true; +} +/* Reorgnize the UNSPEC CONST and return its direct symbol. */ +static rtx +nds32_delegitimize_address (rtx x) +{ + x = delegitimize_mem_from_attrs (x); -size_cost: - /* This is section for size cost model. */ - - /* In gcc/rtl.h, the default value of COSTS_N_INSNS(N) is N*4. 
- We treat it as 4-byte cost for each instruction - under code size consideration. */ - switch (code) + if (GET_CODE(x) == CONST) { - case SET: - /* For 'SET' rtx, we need to return false - so that it can recursively calculate costs. */ - return false; + rtx inner = XEXP (x, 0); - case USE: - /* Used in combine.c as a marker. */ - *total = 0; - break; + /* Handle for GOTOFF. */ + if (GET_CODE (inner) == PLUS) + inner = XEXP (inner, 0); - case CONST_INT: - /* All instructions involving constant operation - need to be considered for cost evaluation. */ - if (outer_code == SET) - { - /* (set X imm5s), use movi55, 2-byte cost. - (set X imm20s), use movi, 4-byte cost. - (set X BIG_INT), use sethi/ori, 8-byte cost. */ - if (satisfies_constraint_Is05 (x)) - *total = COSTS_N_INSNS (1) - 2; - else if (satisfies_constraint_Is20 (x)) - *total = COSTS_N_INSNS (1); - else - *total = COSTS_N_INSNS (2); - } - else if (outer_code == PLUS || outer_code == MINUS) - { - /* Possible addi333/subi333 or subi45/addi45, 2-byte cost. - General case, cost 1 instruction with 4-byte. */ - if (satisfies_constraint_Iu05 (x)) - *total = COSTS_N_INSNS (1) - 2; - else - *total = COSTS_N_INSNS (1); - } - else if (outer_code == ASHIFT) + if (GET_CODE (inner) == UNSPEC) { - /* Possible slli333, 2-byte cost. - General case, cost 1 instruction with 4-byte. */ - if (satisfies_constraint_Iu03 (x)) - *total = COSTS_N_INSNS (1) - 2; - else - *total = COSTS_N_INSNS (1); - } - else if (outer_code == ASHIFTRT || outer_code == LSHIFTRT) - { - /* Possible srai45 or srli45, 2-byte cost. - General case, cost 1 instruction with 4-byte. */ - if (satisfies_constraint_Iu05 (x)) - *total = COSTS_N_INSNS (1) - 2; - else - *total = COSTS_N_INSNS (1); + switch (XINT (inner, 1)) + { + case UNSPEC_GOTINIT: + case UNSPEC_GOT: + case UNSPEC_GOTOFF: + case UNSPEC_PLT: + case UNSPEC_TLSGD: + case UNSPEC_TLSLD: + case UNSPEC_TLSIE: + case UNSPEC_TLSLE: + x = XVECEXP (inner, 0, 0); + break; + default: + break; + } } - else + } + return x; +} + +static enum machine_mode +nds32_vectorize_preferred_simd_mode (enum machine_mode mode) +{ + if (!NDS32_EXT_DSP_P ()) + return word_mode; + + switch (mode) + { + case QImode: + return V4QImode; + case HImode: + return V2HImode; + default: + return word_mode; + } +} + +static bool +nds32_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) +{ + switch (GET_CODE (x)) + { + case CONST: + x = XEXP (x, 0); + if (GET_CODE (x) == UNSPEC) { - /* For other cases, simply set it 4-byte cost. */ - *total = COSTS_N_INSNS (1); + switch (XINT (x, 1)) + { + case UNSPEC_GOT: + case UNSPEC_GOTOFF: + case UNSPEC_PLT: + case UNSPEC_TLSGD: + case UNSPEC_TLSLD: + case UNSPEC_TLSIE: + case UNSPEC_TLSLE: + return true; + default: + return false; + } } break; - - case CONST_DOUBLE: - /* It requires high part and low part processing, set it 8-byte cost. */ - *total = COSTS_N_INSNS (2); + case SYMBOL_REF: + /* We don't want to force symbol as constant pool in .text section, + because we use the gp-relatived instruction to load in small + or medium model. */ + if (SYMBOL_REF_TLS_MODEL (x) + || TARGET_CMODEL_SMALL + || TARGET_CMODEL_MEDIUM) + return true; break; - default: - /* For other cases, generally we set it 4-byte cost - and stop resurively traversing. */ - *total = COSTS_N_INSNS (1); - break; + return false; } + return false; +} - return true; + +/* Condition Code Status. */ + +/* -- Representation of condition codes using registers. 
*/ + +static void +nds32_canonicalize_comparison (int *code, + rtx *op0 ATTRIBUTE_UNUSED, + rtx *op1, + bool op0_preserve_value ATTRIBUTE_UNUSED) +{ + /* When the instruction combination pass tries to combine a comparison insn + with its previous insns, it also transforms the operator in order to + minimize its constant field. For example, it tries to transform a + comparison insn from + (set (reg:SI 54) + (ltu:SI (reg:SI 52) + (const_int 10 [0xa]))) + to + (set (reg:SI 54) + (leu:SI (reg:SI 52) + (const_int 9 [0x9]))) + + However, the nds32 target only provides instructions supporting the LTU + operation directly, and the implementation of the pattern "cbranchsi4" + only expands the LTU form. In order to handle the non-LTU operations + generated from passes other than the RTL expansion pass, we have to + implement this hook to revert those changes. Since we only expand the LTU + operator in the RTL expansion pass, we might only need to handle the LEU + case, unless we find other optimization passes perform more aggressive + transformations. */ + + if (*code == LEU && CONST_INT_P (*op1)) + { + *op1 = gen_int_mode (INTVAL (*op1) + 1, SImode); + *code = LTU; + } +} + + +/* Describing Relative Costs of Operations. */ + +static int +nds32_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t from, + reg_class_t to) +{ + if ((from == FP_REGS && to != FP_REGS) + || (from != FP_REGS && to == FP_REGS)) + return 9; + else if (from == HIGH_REGS || to == HIGH_REGS) + return optimize_size ? 6 : 2; + else + return 2; } -static int nds32_address_cost (rtx address, - enum machine_mode mode ATTRIBUTE_UNUSED, - addr_space_t as ATTRIBUTE_UNUSED, - bool speed) -{ - rtx plus0, plus1; - enum rtx_code code; - - code = GET_CODE (address); - - /* According to 'speed', goto suitable cost model section. */ - if (speed) - goto performance_cost; +static int +nds32_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t rclass ATTRIBUTE_UNUSED, + bool in ATTRIBUTE_UNUSED) +{ + /* Memory access is only need 1 cycle in our low-end processor, + however memory access is most 4-byte instruction, + so let it 8 for optimize_size, otherwise be 2. */ + if (nds32_memory_model_option == MEMORY_MODEL_FAST) + return optimize_size ? 8 : 4; else - goto size_cost; + return 8; +} + +/* This target hook describes the relative costs of RTL expressions. + Return 'true' when all subexpressions of x have been processed. + Return 'false' to sum the costs of sub-rtx, plus cost of this operation. + Refer to gcc/rtlanal.c for more information. */ +static bool +nds32_rtx_costs (rtx x, + int code, + int outer_code, + int opno, + int *total, + bool speed) +{ + return nds32_rtx_costs_impl (x, code, outer_code, opno, total, speed); +} -performance_cost: - /* This is section for performance cost model. */ +static int +nds32_address_cost (rtx address, + enum machine_mode mode, + addr_space_t as, + bool speed) +{ + return nds32_address_cost_impl (address, mode, as, speed); +} - /* FALLTHRU, currently we use same cost model as size_cost. */ + +/* Adjusting the Instruction Scheduler. */ -size_cost: - /* This is section for size cost model. */ +static int +nds32_sched_adjust_cost (rtx insn, rtx link, rtx dep, int cost) +{ + if (REG_NOTE_KIND (link) == REG_DEP_ANTI + || REG_NOTE_KIND (link) == REG_DEP_OUTPUT) + return 0; - switch (code) - { - case POST_MODIFY: - case POST_INC: - case POST_DEC: - /* We encourage that rtx contains - POST_MODIFY/POST_INC/POST_DEC behavior. 
*/ - return 0; + if (INSN_CODE (insn) < 0 || INSN_CODE (dep) < 0) + return cost; - case SYMBOL_REF: - /* We can have gp-relative load/store for symbol_ref. - Have it 4-byte cost. */ - return COSTS_N_INSNS (1); + return cost; +} - case CONST: - /* It is supposed to be the pattern (const (plus symbol_ref const_int)). - Have it 4-byte cost. */ - return COSTS_N_INSNS (1); + +/* Dividing the Output into Sections (Texts, Data, . . . ). */ - case REG: - /* Simply return 4-byte costs. */ - return COSTS_N_INSNS (1); +/* If references to a symbol or a constant must be treated differently + depending on something about the variable or function named by the symbol + (such as what section it is in), we use this hook to store flags + in symbol_ref rtx. */ +static void +nds32_encode_section_info (tree decl, rtx rtl, int new_decl_p) +{ + default_encode_section_info (decl, rtl, new_decl_p); - case PLUS: - /* We do not need to check if the address is a legitimate address, - because this hook is never called with an invalid address. - But we better check the range of - const_int value for cost, if it exists. */ - plus0 = XEXP (address, 0); - plus1 = XEXP (address, 1); - - if (REG_P (plus0) && CONST_INT_P (plus1)) - { - /* If it is possible to be lwi333/swi333 form, - make it 2-byte cost. */ - if (satisfies_constraint_Iu05 (plus1)) - return (COSTS_N_INSNS (1) - 2); - else - return COSTS_N_INSNS (1); - } + /* For the memory rtx, if it references to rodata section, we can store + NDS32_SYMBOL_FLAG_RODATA flag into symbol_ref rtx so that the + nds32_legitimate_address_p() can determine how to treat such symbol_ref + based on -mcmodel=X and this information. */ + if (MEM_P (rtl) && MEM_READONLY_P (rtl)) + { + rtx addr = XEXP (rtl, 0); - /* For other 'plus' situation, make it cost 4-byte. */ - return COSTS_N_INSNS (1); + if (GET_CODE (addr) == SYMBOL_REF) + { + /* For (mem (symbol_ref X)) case. */ + SYMBOL_REF_FLAGS (addr) |= NDS32_SYMBOL_FLAG_RODATA; + } + else if (GET_CODE (addr) == CONST + && GET_CODE (XEXP (addr, 0)) == PLUS) + { + /* For (mem (const (plus (symbol_ref X) (const_int N)))) case. */ + rtx plus_op = XEXP (addr, 0); + rtx op0 = XEXP (plus_op, 0); + rtx op1 = XEXP (plus_op, 1); - default: - break; + if (GET_CODE (op0) == SYMBOL_REF && CONST_INT_P (op1)) + SYMBOL_REF_FLAGS (op0) |= NDS32_SYMBOL_FLAG_RODATA; + } } - - return COSTS_N_INSNS (4); } @@ -2654,33 +3345,50 @@ static void nds32_asm_file_start (void) { - int i; - default_file_start (); + if (flag_pic) + fprintf (asm_out_file, "\t.pic\n"); + /* Tell assembler which ABI we are using. */ fprintf (asm_out_file, "\t! ABI version\n"); - fprintf (asm_out_file, "\t.abi_2\n"); + if (TARGET_HARD_FLOAT) + fprintf (asm_out_file, "\t.abi_2fp_plus\n"); + else + fprintf (asm_out_file, "\t.abi_2\n"); /* Tell assembler that this asm code is generated by compiler. */ fprintf (asm_out_file, "\t! This asm file is generated by compiler\n"); fprintf (asm_out_file, "\t.flag\tverbatim\n"); - /* Give assembler the size of each vector for interrupt handler. */ - fprintf (asm_out_file, "\t! This vector size directive is required " - "for checking inconsistency on interrupt handler\n"); - fprintf (asm_out_file, "\t.vec_size\t%d\n", nds32_isr_vector_size); + + /* We need to provide the size of each vector for interrupt handler + under elf toolchain. */ + if (!TARGET_LINUX_ABI) + { + fprintf (asm_out_file, "\t! 
This vector size directive is required " + "for checking inconsistency on interrupt handler\n"); + fprintf (asm_out_file, "\t.vec_size\t%d\n", nds32_isr_vector_size); + } /* If user enables '-mforce-fp-as-gp' or compiles programs with -Os, the compiler may produce 'la $fp,_FP_BASE_' instruction at prologue for fp-as-gp optimization. We should emit weak reference of _FP_BASE_ to avoid undefined reference in case user does not pass '--relax' option to linker. */ - if (TARGET_FORCE_FP_AS_GP || optimize_size) + if (!TARGET_LINUX_ABI && (TARGET_FORCE_FP_AS_GP || optimize_size)) { fprintf (asm_out_file, "\t! This weak reference is required to do " "fp-as-gp link time optimization\n"); fprintf (asm_out_file, "\t.weak\t_FP_BASE_\n"); } + /* If user enables '-mifc', we should emit relaxation directive + to tell linker that this file is allowed to do ifc optimization. */ + if (TARGET_IFC) + { + fprintf (asm_out_file, "\t! This relaxation directive is required " + "to do ifc link time optimization\n"); + fprintf (asm_out_file, "\t.relax\tifc\n"); + } /* If user enables '-mex9', we should emit relaxation directive to tell linker that this file is allowed to do ex9 optimization. */ if (TARGET_EX9) @@ -2699,9 +3407,34 @@ if (TARGET_ISA_V3M) fprintf (asm_out_file, "\t! ISA family\t\t: %s\n", "V3M"); + if (TARGET_PIPELINE_N8) + fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N8"); + if (TARGET_PIPELINE_N10) + fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N10"); + if (TARGET_PIPELINE_N12) + fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N12"); + if (TARGET_PIPELINE_SIMPLE) + fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "SIMPLE"); + + if (TARGET_CMODEL_SMALL) + fprintf (asm_out_file, "\t! Code model\t\t: %s\n", "SMALL"); + if (TARGET_CMODEL_MEDIUM) + fprintf (asm_out_file, "\t! Code model\t\t: %s\n", "MEDIUM"); + if (TARGET_CMODEL_LARGE) + fprintf (asm_out_file, "\t! Code model\t\t: %s\n", "LARGE"); + fprintf (asm_out_file, "\t! Endian setting\t: %s\n", ((TARGET_BIG_ENDIAN) ? "big-endian" : "little-endian")); + fprintf (asm_out_file, "\t! Use SP floating-point instruction\t: %s\n", + ((TARGET_FPU_SINGLE) ? "Yes" + : "No")); + fprintf (asm_out_file, "\t! Use DP floating-point instruction\t: %s\n", + ((TARGET_FPU_DOUBLE) ? "Yes" + : "No")); + fprintf (asm_out_file, "\t! ABI version\t\t: %s\n", + ((TARGET_HARD_FLOAT) ? "ABI2FP+" + : "ABI2")); fprintf (asm_out_file, "\t! ------------------------------------\n"); @@ -2709,8 +3442,14 @@ ((TARGET_CMOV) ? "Yes" : "No")); fprintf (asm_out_file, "\t! Use performance extension\t: %s\n", - ((TARGET_PERF_EXT) ? "Yes" + ((TARGET_EXT_PERF) ? "Yes" : "No")); + fprintf (asm_out_file, "\t! Use performance extension 2\t: %s\n", + ((TARGET_EXT_PERF2) ? "Yes" + : "No")); + fprintf (asm_out_file, "\t! Use string extension\t\t: %s\n", + ((TARGET_EXT_STRING) ? "Yes" + : "No")); fprintf (asm_out_file, "\t! ------------------------------------\n"); @@ -2720,9 +3459,6 @@ fprintf (asm_out_file, "\t! 16-bit instructions\t: %s\n", ((TARGET_16_BIT) ? "Yes" : "No")); - fprintf (asm_out_file, "\t! GP base access\t: %s\n", - ((TARGET_GP_DIRECT) ? "Yes" - : "No")); fprintf (asm_out_file, "\t! Reduced registers set\t: %s\n", ((TARGET_REDUCED_REGS) ? "Yes" : "No")); @@ -2731,6 +3467,10 @@ if (optimize_size) fprintf (asm_out_file, "\t! Optimization level\t: -Os\n"); + else if (optimize_fast) + fprintf (asm_out_file, "\t! Optimization level\t: -Ofast\n"); + else if (optimize_debug) + fprintf (asm_out_file, "\t! 
Optimization level\t: -Og\n"); else fprintf (asm_out_file, "\t! Optimization level\t: -O%d\n", optimize); @@ -2741,63 +3481,61 @@ fprintf (asm_out_file, "\t! ------------------------------------\n"); - /* Initialize isr vector information array before compiling functions. */ - for (i = 0; i < NDS32_N_ISR_VECTORS; i++) - { - nds32_isr_vectors[i].category = NDS32_ISR_NONE; - strcpy (nds32_isr_vectors[i].func_name, ""); - nds32_isr_vectors[i].save_reg = NDS32_PARTIAL_SAVE; - nds32_isr_vectors[i].nested_type = NDS32_NOT_NESTED; - nds32_isr_vectors[i].total_n_vectors = 0; - strcpy (nds32_isr_vectors[i].nmi_name, ""); - strcpy (nds32_isr_vectors[i].warm_name, ""); - } + nds32_asm_file_start_for_isr (); } static void nds32_asm_file_end (void) { - int i; - - /* If all the vectors are NDS32_ISR_NONE, we can return immediately. */ - for (i = 0; i < NDS32_N_ISR_VECTORS; i++) - if (nds32_isr_vectors[i].category != NDS32_ISR_NONE) - break; - - if (i == NDS32_N_ISR_VECTORS) - return; - - /* At least one vector is NOT NDS32_ISR_NONE, - we should output isr vector information. */ - fprintf (asm_out_file, "\t! ------------------------------------\n"); - fprintf (asm_out_file, "\t! The isr vector information:\n"); + nds32_asm_file_end_for_isr (); fprintf (asm_out_file, "\t! ------------------------------------\n"); +} - /* Check reset handler first. Its vector number is always 0. */ - if (nds32_isr_vectors[0].category == NDS32_ISR_RESET) +static bool +nds32_asm_output_addr_const_extra (FILE *file, rtx x) +{ + if (GET_CODE (x) == UNSPEC) { - nds32_emit_isr_reset_content (); - fprintf (asm_out_file, "\t! ------------------------------------\n"); - } - - /* Check other vectors, starting from vector number 1. */ - for (i = 1; i < NDS32_N_ISR_VECTORS; i++) - { - if (nds32_isr_vectors[i].category == NDS32_ISR_INTERRUPT - || nds32_isr_vectors[i].category == NDS32_ISR_EXCEPTION) - { - /* Found one vector which is interupt or exception. - Output its jmptbl and vector section content. */ - fprintf (asm_out_file, "\t! interrupt/exception vector %02d\n", i); - fprintf (asm_out_file, "\t! ------------------------------------\n"); - nds32_emit_isr_jmptbl_section (i); - fprintf (asm_out_file, "\t! ....................................\n"); - nds32_emit_isr_vector_section (i); - fprintf (asm_out_file, "\t! ------------------------------------\n"); + switch (XINT (x, 1)) + { + case UNSPEC_GOTINIT: + output_addr_const (file, XVECEXP (x, 0, 0)); + break; + case UNSPEC_GOTOFF: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@GOTOFF", file); + break; + case UNSPEC_GOT: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@GOT", file); + break; + case UNSPEC_PLT: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@PLT", file); + break; + case UNSPEC_TLSGD: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@TLSDESC", file); + break; + case UNSPEC_TLSLD: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@TLSDESC", file); + break; + case UNSPEC_TLSIE: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@GOTTPOFF", file); + break; + case UNSPEC_TLSLE: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@TPOFF", file); + break; + default: + return false; } + return true; } - - fprintf (asm_out_file, "\t! ------------------------------------\n"); + else + return false; } /* -- Output and Generation of Labels. 
*/ @@ -2815,7 +3553,15 @@ static void nds32_print_operand (FILE *stream, rtx x, int code) { - int op_value; + HOST_WIDE_INT op_value = 0; + HOST_WIDE_INT one_position; + HOST_WIDE_INT zero_position; + bool pick_lsb_p = false; + bool pick_msb_p = false; + int regno; + + if (CONST_INT_P (x)) + op_value = INTVAL (x); switch (code) { @@ -2823,22 +3569,75 @@ /* Do nothing special. */ break; + case 'b': + /* Use exact_log2() to search the 0-bit position. */ + gcc_assert (CONST_INT_P (x)); + zero_position = exact_log2 (~UINTVAL (x) & GET_MODE_MASK (SImode)); + gcc_assert (zero_position != -1); + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, zero_position); + + /* No need to handle following process, so return immediately. */ + return; + + case 'e': + gcc_assert (MEM_P (x) + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT); + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (XEXP (XEXP (x, 0), 1))); + + /* No need to handle following process, so return immediately. */ + return; + + case 'v': + gcc_assert (CONST_INT_P (x) + && (INTVAL (x) == 0 + || INTVAL (x) == 8 + || INTVAL (x) == 16 + || INTVAL (x) == 24)); + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8); + + /* No need to handle following process, so return immediately. */ + return; + + case 'B': + /* Use exact_log2() to search the 1-bit position. */ + gcc_assert (CONST_INT_P (x)); + one_position = exact_log2 (UINTVAL (x) & GET_MODE_MASK (SImode)); + gcc_assert (one_position != -1); + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, one_position); + + /* No need to handle following process, so return immediately. */ + return; + + case 'L': + /* X is supposed to be REG rtx. */ + gcc_assert (REG_P (x)); + /* Claim that we are going to pick LSB part of X. */ + pick_lsb_p = true; + break; + + case 'H': + /* X is supposed to be REG rtx. */ + gcc_assert (REG_P (x)); + /* Claim that we are going to pick MSB part of X. */ + pick_msb_p = true; + break; + case 'V': - /* 'x' is supposed to be CONST_INT, get the value. */ + /* X is supposed to be CONST_INT, get the value. */ gcc_assert (CONST_INT_P (x)); - op_value = INTVAL (x); /* According to the Andes architecture, - the system/user register index range is 0 ~ 1023. - In order to avoid conflict between user-specified-integer value - and enum-specified-register value, - the 'enum nds32_intrinsic_registers' value - in nds32_intrinsic.h starts from 1024. */ + the system/user register index range is 0 ~ 1023. + In order to avoid conflict between user-specified-integer value + and enum-specified-register value, + the 'enum nds32_intrinsic_registers' value + in nds32_intrinsic.h starts from 1024. */ if (op_value < 1024 && op_value >= 0) { /* If user gives integer value directly (0~1023), we just print out the value. */ - fprintf (stream, "%d", op_value); + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, op_value); } else if (op_value < 0 || op_value >= ((int) ARRAY_SIZE (nds32_intrinsic_register_names) @@ -2858,6 +3657,45 @@ /* No need to handle following process, so return immediately. */ return; + case 'R': /* cctl valck */ + /* Note the cctl divide to 5 group and share the same name table. */ + if (op_value < 0 || op_value > 4) + error ("CCTL intrinsic function subtype out of range!"); + fprintf (stream, "%s", nds32_cctl_names[op_value]); + return; + + case 'T': /* cctl idxwbinv */ + /* Note the cctl divide to 5 group and share the same name table. 
*/ + if (op_value < 0 || op_value > 4) + error ("CCTL intrinsic function subtype out of range!"); + fprintf (stream, "%s", nds32_cctl_names[op_value + 4]); + return; + + case 'U': /* cctl vawbinv */ + /* Note the cctl divide to 5 group and share the same name table. */ + if (op_value < 0 || op_value > 4) + error ("CCTL intrinsic function subtype out of range!"); + fprintf (stream, "%s", nds32_cctl_names[op_value + 8]); + return; + + case 'X': /* cctl idxread */ + /* Note the cctl divide to 5 group and share the same name table. */ + if (op_value < 0 || op_value > 4) + error ("CCTL intrinsic function subtype out of range!"); + fprintf (stream, "%s", nds32_cctl_names[op_value + 12]); + return; + + case 'W': /* cctl idxwitre */ + /* Note the cctl divide to 5 group and share the same name table. */ + if (op_value < 0 || op_value > 4) + error ("CCTL intrinsic function subtype out of range!"); + fprintf (stream, "%s", nds32_cctl_names[op_value + 16]); + return; + + case 'Z': /* dpref */ + fprintf (stream, "%s", nds32_dpref_names[op_value]); + return; + default : /* Unknown flag. */ output_operand_lossage ("invalid operand output code"); @@ -2867,35 +3705,113 @@ switch (GET_CODE (x)) { case LABEL_REF: + output_addr_const (stream, x); + break; + case SYMBOL_REF: output_addr_const (stream, x); + + if (!TARGET_LINUX_ABI && nds32_indirect_call_referenced_p (x)) + fprintf (stream, "@ICT"); + break; case REG: + /* Print a Double-precision register name. */ + if ((GET_MODE (x) == DImode || GET_MODE (x) == DFmode) + && NDS32_IS_FPR_REGNUM (REGNO (x))) + { + regno = REGNO (x); + if (!NDS32_FPR_REGNO_OK_FOR_DOUBLE (regno)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + break; + } + fprintf (stream, "$fd%d", (regno - NDS32_FIRST_FPR_REGNUM) >> 1); + break; + } + + /* Print LSB or MSB part of register pair if the + constraint modifier 'L' or 'H' is specified. */ + if ((GET_MODE (x) == DImode || GET_MODE (x) == DFmode) + && NDS32_IS_GPR_REGNUM (REGNO (x))) + { + if ((pick_lsb_p && WORDS_BIG_ENDIAN) + || (pick_msb_p && !WORDS_BIG_ENDIAN)) + { + /* If we would like to print out LSB register under big-endian, + or print out MSB register under little-endian, we need to + increase register number. */ + regno = REGNO (x); + regno++; + fputs (reg_names[regno], stream); + break; + } + } + /* Forbid using static chain register ($r16) - on reduced-set registers configuration. */ + on reduced-set registers configuration. */ if (TARGET_REDUCED_REGS && REGNO (x) == STATIC_CHAIN_REGNUM) sorry ("a nested function is not supported for reduced registers"); /* Normal cases, print out register name. 
*/ - fputs (reg_names[REGNO (x)], stream); + regno = REGNO (x); + fputs (reg_names[regno], stream); break; case MEM: output_address (XEXP (x, 0)); break; + case HIGH: + if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE) + { + REAL_VALUE_TYPE rv; + long val; + gcc_assert (GET_MODE (x) == SFmode); + + REAL_VALUE_FROM_CONST_DOUBLE (rv, XEXP (x, 0)); + REAL_VALUE_TO_TARGET_SINGLE (rv, val); + + fprintf (stream, "hi20(0x%lx)", val); + } + else + gcc_unreachable (); + break; + + case CONST_DOUBLE: + REAL_VALUE_TYPE rv; + long val; + gcc_assert (GET_MODE (x) == SFmode); + + REAL_VALUE_FROM_CONST_DOUBLE (rv, x); + REAL_VALUE_TO_TARGET_SINGLE (rv, val); + + fprintf (stream, "0x%lx", val); + break; + case CODE_LABEL: case CONST_INT: case CONST: output_addr_const (stream, x); break; + case CONST_VECTOR: + fprintf (stream, HOST_WIDE_INT_PRINT_HEX, const_vector_to_hwint (x)); + break; + + case LO_SUM: + /* This is a special case for inline assembly using memory address 'p'. + The inline assembly code is expected to use pesudo instruction + for the operand. EX: la */ + output_addr_const (stream, XEXP(x, 1)); + break; + default: /* Generally, output_addr_const () is able to handle most cases. - We want to see what CODE could appear, - so we use gcc_unreachable() to stop it. */ + We want to see what CODE could appear, + so we use gcc_unreachable() to stop it. */ debug_rtx (x); gcc_unreachable (); break; @@ -2918,15 +3834,25 @@ fputs ("]", stream); break; + case LO_SUM: + /* This is a special case for inline assembly using memory operand 'm'. + The inline assembly code is expected to use pesudo instruction + for the operand. EX: [ls].[bhw] */ + fputs ("[ + ", stream); + op1 = XEXP (x, 1); + output_addr_const (stream, op1); + fputs ("]", stream); + break; + case REG: /* Forbid using static chain register ($r16) - on reduced-set registers configuration. */ + on reduced-set registers configuration. */ if (TARGET_REDUCED_REGS && REGNO (x) == STATIC_CHAIN_REGNUM) sorry ("a nested function is not supported for reduced registers"); /* [Ra] */ - fprintf (stream, "[%s]", reg_names[REGNO (x)]); + fprintf (stream, "[%s + 0]", reg_names[REGNO (x)]); break; case PLUS: @@ -2934,13 +3860,13 @@ op1 = XEXP (x, 1); /* Checking op0, forbid using static chain register ($r16) - on reduced-set registers configuration. */ + on reduced-set registers configuration. */ if (TARGET_REDUCED_REGS && REG_P (op0) && REGNO (op0) == STATIC_CHAIN_REGNUM) sorry ("a nested function is not supported for reduced registers"); /* Checking op1, forbid using static chain register ($r16) - on reduced-set registers configuration. */ + on reduced-set registers configuration. */ if (TARGET_REDUCED_REGS && REG_P (op1) && REGNO (op1) == STATIC_CHAIN_REGNUM) @@ -2949,8 +3875,8 @@ if (REG_P (op0) && CONST_INT_P (op1)) { /* [Ra + imm] */ - fprintf (stream, "[%s + (%d)]", - reg_names[REGNO (op0)], (int)INTVAL (op1)); + fprintf (stream, "[%s + (" HOST_WIDE_INT_PRINT_DEC ")]", + reg_names[REGNO (op0)], INTVAL (op1)); } else if (REG_P (op0) && REG_P (op1)) { @@ -2963,8 +3889,8 @@ /* [Ra + Rb << sv] From observation, the pattern looks like: (plus:SI (mult:SI (reg:SI 58) - (const_int 4 [0x4])) - (reg/f:SI 57)) */ + (const_int 4 [0x4])) + (reg/f:SI 57)) */ int sv; /* We need to set sv to output shift value. 
*/ @@ -2974,6 +3900,8 @@ sv = 1; else if (INTVAL (XEXP (op0, 1)) == 4) sv = 2; + else if (INTVAL (XEXP (op0, 1)) == 8) + sv = 3; else gcc_unreachable (); @@ -2993,20 +3921,20 @@ case POST_MODIFY: /* (post_modify (regA) (plus (regA) (regB))) - (post_modify (regA) (plus (regA) (const_int))) - We would like to extract - regA and regB (or const_int) from plus rtx. */ + (post_modify (regA) (plus (regA) (const_int))) + We would like to extract + regA and regB (or const_int) from plus rtx. */ op0 = XEXP (XEXP (x, 1), 0); op1 = XEXP (XEXP (x, 1), 1); /* Checking op0, forbid using static chain register ($r16) - on reduced-set registers configuration. */ + on reduced-set registers configuration. */ if (TARGET_REDUCED_REGS && REG_P (op0) && REGNO (op0) == STATIC_CHAIN_REGNUM) sorry ("a nested function is not supported for reduced registers"); /* Checking op1, forbid using static chain register ($r16) - on reduced-set registers configuration. */ + on reduced-set registers configuration. */ if (TARGET_REDUCED_REGS && REG_P (op1) && REGNO (op1) == STATIC_CHAIN_REGNUM) @@ -3021,8 +3949,8 @@ else if (REG_P (op0) && CONST_INT_P (op1)) { /* [Ra], imm */ - fprintf (stream, "[%s], %d", - reg_names[REGNO (op0)], (int)INTVAL (op1)); + fprintf (stream, "[%s], " HOST_WIDE_INT_PRINT_DEC, + reg_names[REGNO (op0)], INTVAL (op1)); } else { @@ -3038,7 +3966,7 @@ op0 = XEXP (x, 0); /* Checking op0, forbid using static chain register ($r16) - on reduced-set registers configuration. */ + on reduced-set registers configuration. */ if (TARGET_REDUCED_REGS && REG_P (op0) && REGNO (op0) == STATIC_CHAIN_REGNUM) @@ -3062,14 +3990,83 @@ default : /* Generally, output_addr_const () is able to handle most cases. - We want to see what CODE could appear, - so we use gcc_unreachable() to stop it. */ + We want to see what CODE could appear, + so we use gcc_unreachable() to stop it. */ debug_rtx (x); gcc_unreachable (); break; } } +/* -- Assembler Commands for Exception Regions. */ + +static rtx +nds32_dwarf_register_span (rtx reg) +{ + rtx dwarf_high, dwarf_low; + rtx dwarf_single; + enum machine_mode mode; + int regno; + + mode = GET_MODE (reg); + regno = REGNO (reg); + + /* We need to adjust dwarf register information for floating-point registers + rather than using default register number mapping. */ + if (regno >= NDS32_FIRST_FPR_REGNUM + && regno <= NDS32_LAST_FPR_REGNUM) + { + /* The nds32 port in GDB maintains a mapping between dwarf register + number and displayed register name. For backward compatibility to + previous toolchain, currently our gdb still has four registers + (d0.l, d0.h, d1.l, and d1.h) between GPR and FPR while compiler + does not count those four registers in its register number table. + So we have to add 4 on its register number and then create new + dwarf information. Hopefully we can discard such workaround + in the future. */ + regno += 4; + + if (mode == DFmode || mode == SCmode) + { + /* By default, GCC maps increasing register numbers to increasing + memory locations, but paired FPRs in NDS32 target are always + big-endian, i.e.: + + fd0 : fs0 fs1 + (MSB) (LSB) + + We must return parallel rtx to represent such layout. 
*/
+          dwarf_high = gen_rtx_REG (word_mode, regno);
+          dwarf_low = gen_rtx_REG (word_mode, regno + 1);
+          return gen_rtx_PARALLEL (VOIDmode,
+                                   gen_rtvec (2, dwarf_low, dwarf_high));
+        }
+      else if (mode == DCmode)
+        {
+          rtx dwarf_high_re = gen_rtx_REG (word_mode, regno);
+          rtx dwarf_low_re = gen_rtx_REG (word_mode, regno + 1);
+          rtx dwarf_high_im = gen_rtx_REG (word_mode, regno);
+          rtx dwarf_low_im = gen_rtx_REG (word_mode, regno + 1);
+          return gen_rtx_PARALLEL (VOIDmode,
+                                   gen_rtvec (4, dwarf_low_re, dwarf_high_re,
+                                              dwarf_high_im, dwarf_low_im));
+        }
+      else if (mode == SFmode || mode == SImode)
+        {
+          /* Create new dwarf information with the adjusted register number.  */
+          dwarf_single = gen_rtx_REG (word_mode, regno);
+          return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, dwarf_single));
+        }
+      else
+        {
+          /* We should not be here.  */
+          gcc_unreachable ();
+        }
+    }
+
+  return NULL_RTX;
+}
+
 /* Defining target-specific uses of __attribute__.  */
@@ -3098,6 +4095,27 @@
 static void
 nds32_insert_attributes (tree decl, tree *attributes)
 {
+  /* An "indirect_call" function attribute implies "noinline" and "noclone"
+     for the ELF toolchain to support the ROM patch mechanism.  */
+  if (TREE_CODE (decl) == FUNCTION_DECL
+      && lookup_attribute ("indirect_call", *attributes) != NULL)
+    {
+      tree new_attrs = *attributes;
+
+      if (TARGET_LINUX_ABI)
+        error ("cannot use indirect_call attribute under linux toolchain");
+
+      if (lookup_attribute ("noinline", new_attrs) == NULL)
+        new_attrs = tree_cons (get_identifier ("noinline"), NULL, new_attrs);
+      if (lookup_attribute ("noclone", new_attrs) == NULL)
+        new_attrs = tree_cons (get_identifier ("noclone"), NULL, new_attrs);
+
+      if (!TREE_PUBLIC (decl))
+        error ("indirect_call attribute can't apply for static function");
+
+      *attributes = new_attrs;
+    }
+
   /* For function declaration, we need to check isr-specific attributes:
      1. Call nds32_check_isr_attrs_conflict() to check any conflict.
      2. Check valid integer value for interrupt/exception.
@@ -3115,14 +4133,46 @@
       nds32_check_isr_attrs_conflict (decl, func_attrs);
 
       /* Now we are starting to check valid id value
-	 for interrupt/exception/reset.
-	 Note that we ONLY check its validity here.
-	 To construct isr vector information, it is still performed
-	 by nds32_construct_isr_vectors_information().  */
+         for interrupt/exception/reset.
+         Note that we ONLY check its validity here.
+         To construct isr vector information, it is still performed
+         by nds32_construct_isr_vectors_information().  */
       intr  = lookup_attribute ("interrupt", func_attrs);
       excp  = lookup_attribute ("exception", func_attrs);
       reset = lookup_attribute ("reset", func_attrs);
 
+      /* The following code may use attribute arguments.  If there is no
+         argument from the source code, it will cause a segmentation fault.
+         Therefore, return directly and report an error message later.  */
+      if ((intr && TREE_VALUE (intr) == NULL)
+          || (excp && TREE_VALUE (excp) == NULL)
+          || (reset && TREE_VALUE (reset) == NULL))
+        return;
+
+      /* ------------------------------------------------------------- */
+      /* FIXME:
+         FOR BACKWARD COMPATIBILITY, we need to support the following patterns:
+
+           __attribute__((interrupt("XXX;YYY;id=ZZZ")))
+           __attribute__((exception("XXX;YYY;id=ZZZ")))
+           __attribute__((reset("vectors=XXX;nmi_func=YYY;warm_func=ZZZ")))
+
+         If interrupt/exception/reset appears and its argument is a
+         STRING_CST, we will use other functions to parse the string in
+         nds32_construct_isr_vectors_information() and then set the necessary
+         isr information in the nds32_isr_vectors[] array.
Here we can + just return immediately to avoid new-syntax checking. */ + if (intr != NULL_TREE + && TREE_CODE (TREE_VALUE (TREE_VALUE (intr))) == STRING_CST) + return; + if (excp != NULL_TREE + && TREE_CODE (TREE_VALUE (TREE_VALUE (excp))) == STRING_CST) + return; + if (reset != NULL_TREE + && TREE_CODE (TREE_VALUE (TREE_VALUE (reset))) == STRING_CST) + return; + /* ------------------------------------------------------------- */ + if (intr || excp) { /* Deal with interrupt/exception. */ @@ -3239,17 +4289,37 @@ { /* Under V2 ISA, we need to strictly disable TARGET_V3PUSH. */ target_flags &= ~MASK_V3PUSH; + /* Under V2 ISA, we need to strictly disable TARGET_IFC. */ + target_flags &= ~MASK_IFC; + /* Under V2 ISA, we need to strictly disable TARGET_EX9. */ + target_flags &= ~MASK_EX9; + /* If this is ARCH_V2J, we need to enable TARGET_REDUCED_REGS. */ + if (nds32_arch_option == ARCH_V2J) + target_flags |= MASK_REDUCED_REGS; } if (TARGET_ISA_V3) { - /* Under V3 ISA, currently nothing should be strictly set. */ + /* If this is ARCH_V3J, we need to enable TARGET_REDUCED_REGS. */ + if (nds32_arch_option == ARCH_V3J) + target_flags |= MASK_REDUCED_REGS; } if (TARGET_ISA_V3M) { /* Under V3M ISA, we need to strictly enable TARGET_REDUCED_REGS. */ target_flags |= MASK_REDUCED_REGS; - /* Under V3M ISA, we need to strictly disable TARGET_PERF_EXT. */ - target_flags &= ~MASK_PERF_EXT; + /* Under V3M ISA, we need to strictly disable TARGET_IFC. */ + target_flags &= ~MASK_IFC; + /* Under V3M ISA, we need to strictly disable TARGET_EX9. */ + target_flags &= ~MASK_EX9; + /* Under V3M ISA, we need to strictly disable TARGET_EXT_PERF. */ + target_flags &= ~MASK_EXT_PERF; + /* Under V3M ISA, we need to strictly disable TARGET_EXT_PERF2. */ + target_flags &= ~MASK_EXT_PERF2; + /* Under V3M ISA, we need to strictly disable TARGET_EXT_STRING. */ + target_flags &= ~MASK_EXT_STRING; + + if (flag_pic) + error ("not support -fpic option for v3m toolchain"); } /* See if we are using reduced-set registers: @@ -3260,7 +4330,7 @@ int r; /* Prevent register allocator from - choosing it as doing register allocation. */ + choosing it as doing register allocation. */ for (r = 11; r <= 14; r++) fixed_regs[r] = call_used_regs[r] = 1; for (r = 16; r <= 27; r++) @@ -3279,127 +4349,495 @@ target_flags &= ~MASK_V3PUSH; } - /* Currently, we don't support PIC code generation yet. */ - if (flag_pic) + if (TARGET_HARD_FLOAT && !(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)) + { + if (nds32_arch_option == ARCH_V3S || nds32_arch_option == ARCH_V3F) + error ("Disable FPU ISA, " + "the ABI option must be enable '-mfloat-abi=soft'"); + else + error ("'-mfloat-abi=hard' option just support FPU ISA, " + "must be enable '-mext-fpu-sp' or '-mext-fpu-dp'"); + } + + /* ELF toolchain don't support PIC code generation. */ + if (!TARGET_LINUX_ABI && flag_pic) sorry ("not support -fpic"); + + nds32_register_passes (); + + nds32_init_rtx_costs (); + + /* This is magic hack for our Coremark score... */ + if (global_options.x_flag_tree_switch_shortcut) + { + maybe_set_param_value + (PARAM_MAX_AVERAGE_UNROLLED_INSNS, + 200, + global_options.x_param_values, global_options_set.x_param_values); + + maybe_set_param_value + (PARAM_MAX_GROW_COPY_BB_INSNS, + 16, + global_options.x_param_values, global_options_set.x_param_values); + } } /* Miscellaneous Parameters. 
*/ +static tree +nds32_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED, + tree inputs ATTRIBUTE_UNUSED, + tree clobbers) +{ + clobbers = tree_cons (NULL_TREE, build_string (3, "$ta"), + clobbers); + return clobbers; +} +/* Insert end_label and check loop body whether is empty. */ +static bool +nds32_hwloop_insert_end_label (rtx loop_id, rtx end_label) +{ + rtx insn = NULL_RTX; + basic_block bb; + rtx cfg_id, last_insn; + + FOR_EACH_BB_FN (bb, cfun) + { + FOR_BB_INSNS (bb, insn) + { + if (NOTE_P (insn)) + continue; + + if (recog_memoized (insn) == CODE_FOR_hwloop_cfg + && INSN_P (insn)) + { + cfg_id = XVECEXP (XVECEXP (PATTERN (insn), 0, 5), 0, 0); + if (cfg_id == loop_id) + { + for (last_insn = PREV_INSN (insn); last_insn != BB_HEAD (bb); + last_insn = PREV_INSN (last_insn)) + { + if (NONDEBUG_INSN_P (last_insn)) + { + emit_label_before (end_label, last_insn); + /* The last_insn don't do ifcall. */ + emit_insn_before (gen_no_ifc_begin (), last_insn); + emit_insn_after (gen_no_ifc_end (), last_insn); + /* The last_insn don't do ex9. */ + emit_insn_before (gen_no_ex9_begin (), last_insn); + emit_insn_after (gen_no_ex9_end (), last_insn); + return true; + } + } + + if (NOTE_INSN_BASIC_BLOCK_P (last_insn)) + { + rtx nop = emit_insn_before (gen_unspec_nop (), last_insn); + emit_label_before (end_label, nop); + /* The last_insn don't do ifcall. */ + emit_insn_before (gen_no_ifc_begin (), last_insn); + emit_insn_after (gen_no_ifc_end (), last_insn); + /* The last_insn don't do ex9. */ + emit_insn_before (gen_no_ex9_begin (), last_insn); + emit_insn_after (gen_no_ex9_end (), last_insn); + return true; + } + } + } + } + } + + if (insn != NULL_RTX) + delete_insn (insn); + return false; +} + static void -nds32_init_builtins (void) +nds32_hwloop_remove (rtx loop_id) +{ + rtx insn, le_id; + basic_block bb; + + FOR_EACH_BB_FN (bb, cfun) + { + FOR_BB_INSNS (bb, insn) + { + if (NOTE_P (insn)) + continue; + + if (recog_memoized (insn) == CODE_FOR_init_lc + && INSN_P (insn)) + { + le_id = XVECEXP (XVECEXP (PATTERN (insn), 0, 1), 0, 0); + if (loop_id == le_id) + { + delete_insn (insn); + return; + } + } + } + } +} + +/* Insert isb instruction for hwloop. */ +static void +nds32_hwloop_insert_isb (rtx loop_id) +{ + rtx insn, le_id; + basic_block bb; + + FOR_EACH_BB_FN (bb, cfun) + { + FOR_BB_INSNS (bb, insn) + { + if (NOTE_P (insn)) + continue; + + if (recog_memoized (insn) == CODE_FOR_init_lc + && INSN_P (insn)) + { + le_id = XVECEXP (XVECEXP (PATTERN (insn), 0, 1), 0, 0); + if (loop_id == le_id) + { + emit_insn_after (gen_unspec_volatile_isb (), insn); + return; + } + } + } + } +} +/* Insert mtlei instruction for hwloop. */ +static void +nds32_hwloop_insert_init_end () +{ + rtx insn; + basic_block bb; + rtx loop_id, end_label; + bool hwloop_p; + + FOR_EACH_BB_FN (bb, cfun) + { + FOR_BB_INSNS (bb, insn) + { + if (NOTE_P (insn)) + continue; + + if (recog_memoized (insn) == CODE_FOR_mtlbi_hint + && INSN_P (insn)) + { + end_label = gen_label_rtx (); + loop_id = XVECEXP (XVECEXP (PATTERN (insn), 0, 1), 0, 0); + hwloop_p = nds32_hwloop_insert_end_label (loop_id, end_label); + + if (!hwloop_p) + { + delete_insn (insn); + nds32_hwloop_remove (loop_id); + } + else + { + emit_insn_after (gen_mtlei (gen_rtx_LABEL_REF (Pmode, end_label)), insn); + nds32_hwloop_insert_isb (loop_id); + } + } + } + } +} + +/* Perform machine-dependent processing. 
*/ +static void +nds32_machine_dependent_reorg (void) { - tree pointer_type_node = build_pointer_type (integer_type_node); + /* We are freeing block_for_insn in the toplev to keep compatibility + with old MDEP_REORGS that are not CFG based. Recompute it + now. */ + compute_bb_for_insn (); + df_analyze (); + + if (TARGET_HWLOOP) + nds32_hwloop_insert_init_end (); + + if (flag_var_tracking) + { + timevar_push (TV_VAR_TRACKING); + variable_tracking_main (); + timevar_pop (TV_VAR_TRACKING); + df_finish_pass (false); + } + + /* Use -minnermost-loop to enable, + need more testing to verify result. */ + if (TARGET_INNERMOST_LOOP) + nds32_insert_innermost_loop (); + + nds32_insert_isps (); +} - tree void_ftype_void = build_function_type (void_type_node, - void_list_node); +static void +nds32_init_builtins (void) +{ + nds32_init_builtins_impl (); +} - tree void_ftype_pint = build_function_type_list (void_type_node, - pointer_type_node, - NULL_TREE); - - tree int_ftype_int = build_function_type_list (integer_type_node, - integer_type_node, - NULL_TREE); - - tree void_ftype_int_int = build_function_type_list (void_type_node, - integer_type_node, - integer_type_node, - NULL_TREE); - - /* Cache. */ - add_builtin_function ("__builtin_nds32_isync", void_ftype_pint, - NDS32_BUILTIN_ISYNC, - BUILT_IN_MD, NULL, NULL_TREE); - add_builtin_function ("__builtin_nds32_isb", void_ftype_void, - NDS32_BUILTIN_ISB, - BUILT_IN_MD, NULL, NULL_TREE); - - /* Register Transfer. */ - add_builtin_function ("__builtin_nds32_mfsr", int_ftype_int, - NDS32_BUILTIN_MFSR, - BUILT_IN_MD, NULL, NULL_TREE); - add_builtin_function ("__builtin_nds32_mfusr", int_ftype_int, - NDS32_BUILTIN_MFUSR, - BUILT_IN_MD, NULL, NULL_TREE); - add_builtin_function ("__builtin_nds32_mtsr", void_ftype_int_int, - NDS32_BUILTIN_MTSR, - BUILT_IN_MD, NULL, NULL_TREE); - add_builtin_function ("__builtin_nds32_mtusr", void_ftype_int_int, - NDS32_BUILTIN_MTUSR, - BUILT_IN_MD, NULL, NULL_TREE); - - /* Interrupt. */ - add_builtin_function ("__builtin_nds32_setgie_en", void_ftype_void, - NDS32_BUILTIN_SETGIE_EN, - BUILT_IN_MD, NULL, NULL_TREE); - add_builtin_function ("__builtin_nds32_setgie_dis", void_ftype_void, - NDS32_BUILTIN_SETGIE_DIS, - BUILT_IN_MD, NULL, NULL_TREE); +static tree +nds32_builtin_decl (unsigned code, bool initialize_p) +{ + /* Implement in nds32-intrinsic.c. */ + return nds32_builtin_decl_impl (code, initialize_p); } static rtx nds32_expand_builtin (tree exp, rtx target, - rtx subtarget ATTRIBUTE_UNUSED, - enum machine_mode mode ATTRIBUTE_UNUSED, - int ignore ATTRIBUTE_UNUSED) -{ - tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); - - int fcode = DECL_FUNCTION_CODE (fndecl); - - switch (fcode) - { - /* Cache. */ - case NDS32_BUILTIN_ISYNC: - return nds32_expand_builtin_null_ftype_reg - (CODE_FOR_unspec_volatile_isync, exp, target); - case NDS32_BUILTIN_ISB: - /* Since there are no result and operands for isb instruciton, - we can simply emit this rtx. */ - emit_insn (gen_unspec_volatile_isb ()); - return target; - - /* Register Transfer. 
 */
-    case NDS32_BUILTIN_MFSR:
-      return nds32_expand_builtin_reg_ftype_imm
-             (CODE_FOR_unspec_volatile_mfsr, exp, target);
-    case NDS32_BUILTIN_MFUSR:
-      return nds32_expand_builtin_reg_ftype_imm
-             (CODE_FOR_unspec_volatile_mfusr, exp, target);
-    case NDS32_BUILTIN_MTSR:
-      return nds32_expand_builtin_null_ftype_reg_imm
-             (CODE_FOR_unspec_volatile_mtsr, exp, target);
-    case NDS32_BUILTIN_MTUSR:
-      return nds32_expand_builtin_null_ftype_reg_imm
-             (CODE_FOR_unspec_volatile_mtusr, exp, target);
-
-    /* Interrupt.  */
-    case NDS32_BUILTIN_SETGIE_EN:
-      /* Since there are no result and operands for setgie.e instruciton,
-         we can simply emit this rtx.  */
-      emit_insn (gen_unspec_volatile_setgie_en ());
-      return target;
-    case NDS32_BUILTIN_SETGIE_DIS:
-      /* Since there are no result and operands for setgie.d instruciton,
-         we can simply emit this rtx.  */
-      emit_insn (gen_unspec_volatile_setgie_dis ());
-      return target;
+                      rtx subtarget,
+                      enum machine_mode mode,
+                      int ignore)
+{
+  /* Implemented in nds32-intrinsic.c.  */
+  return nds32_expand_builtin_impl (exp, target, subtarget, mode, ignore);
+}

-    default:
-      gcc_unreachable ();
-    }
+static bool
+nds32_have_conditional_execution (void)
+{
+  /* Lie to gcc that we have conditional execution in order to change the
+     optimization flow in the if-conversion, LRA and scheduling phases.
+     Our experiment results show that this can reduce code size by about 2%
+     with very minor performance degradation on average.  */
+  return optimize_size;
+}

-  return NULL_RTX;
+/* Implement TARGET_INIT_LIBFUNCS.  */
+static void
+nds32_init_libfuncs (void)
+{
+  if (TARGET_LINUX_ABI)
+    init_sync_libfuncs (UNITS_PER_WORD);
+}
+
+/* Implement TARGET_CAN_USE_DOLOOP_P.  */
+static bool
+nds32_can_use_doloop_p (double_int, double_int iterations_max,
+                        unsigned int, bool entered_at_top)
+{
+  /* A hardware loop must be entered from the top.  */
+  if (!entered_at_top)
+    return false;
+
+  if (lookup_attribute ("no_ext_zol", DECL_ATTRIBUTES (current_function_decl)))
+    return false;
+
+  /* Initializing a hardware loop is too costly, so avoid generating
+     one when the loop count is less than 8.  */
+  if (!NDS32_HW_LOOP_P ()
+      || iterations_max.low < 8)
+    return false;
+  return true;
 }

+/* Return NULL if INSN is valid within a low-overhead loop.
+   Otherwise return why doloop cannot be applied.  */
+static const char *
+nds32_invalid_within_doloop (const_rtx insn)
+{
+  if (CALL_P (insn))
+    return "Function call in the loop.";
+  else if (INSN_CODE (insn) == CODE_FOR_pop25return
+           || INSN_CODE (insn) == CODE_FOR_return_internal)
+    return "Simple return in the loop.";
+
+  return NULL;
+}

 /* ------------------------------------------------------------------------ */

-/* PART 4: Implemet extern function definitions,
-           the prototype is in nds32-protos.h.  */
+/* PART 5: Implement extern function definitions,
+           the prototype is in nds32-protos.h.  */
+
+/* Run-time Target Specification.  */
+
+void
+nds32_cpu_cpp_builtins(struct cpp_reader *pfile)
+{
+#define builtin_define(TXT) cpp_define (pfile, TXT)
+#define builtin_assert(TXT) cpp_assert (pfile, TXT)
+  builtin_define ("__nds32__");
+  builtin_define ("__NDS32__");
+
+  /* We need to provide a builtin macro to describe the size of
+     each vector for an interrupt handler under the ELF toolchain.
*/ + if (!TARGET_LINUX_ABI) + { + if (TARGET_ISR_VECTOR_SIZE_4_BYTE) + builtin_define ("__NDS32_ISR_VECTOR_SIZE_4__"); + else + builtin_define ("__NDS32_ISR_VECTOR_SIZE_16__"); + } + + if (TARGET_HARD_FLOAT) + builtin_define ("__NDS32_ABI_2FP_PLUS__"); + else + builtin_define ("__NDS32_ABI_2__"); + + if (TARGET_ISA_V2) + builtin_define ("__NDS32_ISA_V2__"); + if (TARGET_ISA_V3) + builtin_define ("__NDS32_ISA_V3__"); + if (TARGET_ISA_V3M) + builtin_define ("__NDS32_ISA_V3M__"); + + if (TARGET_FPU_SINGLE) + builtin_define ("__NDS32_EXT_FPU_SP__"); + if (TARGET_FPU_DOUBLE) + builtin_define ("__NDS32_EXT_FPU_DP__"); + + if (TARGET_EXT_FPU_FMA) + builtin_define ("__NDS32_EXT_FPU_FMA__"); + if (NDS32_EXT_FPU_DOT_E) + builtin_define ("__NDS32_EXT_FPU_DOT_E__"); + if (TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) + { + switch (nds32_fp_regnum) + { + case 0: + case 4: + builtin_define ("__NDS32_EXT_FPU_CONFIG_0__"); + break; + case 1: + case 5: + builtin_define ("__NDS32_EXT_FPU_CONFIG_1__"); + break; + case 2: + case 6: + builtin_define ("__NDS32_EXT_FPU_CONFIG_2__"); + break; + case 3: + case 7: + builtin_define ("__NDS32_EXT_FPU_CONFIG_3__"); + break; + default: + abort (); + } + } + + if (TARGET_BIG_ENDIAN) + builtin_define ("__NDS32_EB__"); + else + builtin_define ("__NDS32_EL__"); + + if (TARGET_REDUCED_REGS) + builtin_define ("__NDS32_REDUCED_REGS__"); + if (TARGET_CMOV) + builtin_define ("__NDS32_CMOV__"); + if (TARGET_EXT_PERF) + builtin_define ("__NDS32_EXT_PERF__"); + if (TARGET_EXT_PERF2) + builtin_define ("__NDS32_EXT_PERF2__"); + if (TARGET_EXT_STRING) + builtin_define ("__NDS32_EXT_STRING__"); + if (TARGET_16_BIT) + builtin_define ("__NDS32_16_BIT__"); + if (TARGET_GP_DIRECT) + builtin_define ("__NDS32_GP_DIRECT__"); + if (TARGET_VH) + builtin_define ("__NDS32_VH__"); + if (NDS32_EXT_DSP_P ()) + builtin_define ("__NDS32_EXT_DSP__"); + if (NDS32_HW_LOOP_P ()) + builtin_define ("__NDS32_EXT_ZOL__"); + + /* Extra builtin macros. */ + if (TARGET_ISA_V3) + builtin_define ("__NDS32_EXT_IFC__"); + if (TARGET_ISA_V3) + builtin_define ("__NDS32_EXT_EX9__"); + if (TARGET_BIG_ENDIAN) + builtin_define ("__big_endian__"); + + builtin_assert ("cpu=nds32"); + builtin_assert ("machine=nds32"); + + /* FOR BACKWARD COMPATIBILITY. 
*/ + if (TARGET_ISA_V2) + builtin_define ("__NDS32_BASELINE_V2__"); + if (TARGET_ISA_V3) + builtin_define ("__NDS32_BASELINE_V3__"); + if (TARGET_ISA_V3M) + builtin_define ("__NDS32_BASELINE_V3M__"); + if (TARGET_REDUCED_REGS) + builtin_define ("__NDS32_REDUCE_REGS__"); + + if (TARGET_ISA_V2) + builtin_define ("NDS32_BASELINE_V2"); + if (TARGET_ISA_V3) + builtin_define ("NDS32_BASELINE_V3"); + if (TARGET_ISA_V3M) + builtin_define ("NDS32_BASELINE_V3M"); + if (TARGET_REDUCED_REGS) + builtin_define ("NDS32_REDUCE_REGS"); + if (TARGET_FPU_SINGLE) + builtin_define ("NDS32_EXT_FPU_SP"); + if (TARGET_FPU_DOUBLE) + builtin_define ("NDS32_EXT_FPU_DP"); + if (TARGET_EXT_PERF) + builtin_define ("NDS32_EXT_PERF"); + if (TARGET_EXT_PERF2) + builtin_define ("NDS32_EXT_PERF2"); + if (TARGET_EXT_STRING) + builtin_define ("NDS32_EXT_STRING"); + if (TARGET_ISA_V3) + builtin_define ("NDS32_EXT_IFC"); + if (TARGET_ISA_V3) + builtin_define ("NDS32_EXT_EX9"); + + if (TARGET_HARD_FLOAT) + builtin_define ("NDS32_ABI_2FP_PLUS"); + else + builtin_define ("NDS32_ABI_2"); + + if (TARGET_BIG_ENDIAN) + builtin_define ("NDS32_EB"); + else + builtin_define ("NDS32_EL"); + + if (TARGET_ISA_V2) + builtin_define ("__NDS32_BASELINE_V2"); + if (TARGET_ISA_V3) + builtin_define ("__NDS32_BASELINE_V3"); + if (TARGET_ISA_V3M) + builtin_define ("__NDS32_BASELINE_V3M"); + if (TARGET_REDUCED_REGS) + builtin_define ("__NDS32_REDUCE_REGS"); + if (TARGET_FPU_SINGLE) + builtin_define ("__NDS32_EXT_FPU_SP"); + if (TARGET_FPU_DOUBLE) + builtin_define ("__NDS32_EXT_FPU_DP"); + if (TARGET_EXT_PERF) + builtin_define ("__NDS32_EXT_PERF"); + if (TARGET_EXT_PERF2) + builtin_define ("__NDS32_EXT_PERF2"); + if (TARGET_EXT_STRING) + builtin_define ("__NDS32_EXT_STRING"); + if (TARGET_ISA_V3) + builtin_define ("__NDS32_EXT_IFC"); + + if (TARGET_ISA_V3) + builtin_define ("__NDS32_EXT_EX9"); + + if (TARGET_HARD_FLOAT) + builtin_define ("__NDS32_ABI_2FP_PLUS"); + else + builtin_define ("__NDS32_ABI_2"); + + if (TARGET_BIG_ENDIAN) + builtin_define ("__NDS32_EB"); + else + builtin_define ("__NDS32_EL"); +#undef builtin_define +#undef builtin_assert +} + /* Defining Data Structures for Per-function Information. */ @@ -3413,6 +4851,25 @@ /* Register Usage. */ +/* -- Order of Allocation of Registers. */ + +void +nds32_adjust_reg_alloc_order (void) +{ + const int nds32_reg_alloc_order[] = REG_ALLOC_ORDER; + + /* Copy the default register allocation order, which is designed + to optimize for code size. */ + memcpy(reg_alloc_order, nds32_reg_alloc_order, sizeof (reg_alloc_order)); + + /* Adjust few register allocation order when optimizing for speed. */ + if (!optimize_size) + { + memcpy (reg_alloc_order, nds32_reg_alloc_order_for_speed, + sizeof (nds32_reg_alloc_order_for_speed)); + } +} + /* -- How Values Fit in Registers. */ int @@ -3425,12 +4882,27 @@ int nds32_hard_regno_mode_ok (int regno, enum machine_mode mode) { + if (regno > FIRST_PSEUDO_REGISTER) + return true; + + if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) && NDS32_IS_FPR_REGNUM (regno)) + { + if (NDS32_IS_EXT_FPR_REGNUM(regno)) + return (NDS32_FPR_REGNO_OK_FOR_DOUBLE(regno) && (mode == DFmode)); + else if (mode == SFmode || mode == SImode) + return NDS32_FPR_REGNO_OK_FOR_SINGLE (regno); + else if (mode == DFmode) + return NDS32_FPR_REGNO_OK_FOR_DOUBLE (regno); + + return false; + } + /* Restrict double-word quantities to even register pairs. 
*/ - if (HARD_REGNO_NREGS (regno, mode) == 1 - || !((regno) & 1)) - return 1; + if (regno <= NDS32_LAST_GPR_REGNUM) + return (HARD_REGNO_NREGS (regno, mode) == 1 + || !((regno) & 1)); - return 0; + return false; } @@ -3454,7 +4926,16 @@ else if (regno >= 20 && regno <= 31) return HIGH_REGS; else if (regno == 32 || regno == 33) - return FRAME_REGS; + { + /* $SFP and $AP is FRAME_REGS in fact, However prevent IRA don't + know how to allocate register for $SFP and $AP, just tell IRA they + are GENERAL_REGS, and ARM do this hack too. */ + return GENERAL_REGS; + } + else if (regno >= 34 && regno <= 97) + return FP_REGS; + else if (regno >= 98 && regno <= 100) + return LOOP_REGS; else return NO_REGS; } @@ -3465,14 +4946,39 @@ /* -- Basic Stack Layout. */ rtx +nds32_dynamic_chain_address (rtx frameaddr) +{ + if (TARGET_V3PUSH) + { + /* If -mv3push is specified, we push $fp, $gp, and $lp into stack. + We can access dynamic chain address from stack by [$fp - 12]. */ + return plus_constant (Pmode, frameaddr, -12); + } + else + { + /* For general case we push $fp and $lp into stack at prologue. + We can access dynamic chain address from stack by [$fp - 8]. */ + return plus_constant (Pmode, frameaddr, -8); + } +} + +rtx nds32_return_addr_rtx (int count, - rtx frameaddr ATTRIBUTE_UNUSED) + rtx frameaddr) { - /* There is no way to determine the return address - if frameaddr is the frame that has 'count' steps - up from current frame. */ + int offset; + rtx addr; + if (count != 0) - return NULL_RTX; + { + /* In nds32 ABI design, we can expect that $lp is always available + from stack by [$fp - 4] location. */ + offset = -4; + addr = plus_constant (Pmode, frameaddr, offset); + addr = memory_address (Pmode, addr); + + return gen_rtx_MEM (Pmode, addr); + } /* If count == 0, it means we are at current frame, the return address is $r30 ($lp). */ @@ -3491,15 +4997,18 @@ nds32_compute_stack_frame (); /* Remember to consider - cfun->machine->callee_saved_area_padding_bytes + cfun->machine->callee_saved_area_gpr_padding_bytes and + cfun->machine->eh_return_data_regs_size when calculating offset. */ if (from_reg == ARG_POINTER_REGNUM && to_reg == STACK_POINTER_REGNUM) { offset = (cfun->machine->fp_size - + cfun->machine->gp_size + + cfun->machine->gp_size + cfun->machine->lp_size - + cfun->machine->callee_saved_regs_size - + cfun->machine->callee_saved_area_padding_bytes + + cfun->machine->callee_saved_gpr_regs_size + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size + + cfun->machine->eh_return_data_regs_size + cfun->machine->local_size + cfun->machine->out_args_size); } @@ -3519,8 +5028,10 @@ offset = (-1) * (cfun->machine->fp_size + cfun->machine->gp_size + cfun->machine->lp_size - + cfun->machine->callee_saved_regs_size - + cfun->machine->callee_saved_area_padding_bytes); + + cfun->machine->callee_saved_gpr_regs_size + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size + + cfun->machine->eh_return_data_regs_size); } else { @@ -3539,10 +5050,11 @@ tree fndecl ATTRIBUTE_UNUSED, int n_named_args ATTRIBUTE_UNUSED) { - /* Initial available registers - (in offset, corresponding to NDS32_GPR_ARG_FIRST_REGNUM) + /* Initial available registers. The values are offset against + NDS32_GPR_ARG_FIRST_REGNUM and NDS32_FPR_ARG_FIRST_REGNUM for passing arguments. */ - cum->reg_offset = 0; + cum->gpr_offset = 0; + cum->fpr_offset = 0; } /* -- Function Entry and Exit. 
*/ @@ -3553,57 +5065,71 @@ { int fp_adjust; int sp_adjust; - int en4_const; - - rtx Rb, Re; - rtx push_insn; - rtx fp_adjust_insn, sp_adjust_insn; - - /* Before computing everything for stack frame size, - we check if it is still worth to use fp_as_gp optimization. - If it is, the 'df_regs_ever_live_p (FP_REGNUM)' will be set - so that $fp will be saved on stack. */ - cfun->machine->fp_as_gp_p = nds32_fp_as_gp_check_available (); + unsigned Rb, Re; /* Compute and setup stack frame size. The result will be in cfun->machine. */ nds32_compute_stack_frame (); + /* Check frame_pointer_needed again to prevent fp is need after reload. */ + if (frame_pointer_needed) + cfun->machine->fp_as_gp_p = false; + + /* If this is a variadic function, first we need to push argument + registers that hold the unnamed argument value. */ + if (cfun->machine->va_args_size != 0) + { + Rb = cfun->machine->va_args_first_regno; + Re = cfun->machine->va_args_last_regno; + /* No need to push $fp, $gp, or $lp. */ + nds32_emit_stack_push_multiple (Rb, Re, false, false, false, true); + + /* We may also need to adjust stack pointer for padding bytes + because varargs may cause $sp not 8-byte aligned. */ + if (cfun->machine->va_args_area_padding_bytes) + { + /* Generate sp adjustment instruction. */ + sp_adjust = cfun->machine->va_args_area_padding_bytes; + + nds32_emit_adjust_frame (stack_pointer_rtx, + stack_pointer_rtx, + -1 * sp_adjust); + } + } + /* If the function is 'naked', we do not have to generate prologue code fragment. */ - if (cfun->machine->naked_p) + if (cfun->machine->naked_p && !flag_pic) return; /* Get callee_first_regno and callee_last_regno. */ - Rb = gen_rtx_REG (SImode, cfun->machine->callee_saved_regs_first_regno); - Re = gen_rtx_REG (SImode, cfun->machine->callee_saved_regs_last_regno); - - /* push_insn = gen_stack_push_multiple(first_regno, last_regno), - the pattern 'stack_push_multiple' is implemented in nds32.md. - For En4 field, we have to calculate its constant value. - Refer to Andes ISA for more information. */ - en4_const = 0; - if (cfun->machine->fp_size) - en4_const += 8; - if (cfun->machine->gp_size) - en4_const += 4; - if (cfun->machine->lp_size) - en4_const += 2; + Rb = cfun->machine->callee_saved_first_gpr_regno; + Re = cfun->machine->callee_saved_last_gpr_regno; /* If $fp, $gp, $lp, and all callee-save registers are NOT required to be saved, we don't have to create multiple push instruction. Otherwise, a multiple push instruction is needed. */ - if (!(REGNO (Rb) == SP_REGNUM && REGNO (Re) == SP_REGNUM && en4_const == 0)) + if (!(Rb == SP_REGNUM && Re == SP_REGNUM + && cfun->machine->fp_size == 0 + && cfun->machine->gp_size == 0 + && cfun->machine->lp_size == 0)) { /* Create multiple push instruction rtx. */ - push_insn = nds32_gen_stack_push_multiple (Rb, Re, GEN_INT (en4_const)); - /* Emit rtx into instructions list and receive INSN rtx form. */ - push_insn = emit_insn (push_insn); + nds32_emit_stack_push_multiple ( + Rb, Re, + cfun->machine->fp_size, cfun->machine->gp_size, cfun->machine->lp_size, + false); + } + + /* Save eh data registers. */ + if (cfun->machine->use_eh_return_p) + { + Rb = cfun->machine->eh_return_data_first_regno; + Re = cfun->machine->eh_return_data_last_regno; - /* The insn rtx 'push_insn' will change frame layout. - We need to use RTX_FRAME_RELATED_P so that GCC is able to - generate CFI (Call Frame Information) stuff. */ - RTX_FRAME_RELATED_P (push_insn) = 1; + /* No need to push $fp, $gp, or $lp. + Also, this is not variadic arguments push. 
*/ + nds32_emit_stack_push_multiple (Rb, Re, false, false, false, false); } /* Check frame_pointer_needed to see @@ -3611,1824 +5137,830 @@ if (frame_pointer_needed) { /* adjust $fp = $sp + ($fp size) + ($gp size) + ($lp size) - + (4 * callee-saved-registers) - Note: No need to adjust - cfun->machine->callee_saved_area_padding_bytes, - because, at this point, stack pointer is just - at the position after push instruction. */ + + (4 * callee-saved-registers) + + (4 * exception-handling-data-registers) + Note: No need to adjust + cfun->machine->callee_saved_area_gpr_padding_bytes, + because, at this point, stack pointer is just + at the position after push instruction. */ fp_adjust = cfun->machine->fp_size + cfun->machine->gp_size + cfun->machine->lp_size - + cfun->machine->callee_saved_regs_size; - fp_adjust_insn = gen_addsi3 (hard_frame_pointer_rtx, - stack_pointer_rtx, - GEN_INT (fp_adjust)); - /* Emit rtx into instructions list and receive INSN rtx form. */ - fp_adjust_insn = emit_insn (fp_adjust_insn); + + cfun->machine->callee_saved_gpr_regs_size + + cfun->machine->eh_return_data_regs_size; + + nds32_emit_adjust_frame (hard_frame_pointer_rtx, + stack_pointer_rtx, + fp_adjust); } - /* Adjust $sp = $sp - local_size - out_args_size - - callee_saved_area_padding_bytes. */ - sp_adjust = cfun->machine->local_size - + cfun->machine->out_args_size - + cfun->machine->callee_saved_area_padding_bytes; - /* sp_adjust value may be out of range of the addi instruction, - create alternative add behavior with TA_REGNUM if necessary, - using NEGATIVE value to tell that we are decreasing address. */ - sp_adjust = nds32_force_addi_stack_int ( (-1) * sp_adjust); - if (sp_adjust) + /* Save fpu registers. */ + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) { - /* Generate sp adjustment instruction if and only if sp_adjust != 0. */ - sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, + /* When $sp moved to bottom of stack, we need to check whether + the range of offset in the FPU instruction. */ + int fpr_offset = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_fpr_regs_size; + + /* Check FPU instruction offset imm14s. */ + if (!satisfies_constraint_Is14 (GEN_INT (fpr_offset))) + { + int fpr_space = cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + + /* Save fpu registers, need to allocate stack space + for fpu callee registers. And now $sp position + on callee saved fpr registers. */ + nds32_emit_adjust_frame (stack_pointer_rtx, stack_pointer_rtx, - GEN_INT (-1 * sp_adjust)); - /* Emit rtx into instructions list and receive INSN rtx form. */ - sp_adjust_insn = emit_insn (sp_adjust_insn); + -1 * fpr_space); + + /* Emit fpu store instruction, using [$sp + offset] store + fpu registers. */ + nds32_emit_push_fpr_callee_saved (0); + + /* Adjust $sp = $sp - local_size - out_args_size. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size; + + /* Allocate stack space for local size and out args size. */ + nds32_emit_adjust_frame (stack_pointer_rtx, + stack_pointer_rtx, + -1 * sp_adjust); + } + else + { + /* Offset range in Is14, so $sp moved to bottom of stack. */ + + /* Adjust $sp = $sp - local_size - out_args_size + - callee_saved_area_gpr_padding_bytes + - callee_saved_fpr_regs_size. 
*/ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + + nds32_emit_adjust_frame (stack_pointer_rtx, + stack_pointer_rtx, + -1 * sp_adjust); - /* The insn rtx 'sp_adjust_insn' will change frame layout. - We need to use RTX_FRAME_RELATED_P so that GCC is able to - generate CFI (Call Frame Information) stuff. */ - RTX_FRAME_RELATED_P (sp_adjust_insn) = 1; + /* Emit fpu store instruction, using [$sp + offset] store + fpu registers. */ + int fpr_position = cfun->machine->out_args_size + + cfun->machine->local_size; + nds32_emit_push_fpr_callee_saved (fpr_position); + } } + else + { + /* Adjust $sp = $sp - local_size - out_args_size + - callee_saved_area_gpr_padding_bytes. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_gpr_padding_bytes; - /* Prevent the instruction scheduler from - moving instructions across the boundary. */ - emit_insn (gen_blockage ()); + /* sp_adjust value may be out of range of the addi instruction, + create alternative add behavior with TA_REGNUM if necessary, + using NEGATIVE value to tell that we are decreasing address. */ + nds32_emit_adjust_frame (stack_pointer_rtx, + stack_pointer_rtx, + -1 * sp_adjust); + } + + /* Emit gp setup instructions for -fpic. */ + if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) + nds32_emit_load_gp (); + + /* If user applies -mno-sched-prolog-epilog option, + we need to prevent instructions of function body from being + scheduled with stack adjustment in prologue. */ + if (!flag_sched_prolog_epilog) + emit_insn (gen_blockage ()); } /* Function for normal multiple pop epilogue. */ void -nds32_expand_epilogue (void) +nds32_expand_epilogue (bool sibcall_p) { int sp_adjust; - int en4_const; - - rtx Rb, Re; - rtx pop_insn; - rtx sp_adjust_insn; + unsigned Rb, Re; /* Compute and setup stack frame size. The result will be in cfun->machine. */ nds32_compute_stack_frame (); - /* Prevent the instruction scheduler from - moving instructions across the boundary. */ - emit_insn (gen_blockage ()); + /* If user applies -mno-sched-prolog-epilog option, + we need to prevent instructions of function body from being + scheduled with stack adjustment in epilogue. */ + if (!flag_sched_prolog_epilog) + emit_insn (gen_blockage ()); /* If the function is 'naked', we do not have to generate - epilogue code fragment BUT 'ret' instruction. */ + epilogue code fragment BUT 'ret' instruction. + However, if this function is also a variadic function, + we need to create adjust stack pointer before 'ret' instruction. */ if (cfun->machine->naked_p) { - /* Generate return instruction by using - unspec_volatile_func_return pattern. - Make sure this instruction is after gen_blockage(). - NOTE that $lp will become 'live' - after this instruction has been emitted. */ - emit_insn (gen_unspec_volatile_func_return ()); + /* If this is a variadic function, we do not have to restore argument + registers but need to adjust stack pointer back to previous stack + frame location before return. */ + if (cfun->machine->va_args_size != 0) + { + /* Generate sp adjustment instruction. + We need to consider padding bytes here. */ + sp_adjust = cfun->machine->va_args_size + + cfun->machine->va_args_area_padding_bytes; + + nds32_emit_adjust_frame (stack_pointer_rtx, + stack_pointer_rtx, + sp_adjust); + } + + /* Generate return instruction by using 'return_internal' pattern. 
+ Make sure this instruction is after gen_blockage(). + First we need to check this is a function without sibling call. */ + if (!sibcall_p) + { + /* We need to further check attributes to determine whether + there should be return instruction at epilogue. + If the attribute naked exists but -mno-ret-in-naked-func + is issued, there is NO need to generate return instruction. */ + if (cfun->machine->attr_naked_p && !flag_ret_in_naked_func) + return; + + emit_jump_insn (gen_return_internal ()); + } return; } if (frame_pointer_needed) { - /* adjust $sp = $fp - ($fp size) - ($gp size) - ($lp size) - - (4 * callee-saved-registers) - Note: No need to adjust - cfun->machine->callee_saved_area_padding_bytes, - because we want to adjust stack pointer - to the position for pop instruction. */ - sp_adjust = cfun->machine->fp_size - + cfun->machine->gp_size - + cfun->machine->lp_size - + cfun->machine->callee_saved_regs_size; - sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, - hard_frame_pointer_rtx, - GEN_INT (-1 * sp_adjust)); - /* Emit rtx into instructions list and receive INSN rtx form. */ - sp_adjust_insn = emit_insn (sp_adjust_insn); - } - else - { - /* If frame pointer is NOT needed, - we cannot calculate the sp adjustment from frame pointer. - Instead, we calculate the adjustment by local_size, - out_args_size, and callee_saved_area_padding_bytes. - Notice that such sp adjustment value may be out of range, - so we have to deal with it as well. */ - - /* Adjust $sp = $sp + local_size + out_args_size - + callee_saved_area_padding_bytes. */ - sp_adjust = cfun->machine->local_size - + cfun->machine->out_args_size - + cfun->machine->callee_saved_area_padding_bytes; - /* sp_adjust value may be out of range of the addi instruction, - create alternative add behavior with TA_REGNUM if necessary, - using POSITIVE value to tell that we are increasing address. */ - sp_adjust = nds32_force_addi_stack_int (sp_adjust); - if (sp_adjust) - { - /* Generate sp adjustment instruction - if and only if sp_adjust != 0. */ - sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, - stack_pointer_rtx, - GEN_INT (sp_adjust)); - /* Emit rtx into instructions list and receive INSN rtx form. */ - sp_adjust_insn = emit_insn (sp_adjust_insn); - } - } - - /* Get callee_first_regno and callee_last_regno. */ - Rb = gen_rtx_REG (SImode, cfun->machine->callee_saved_regs_first_regno); - Re = gen_rtx_REG (SImode, cfun->machine->callee_saved_regs_last_regno); - - /* pop_insn = gen_stack_pop_multiple(first_regno, last_regno), - the pattern 'stack_pop_multiple' is implementad in nds32.md. - For En4 field, we have to calculate its constant value. - Refer to Andes ISA for more information. */ - en4_const = 0; - if (cfun->machine->fp_size) - en4_const += 8; - if (cfun->machine->gp_size) - en4_const += 4; - if (cfun->machine->lp_size) - en4_const += 2; - - /* If $fp, $gp, $lp, and all callee-save registers are NOT required - to be saved, we don't have to create multiple pop instruction. - Otherwise, a multiple pop instruction is needed. */ - if (!(REGNO (Rb) == SP_REGNUM && REGNO (Re) == SP_REGNUM && en4_const == 0)) - { - /* Create multiple pop instruction rtx. */ - pop_insn = nds32_gen_stack_pop_multiple (Rb, Re, GEN_INT (en4_const)); - /* Emit pop instruction. */ - emit_insn (pop_insn); - } - - /* Generate return instruction by using - unspec_volatile_func_return pattern. */ - emit_insn (gen_unspec_volatile_func_return ()); -} - -/* Function for v3push prologue. 
*/ -void -nds32_expand_prologue_v3push (void) -{ - int fp_adjust; - int sp_adjust; - - rtx Rb, Re; - rtx push_insn; - rtx fp_adjust_insn, sp_adjust_insn; - - /* Before computing everything for stack frame size, - we check if it is still worth to use fp_as_gp optimization. - If it is, the 'df_regs_ever_live_p (FP_REGNUM)' will be set - so that $fp will be saved on stack. */ - cfun->machine->fp_as_gp_p = nds32_fp_as_gp_check_available (); - - /* Compute and setup stack frame size. - The result will be in cfun->machine. */ - nds32_compute_stack_frame (); - - /* If the function is 'naked', - we do not have to generate prologue code fragment. */ - if (cfun->machine->naked_p) - return; - - /* Get callee_first_regno and callee_last_regno. */ - Rb = gen_rtx_REG (SImode, cfun->machine->callee_saved_regs_first_regno); - Re = gen_rtx_REG (SImode, cfun->machine->callee_saved_regs_last_regno); - - /* Calculate sp_adjust first to test if 'push25 Re,imm8u' is available, - where imm8u has to be 8-byte alignment. */ - sp_adjust = cfun->machine->local_size - + cfun->machine->out_args_size - + cfun->machine->callee_saved_area_padding_bytes; - - if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) - && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust)) - { - /* We can use 'push25 Re,imm8u'. */ - - /* push_insn = gen_stack_v3push(last_regno, sp_adjust), - the pattern 'stack_v3push' is implemented in nds32.md. - The (const_int 14) means v3push always push { $fp $gp $lp }. */ - push_insn = nds32_gen_stack_v3push (Rb, Re, - GEN_INT (14), GEN_INT (sp_adjust)); - /* emit rtx into instructions list and receive INSN rtx form */ - push_insn = emit_insn (push_insn); - - /* The insn rtx 'push_insn' will change frame layout. - We need to use RTX_FRAME_RELATED_P so that GCC is able to - generate CFI (Call Frame Information) stuff. */ - RTX_FRAME_RELATED_P (push_insn) = 1; - - /* Check frame_pointer_needed to see - if we shall emit fp adjustment instruction. */ - if (frame_pointer_needed) + /* Restore fpu registers. */ + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) { - /* adjust $fp = $sp + 4 ($fp size) - + 4 ($gp size) - + 4 ($lp size) - + (4 * n) (callee-saved registers) - + sp_adjust ('push25 Re,imm8u') - Note: Since we use 'push25 Re,imm8u', - the position of stack pointer is further - changed after push instruction. - Hence, we need to take sp_adjust value - into consideration. */ - fp_adjust = cfun->machine->fp_size + int gpr_padding = cfun->machine->callee_saved_area_gpr_padding_bytes; + + /* adjust $sp = $fp - ($fp size) - ($gp size) - ($lp size) + - (4 * callee-saved-registers) + - (4 * exception-handling-data-registers) + - (4 * callee-saved-gpr-registers padding byte) + - (4 * callee-saved-fpr-registers) + Note: we want to adjust stack pointer + to the position for callee-saved fpr register, + And restore fpu register use .bi instruction to adjust $sp + from callee-saved fpr register to pop instruction. */ + sp_adjust = cfun->machine->fp_size + cfun->machine->gp_size + cfun->machine->lp_size - + cfun->machine->callee_saved_regs_size - + sp_adjust; - fp_adjust_insn = gen_addsi3 (hard_frame_pointer_rtx, - stack_pointer_rtx, - GEN_INT (fp_adjust)); - /* Emit rtx into instructions list and receive INSN rtx form. */ - fp_adjust_insn = emit_insn (fp_adjust_insn); - } - } - else - { - /* We have to use 'push25 Re,0' and - expand one more instruction to adjust $sp later. */ - - /* push_insn = gen_stack_v3push(last_regno, sp_adjust), - the pattern 'stack_v3push' is implemented in nds32.md. 
- The (const_int 14) means v3push always push { $fp $gp $lp }. */ - push_insn = nds32_gen_stack_v3push (Rb, Re, - GEN_INT (14), GEN_INT (0)); - /* Emit rtx into instructions list and receive INSN rtx form. */ - push_insn = emit_insn (push_insn); + + cfun->machine->callee_saved_gpr_regs_size + + cfun->machine->eh_return_data_regs_size + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; - /* The insn rtx 'push_insn' will change frame layout. - We need to use RTX_FRAME_RELATED_P so that GCC is able to - generate CFI (Call Frame Information) stuff. */ - RTX_FRAME_RELATED_P (push_insn) = 1; + nds32_emit_adjust_frame (stack_pointer_rtx, + hard_frame_pointer_rtx, + -1 * sp_adjust); - /* Check frame_pointer_needed to see - if we shall emit fp adjustment instruction. */ - if (frame_pointer_needed) + /* Emit fpu load instruction, using .bi instruction + load fpu registers. */ + nds32_emit_pop_fpr_callee_saved (gpr_padding); + } + else { - /* adjust $fp = $sp + 4 ($fp size) - + 4 ($gp size) - + 4 ($lp size) - + (4 * n) (callee-saved registers) - Note: Since we use 'push25 Re,0', - the stack pointer is just at the position - after push instruction. - No need to take sp_adjust into consideration. */ - fp_adjust = cfun->machine->fp_size + /* adjust $sp = $fp - ($fp size) - ($gp size) - ($lp size) + - (4 * callee-saved-registers) + - (4 * exception-handling-data-registers) + Note: No need to adjust + cfun->machine->callee_saved_area_gpr_padding_bytes, + because we want to adjust stack pointer + to the position for pop instruction. */ + sp_adjust = cfun->machine->fp_size + cfun->machine->gp_size + cfun->machine->lp_size - + cfun->machine->callee_saved_regs_size; - fp_adjust_insn = gen_addsi3 (hard_frame_pointer_rtx, - stack_pointer_rtx, - GEN_INT (fp_adjust)); - /* Emit rtx into instructions list and receive INSN rtx form. */ - fp_adjust_insn = emit_insn (fp_adjust_insn); - } + + cfun->machine->callee_saved_gpr_regs_size + + cfun->machine->eh_return_data_regs_size; - /* Because we use 'push25 Re,0', - we need to expand one more instruction to adjust $sp. - However, sp_adjust value may be out of range of the addi instruction, - create alternative add behavior with TA_REGNUM if necessary, - using NEGATIVE value to tell that we are decreasing address. */ - sp_adjust = nds32_force_addi_stack_int ( (-1) * sp_adjust); - if (sp_adjust) - { - /* Generate sp adjustment instruction - if and only if sp_adjust != 0. */ - sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, - stack_pointer_rtx, - GEN_INT (-1 * sp_adjust)); - /* Emit rtx into instructions list and receive INSN rtx form. */ - sp_adjust_insn = emit_insn (sp_adjust_insn); - - /* The insn rtx 'sp_adjust_insn' will change frame layout. - We need to use RTX_FRAME_RELATED_P so that GCC is able to - generate CFI (Call Frame Information) stuff. */ - RTX_FRAME_RELATED_P (sp_adjust_insn) = 1; + nds32_emit_adjust_frame (stack_pointer_rtx, + hard_frame_pointer_rtx, + -1 * sp_adjust); } } - - /* Prevent the instruction scheduler from - moving instructions across the boundary. */ - emit_insn (gen_blockage ()); -} - -/* Function for v3pop epilogue. */ -void -nds32_expand_epilogue_v3pop (void) -{ - int sp_adjust; - - rtx Rb, Re; - rtx pop_insn; - rtx sp_adjust_insn; - - /* Compute and setup stack frame size. - The result will be in cfun->machine. */ - nds32_compute_stack_frame (); - - /* Prevent the instruction scheduler from - moving instructions across the boundary. 
*/ - emit_insn (gen_blockage ()); - - /* If the function is 'naked', we do not have to generate - epilogue code fragment BUT 'ret' instruction. */ - if (cfun->machine->naked_p) - { - /* Generate return instruction by using - unspec_volatile_func_return pattern. - Make sure this instruction is after gen_blockage(). - NOTE that $lp will become 'live' - after this instruction has been emitted. */ - emit_insn (gen_unspec_volatile_func_return ()); - return; - } - - /* Get callee_first_regno and callee_last_regno. */ - Rb = gen_rtx_REG (SImode, cfun->machine->callee_saved_regs_first_regno); - Re = gen_rtx_REG (SImode, cfun->machine->callee_saved_regs_last_regno); - - /* Calculate sp_adjust first to test if 'pop25 Re,imm8u' is available, - where imm8u has to be 8-byte alignment. */ - sp_adjust = cfun->machine->local_size - + cfun->machine->out_args_size - + cfun->machine->callee_saved_area_padding_bytes; - - /* We have to consider alloca issue as well. - If the function does call alloca(), the stack pointer is not fixed. - In that case, we cannot use 'pop25 Re,imm8u' directly. - We have to caculate stack pointer from frame pointer - and then use 'pop25 Re,0'. - Of course, the frame_pointer_needed should be nonzero - if the function calls alloca(). */ - if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) - && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust) - && !cfun->calls_alloca) + else { - /* We can use 'pop25 Re,imm8u'. */ + /* Restore fpu registers. */ + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + int gpr_padding = cfun->machine->callee_saved_area_gpr_padding_bytes; - /* pop_insn = gen_stack_v3pop(last_regno, sp_adjust), - the pattern 'stack_v3pop' is implementad in nds32.md. - The (const_int 14) means v3pop always pop { $fp $gp $lp }. */ - pop_insn = nds32_gen_stack_v3pop (Rb, Re, - GEN_INT (14), GEN_INT (sp_adjust)); + /* Adjust $sp = $sp + local_size + out_args_size. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size; - /* Emit pop instruction. */ - emit_insn (pop_insn); - } - else - { - /* We have to use 'pop25 Re,0', and prior to it, - we must expand one more instruction to adjust $sp. */ + nds32_emit_adjust_frame (stack_pointer_rtx, + stack_pointer_rtx, + sp_adjust); - if (frame_pointer_needed) - { - /* adjust $sp = $fp - 4 ($fp size) - - 4 ($gp size) - - 4 ($lp size) - - (4 * n) (callee-saved registers) - Note: No need to adjust - cfun->machine->callee_saved_area_padding_bytes, - because we want to adjust stack pointer - to the position for pop instruction. */ - sp_adjust = cfun->machine->fp_size - + cfun->machine->gp_size - + cfun->machine->lp_size - + cfun->machine->callee_saved_regs_size; - sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, - hard_frame_pointer_rtx, - GEN_INT (-1 * sp_adjust)); - /* Emit rtx into instructions list and receive INSN rtx form. */ - sp_adjust_insn = emit_insn (sp_adjust_insn); + /* Emit fpu load instruction, using .bi instruction + load fpu registers, and adjust $sp from callee-saved fpr register + to callee-saved gpr register. */ + nds32_emit_pop_fpr_callee_saved (gpr_padding); } else { /* If frame pointer is NOT needed, we cannot calculate the sp adjustment from frame pointer. Instead, we calculate the adjustment by local_size, - out_args_size, and callee_saved_area_padding_bytes. + out_args_size, and callee_saved_area_gpr_padding_bytes. Notice that such sp adjustment value may be out of range, so we have to deal with it as well. 
*/ /* Adjust $sp = $sp + local_size + out_args_size - + callee_saved_area_padding_bytes. */ + + callee_saved_area_gpr_padding_bytes. */ sp_adjust = cfun->machine->local_size + cfun->machine->out_args_size - + cfun->machine->callee_saved_area_padding_bytes; - /* sp_adjust value may be out of range of the addi instruction, - create alternative add behavior with TA_REGNUM if necessary, - using POSITIVE value to tell that we are increasing address. */ - sp_adjust = nds32_force_addi_stack_int (sp_adjust); - if (sp_adjust) - { - /* Generate sp adjustment instruction - if and only if sp_adjust != 0. */ - sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, - stack_pointer_rtx, - GEN_INT (sp_adjust)); - /* Emit rtx into instructions list and receive INSN rtx form. */ - sp_adjust_insn = emit_insn (sp_adjust_insn); - } - } - - /* pop_insn = gen_stack_v3pop(last_regno, sp_adjust), - the pattern 'stack_v3pop' is implementad in nds32.md. */ - /* The (const_int 14) means v3pop always pop { $fp $gp $lp }. */ - pop_insn = nds32_gen_stack_v3pop (Rb, Re, - GEN_INT (14), GEN_INT (0)); + + cfun->machine->callee_saved_area_gpr_padding_bytes; - /* Emit pop instruction. */ - emit_insn (pop_insn); + nds32_emit_adjust_frame (stack_pointer_rtx, + stack_pointer_rtx, + sp_adjust); + } } -} -/* ------------------------------------------------------------------------ */ - -/* Function to test 333-form for load/store instructions. - This is auxiliary extern function for auxiliary macro in nds32.h. - Because it is a little complicated, we use function instead of macro. */ -bool -nds32_ls_333_p (rtx rt, rtx ra, rtx imm, enum machine_mode mode) -{ - if (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS - && REGNO_REG_CLASS (REGNO (ra)) == LOW_REGS) + /* Restore eh data registers. */ + if (cfun->machine->use_eh_return_p) { - if (GET_MODE_SIZE (mode) == 4) - return satisfies_constraint_Iu05 (imm); + Rb = cfun->machine->eh_return_data_first_regno; + Re = cfun->machine->eh_return_data_last_regno; - if (GET_MODE_SIZE (mode) == 2) - return satisfies_constraint_Iu04 (imm); - - if (GET_MODE_SIZE (mode) == 1) - return satisfies_constraint_Iu03 (imm); + /* No need to pop $fp, $gp, or $lp. */ + nds32_emit_stack_pop_multiple (Rb, Re, false, false, false); } - return false; -} - - -/* Functions to expand load_multiple and store_multiple. - They are auxiliary extern functions to help create rtx template. - Check nds32-multiple.md file for the patterns. */ -rtx -nds32_expand_load_multiple (int base_regno, int count, - rtx base_addr, rtx basemem) -{ - int par_index; - int offset; - rtx result; - rtx new_addr, mem, reg; - - /* Create the pattern that is presented in nds32-multiple.md. */ - - result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); + /* Get callee_first_regno and callee_last_regno. */ + Rb = cfun->machine->callee_saved_first_gpr_regno; + Re = cfun->machine->callee_saved_last_gpr_regno; - for (par_index = 0; par_index < count; par_index++) + /* If $fp, $gp, $lp, and all callee-save registers are NOT required + to be saved, we don't have to create multiple pop instruction. + Otherwise, a multiple pop instruction is needed. */ + if (!(Rb == SP_REGNUM && Re == SP_REGNUM + && cfun->machine->fp_size == 0 + && cfun->machine->gp_size == 0 + && cfun->machine->lp_size == 0)) { - offset = par_index * 4; - /* 4-byte for loading data to each register. 
*/ - new_addr = plus_constant (Pmode, base_addr, offset); - mem = adjust_automodify_address_nv (basemem, SImode, - new_addr, offset); - reg = gen_rtx_REG (SImode, base_regno + par_index); - - XVECEXP (result, 0, par_index) = gen_rtx_SET (VOIDmode, reg, mem); + /* Create multiple pop instruction rtx. */ + nds32_emit_stack_pop_multiple ( + Rb, Re, + cfun->machine->fp_size, cfun->machine->gp_size, cfun->machine->lp_size); } - return result; -} - -rtx -nds32_expand_store_multiple (int base_regno, int count, - rtx base_addr, rtx basemem) -{ - int par_index; - int offset; - rtx result; - rtx new_addr, mem, reg; - - /* Create the pattern that is presented in nds32-multiple.md. */ - - result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); - - for (par_index = 0; par_index < count; par_index++) + /* If this is a variadic function, we do not have to restore argument + registers but need to adjust stack pointer back to previous stack + frame location before return. */ + if (cfun->machine->va_args_size != 0) { - offset = par_index * 4; - /* 4-byte for storing data to memory. */ - new_addr = plus_constant (Pmode, base_addr, offset); - mem = adjust_automodify_address_nv (basemem, SImode, - new_addr, offset); - reg = gen_rtx_REG (SImode, base_regno + par_index); + /* Generate sp adjustment instruction. + We need to consider padding bytes here. */ + sp_adjust = cfun->machine->va_args_size + + cfun->machine->va_args_area_padding_bytes; - XVECEXP (result, 0, par_index) = gen_rtx_SET (VOIDmode, mem, reg); + nds32_emit_adjust_frame (stack_pointer_rtx, + stack_pointer_rtx, + sp_adjust); } - return result; -} - -/* Function to move block memory content by - using load_multiple and store_multiple. - This is auxiliary extern function to help create rtx template. - Check nds32-multiple.md file for the patterns. */ -int -nds32_expand_movmemqi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment) -{ - HOST_WIDE_INT in_words, out_words; - rtx dst_base_reg, src_base_reg; - int maximum_bytes; - - /* Because reduced-set regsiters has few registers - (r0~r5, r6~10, r15, r28~r31, where 'r15' and 'r28~r31' - cannot be used for register allocation), - using 8 registers (32 bytes) for moving memory block - may easily consume all of them. - It makes register allocation/spilling hard to work. - So we only allow maximum=4 registers (16 bytes) for - moving memory block under reduced-set registers. */ - if (TARGET_REDUCED_REGS) - maximum_bytes = 16; - else - maximum_bytes = 32; - - /* 1. Total_bytes is integer for sure. - 2. Alignment is integer for sure. - 3. Maximum 4 or 8 registers, 4 * 4 = 16 bytes, 8 * 4 = 32 bytes. - 4. Requires (n * 4) block size. - 5. Requires 4-byte alignment. */ - if (GET_CODE (total_bytes) != CONST_INT - || GET_CODE (alignment) != CONST_INT - || INTVAL (total_bytes) > maximum_bytes - || INTVAL (total_bytes) & 3 - || INTVAL (alignment) & 3) - return 0; - - dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0)); - src_base_reg = copy_to_mode_reg (SImode, XEXP (srcmem, 0)); - - out_words = in_words = INTVAL (total_bytes) / UNITS_PER_WORD; - - emit_insn (nds32_expand_load_multiple (0, in_words, src_base_reg, srcmem)); - emit_insn (nds32_expand_store_multiple (0, out_words, dst_base_reg, dstmem)); - - /* Successfully create patterns, return 1. */ - return 1; -} - -/* Function to check whether the OP is a valid load/store operation. - This is a helper function for the predicates: - 'nds32_load_multiple_operation' and 'nds32_store_multiple_operation' - in predicates.md file. 
- - The OP is supposed to be a parallel rtx. - For each element within this parallel rtx: - (set (reg) (mem addr)) is the form for load operation. - (set (mem addr) (reg)) is the form for store operation. - We have to extract reg and mem of every element and - check if the information is valid for multiple load/store operation. */ -bool -nds32_valid_multiple_load_store (rtx op, bool load_p) -{ - int count; - int first_elt_regno; - rtx elt; - - /* Get the counts of elements in the parallel rtx. */ - count = XVECLEN (op, 0); - /* Pick up the first element. */ - elt = XVECEXP (op, 0, 0); - - /* Perform some quick check for the first element in the parallel rtx. */ - if (GET_CODE (elt) != SET - || count <= 1 - || count > 8) - return false; - - /* Pick up regno of first element for further detail checking. - Note that the form is different between load and store operation. */ - if (load_p) - { - if (GET_CODE (SET_DEST (elt)) != REG - || GET_CODE (SET_SRC (elt)) != MEM) - return false; - - first_elt_regno = REGNO (SET_DEST (elt)); - } - else + /* If this function uses __builtin_eh_return, make stack adjustment + for exception handler. */ + if (cfun->machine->use_eh_return_p) { - if (GET_CODE (SET_SRC (elt)) != REG - || GET_CODE (SET_DEST (elt)) != MEM) - return false; - - first_elt_regno = REGNO (SET_SRC (elt)); - } - - /* Perform detail check for each element. - Refer to nds32-multiple.md for more information - about following checking. - The starting element of parallel rtx is index 0. */ - if (!nds32_consecutive_registers_load_store_p (op, load_p, 0, - first_elt_regno, - count)) - return false; - - /* Pass all test, this is a valid rtx. */ - return true; -} - -/* Function to check whether the OP is a valid stack push/pop operation. - For a valid stack operation, it must satisfy following conditions: - 1. Consecutive registers push/pop operations. - 2. Valid $fp/$gp/$lp push/pop operations. - 3. The last element must be stack adjustment rtx. - See the prologue/epilogue implementation for details. */ -bool -nds32_valid_stack_push_pop (rtx op, bool push_p) -{ - int index; - int total_count; - int rest_count; - int first_regno; - rtx elt; - rtx elt_reg; - rtx elt_mem; - rtx elt_plus; + /* We need to unwind the stack by the offset computed by + EH_RETURN_STACKADJ_RTX. However, at this point the CFA is + based on SP. Ideally we would update the SP and define the + CFA along the lines of: - /* Get the counts of elements in the parallel rtx. */ - total_count = XVECLEN (op, 0); + SP = SP + EH_RETURN_STACKADJ_RTX + (regnote CFA = SP - EH_RETURN_STACKADJ_RTX) - /* Perform some quick check for that every element should be 'set'. */ - for (index = 0; index < total_count; index++) - { - elt = XVECEXP (op, 0, index); - if (GET_CODE (elt) != SET) - return false; - } + However the dwarf emitter only understands a constant + register offset. - /* For push operation, the parallel rtx looks like: - (parallel [(set (mem (plus (reg:SI SP_REGNUM) (const_int -32))) - (reg:SI Rb)) - (set (mem (plus (reg:SI SP_REGNUM) (const_int -28))) - (reg:SI Rb+1)) - ... 
- (set (mem (plus (reg:SI SP_REGNUM) (const_int -16))) - (reg:SI Re)) - (set (mem (plus (reg:SI SP_REGNUM) (const_int -12))) - (reg:SI FP_REGNUM)) - (set (mem (plus (reg:SI SP_REGNUM) (const_int -8))) - (reg:SI GP_REGNUM)) - (set (mem (plus (reg:SI SP_REGNUM) (const_int -4))) - (reg:SI LP_REGNUM)) - (set (reg:SI SP_REGNUM) - (plus (reg:SI SP_REGNUM) (const_int -32)))]) + The solution chosen here is to use the otherwise $ta ($r15) + as a temporary register to hold the current SP value. The + CFA is described using $ta then SP is modified. */ - For pop operation, the parallel rtx looks like: - (parallel [(set (reg:SI Rb) - (mem (reg:SI SP_REGNUM))) - (set (reg:SI Rb+1) - (mem (plus (reg:SI SP_REGNUM) (const_int 4)))) - ... - (set (reg:SI Re) - (mem (plus (reg:SI SP_REGNUM) (const_int 16)))) - (set (reg:SI FP_REGNUM) - (mem (plus (reg:SI SP_REGNUM) (const_int 20)))) - (set (reg:SI GP_REGNUM) - (mem (plus (reg:SI SP_REGNUM) (const_int 24)))) - (set (reg:SI LP_REGNUM) - (mem (plus (reg:SI SP_REGNUM) (const_int 28)))) - (set (reg:SI SP_REGNUM) - (plus (reg:SI SP_REGNUM) (const_int 32)))]) */ - - /* 1. Consecutive registers push/pop operations. - We need to calculate how many registers should be consecutive. - The $sp adjustment rtx, $fp push rtx, $gp push rtx, - and $lp push rtx are excluded. */ - - /* Exclude last $sp adjustment rtx. */ - rest_count = total_count - 1; - /* Exclude $fp, $gp, and $lp if they are in the parallel rtx. */ - if (cfun->machine->fp_size) - rest_count--; - if (cfun->machine->gp_size) - rest_count--; - if (cfun->machine->lp_size) - rest_count--; - - if (rest_count > 0) - { - elt = XVECEXP (op, 0, 0); - /* Pick up register element. */ - elt_reg = push_p ? SET_SRC (elt) : SET_DEST (elt); - first_regno = REGNO (elt_reg); - - /* The 'push' operation is a kind of store operation. - The 'pop' operation is a kind of load operation. - Pass corresponding false/true as second argument (bool load_p). - The par_index is supposed to start with index 0. */ - if (!nds32_consecutive_registers_load_store_p (op, - !push_p ? true : false, - 0, - first_regno, - rest_count)) - return false; - } - - /* 2. Valid $fp/$gp/$lp push/pop operations. - Remember to set start index for checking them. */ - - /* The rest_count is the start index for checking $fp/$gp/$lp. */ - index = rest_count; - /* If index < 0, this parallel rtx is definitely - not a valid stack push/pop operation. */ - if (index < 0) - return false; + rtx ta_reg; + rtx insn; - /* Check $fp/$gp/$lp one by one. - We use 'push_p' to pick up reg rtx and mem rtx. */ - if (cfun->machine->fp_size) - { - elt = XVECEXP (op, 0, index); - elt_mem = push_p ? SET_DEST (elt) : SET_SRC (elt); - elt_reg = push_p ? SET_SRC (elt) : SET_DEST (elt); - index++; - - if (GET_CODE (elt_mem) != MEM - || GET_CODE (elt_reg) != REG - || REGNO (elt_reg) != FP_REGNUM) - return false; - } - if (cfun->machine->gp_size) - { - elt = XVECEXP (op, 0, index); - elt_mem = push_p ? SET_DEST (elt) : SET_SRC (elt); - elt_reg = push_p ? SET_SRC (elt) : SET_DEST (elt); - index++; - - if (GET_CODE (elt_mem) != MEM - || GET_CODE (elt_reg) != REG - || REGNO (elt_reg) != GP_REGNUM) - return false; - } - if (cfun->machine->lp_size) - { - elt = XVECEXP (op, 0, index); - elt_mem = push_p ? SET_DEST (elt) : SET_SRC (elt); - elt_reg = push_p ? SET_SRC (elt) : SET_DEST (elt); - index++; - - if (GET_CODE (elt_mem) != MEM - || GET_CODE (elt_reg) != REG - || REGNO (elt_reg) != LP_REGNUM) - return false; - } - - /* 3. The last element must be stack adjustment rtx. 
- Its form of rtx should be: - (set (reg:SI SP_REGNUM) - (plus (reg:SI SP_REGNUM) (const_int X))) - The X could be positive or negative value. */ - - /* Pick up the last element. */ - elt = XVECEXP (op, 0, total_count - 1); - - /* Extract its destination and source rtx. */ - elt_reg = SET_DEST (elt); - elt_plus = SET_SRC (elt); - - /* Check this is (set (stack_reg) (plus stack_reg const)) pattern. */ - if (GET_CODE (elt_reg) != REG - || GET_CODE (elt_plus) != PLUS - || REGNO (elt_reg) != SP_REGNUM) - return false; + ta_reg = gen_rtx_REG (SImode, TA_REGNUM); - /* Pass all test, this is a valid rtx. */ - return true; -} + insn = emit_move_insn (ta_reg, stack_pointer_rtx); + add_reg_note (insn, REG_CFA_DEF_CFA, ta_reg); + RTX_FRAME_RELATED_P (insn) = 1; -/* Computing the Length of an Insn. - Modifies the length assigned to instruction INSN. - LEN is the initially computed length of the insn. */ -int -nds32_adjust_insn_length (rtx insn, int length) -{ - rtx src, dst; - - switch (recog_memoized (insn)) - { - case CODE_FOR_move_df: - case CODE_FOR_move_di: - /* Adjust length of movd44 to 2. */ - src = XEXP (PATTERN (insn), 1); - dst = XEXP (PATTERN (insn), 0); - - if (REG_P (src) - && REG_P (dst) - && (REGNO (src) % 2) == 0 - && (REGNO (dst) % 2) == 0) - length = 2; - break; + emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + EH_RETURN_STACKADJ_RTX)); - default: - break; + /* Ensure the assignment to $ta does not get optimized away. */ + emit_use (ta_reg); } - return length; -} - - -/* Function to check if 'bclr' instruction can be used with IVAL. */ -int -nds32_can_use_bclr_p (int ival) -{ - int one_bit_count; - - /* Calculate the number of 1-bit of (~ival), if there is only one 1-bit, - it means the original ival has only one 0-bit, - So it is ok to perform 'bclr' operation. */ - - one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (~ival)); - - /* 'bclr' is a performance extension instruction. */ - return (TARGET_PERF_EXT && (one_bit_count == 1)); -} - -/* Function to check if 'bset' instruction can be used with IVAL. */ -int -nds32_can_use_bset_p (int ival) -{ - int one_bit_count; - - /* Caculate the number of 1-bit of ival, if there is only one 1-bit, - it is ok to perform 'bset' operation. */ - - one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival)); - - /* 'bset' is a performance extension instruction. */ - return (TARGET_PERF_EXT && (one_bit_count == 1)); + /* Generate return instruction. */ + if (!sibcall_p) + emit_jump_insn (gen_return_internal ()); } -/* Function to check if 'btgl' instruction can be used with IVAL. */ -int -nds32_can_use_btgl_p (int ival) -{ - int one_bit_count; - - /* Caculate the number of 1-bit of ival, if there is only one 1-bit, - it is ok to perform 'btgl' operation. */ - - one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival)); - - /* 'btgl' is a performance extension instruction. */ - return (TARGET_PERF_EXT && (one_bit_count == 1)); -} - -/* Function to check if 'bitci' instruction can be used with IVAL. */ -int -nds32_can_use_bitci_p (int ival) +/* Function for v3push prologue. */ +void +nds32_expand_prologue_v3push (void) { - /* If we are using V3 ISA, we have 'bitci' instruction. - Try to see if we can present 'andi' semantic with - such 'bit-clear-immediate' operation. - For example, 'andi $r0,$r0,0xfffffffc' can be - presented with 'bitci $r0,$r0,3'. 
*/ - return (TARGET_ISA_V3 - && (ival < 0) - && satisfies_constraint_Iu15 (gen_int_mode (~ival, SImode))); -} - + int fp_adjust; + int sp_adjust; + int fpr_space = 0; + unsigned Rb, Re; -/* Return true if is load/store with SYMBOL_REF addressing mode - and memory mode is SImode. */ -bool -nds32_symbol_load_store_p (rtx insn) -{ - rtx mem_src = NULL_RTX; + /* Compute and setup stack frame size. + The result will be in cfun->machine. */ + nds32_compute_stack_frame (); - switch (get_attr_type (insn)) - { - case TYPE_LOAD: - mem_src = SET_SRC (PATTERN (insn)); - break; - case TYPE_STORE: - mem_src = SET_DEST (PATTERN (insn)); - break; - default: - break; - } + if (cfun->machine->callee_saved_gpr_regs_size > 0) + df_set_regs_ever_live (FP_REGNUM, 1); - /* Find load/store insn with addressing mode is SYMBOL_REF. */ - if (mem_src != NULL_RTX) - { - if ((GET_CODE (mem_src) == ZERO_EXTEND) - || (GET_CODE (mem_src) == SIGN_EXTEND)) - mem_src = XEXP (mem_src, 0); + /* Check frame_pointer_needed again to prevent fp is need after reload. */ + if (frame_pointer_needed) + cfun->machine->fp_as_gp_p = false; - if ((GET_CODE (XEXP (mem_src, 0)) == SYMBOL_REF) - || (GET_CODE (XEXP (mem_src, 0)) == LO_SUM)) - return true; - } + /* If the function is 'naked', + we do not have to generate prologue code fragment. */ + if (cfun->machine->naked_p && !flag_pic) + return; - return false; -} + /* Get callee_first_regno and callee_last_regno. */ + Rb = cfun->machine->callee_saved_first_gpr_regno; + Re = cfun->machine->callee_saved_last_gpr_regno; -/* Function to determine whether it is worth to do fp_as_gp optimization. - Return 0: It is NOT worth to do fp_as_gp optimization. - Return 1: It is APPROXIMATELY worth to do fp_as_gp optimization. - Note that if it is worth to do fp_as_gp optimization, - we MUST set FP_REGNUM ever live in this function. */ -int -nds32_fp_as_gp_check_available (void) -{ - /* If there exists ANY of following conditions, - we DO NOT perform fp_as_gp optimization: - 1. TARGET_FORBID_FP_AS_GP is set - regardless of the TARGET_FORCE_FP_AS_GP. - 2. User explicitly uses 'naked' attribute. - 3. Not optimize for size. - 4. Need frame pointer. - 5. If $fp is already required to be saved, - it means $fp is already choosen by register allocator. - Thus we better not to use it for fp_as_gp optimization. - 6. This function is a vararg function. - DO NOT apply fp_as_gp optimization on this function - because it may change and break stack frame. - 7. The epilogue is empty. - This happens when the function uses exit() - or its attribute is no_return. - In that case, compiler will not expand epilogue - so that we have no chance to output .omit_fp_end directive. */ - if (TARGET_FORBID_FP_AS_GP - || lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) - || !optimize_size - || frame_pointer_needed - || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM) - || (cfun->stdarg == 1) - || (find_fallthru_edge (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) == NULL)) - return 0; + /* Calculate sp_adjust first to test if 'push25 Re,imm8u' is available, + where imm8u has to be 8-byte alignment. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; - /* Now we can check the possibility of using fp_as_gp optimization. */ - if (TARGET_FORCE_FP_AS_GP) - { - /* User explicitly issues -mforce-fp-as-gp option. 
*/ - df_set_regs_ever_live (FP_REGNUM, 1); - return 1; - } - else + if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) + && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust)) { - /* In the following we are going to evaluate whether - it is worth to do fp_as_gp optimization. */ - int good_gain = 0; - int symbol_count = 0; - - int threshold; - rtx insn; - - /* We check if there already requires prologue. - Note that $gp will be saved in prologue for PIC code generation. - After that, we can set threshold by the existence of prologue. - Each fp-implied instruction will gain 2-byte code size - from gp-aware instruction, so we have following heuristics. */ - if (flag_pic - || nds32_have_prologue_p ()) - { - /* Have-prologue: - Compiler already intends to generate prologue content, - so the fp_as_gp optimization will only insert - 'la $fp,_FP_BASE_' instruction, which will be - converted into 4-byte instruction at link time. - The threshold is "3" symbol accesses, 2 + 2 + 2 > 4. */ - threshold = 3; - } - else - { - /* None-prologue: - Compiler originally does not generate prologue content, - so the fp_as_gp optimization will NOT ONLY insert - 'la $fp,_FP_BASE' instruction, but also causes - push/pop instructions. - If we are using v3push (push25/pop25), - the threshold is "5" symbol accesses, 5*2 > 4 + 2 + 2; - If we are using normal push (smw/lmw), - the threshold is "5+2" symbol accesses 7*2 > 4 + 4 + 4. */ - threshold = 5 + (TARGET_V3PUSH ? 0 : 2); - } - - /* We would like to traverse every instruction in this function. - So we need to have push_topmost_sequence()/pop_topmost_sequence() - surrounding our for-loop evaluation. */ - push_topmost_sequence (); - /* Counting the insn number which the addressing mode is symbol. */ - for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) - { - if (single_set (insn) && nds32_symbol_load_store_p (insn)) - symbol_count++; + /* We can use 'push25 Re,imm8u'. */ - if (symbol_count == threshold) - { - good_gain = 1; - break; - } + /* nds32_emit_stack_v3push(last_regno, sp_adjust), + the pattern 'stack_v3push' is implemented in nds32.md. */ + nds32_emit_stack_v3push (Rb, Re, sp_adjust); + + /* Save fpu registers. */ + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + /* Calculate fpr position. */ + int fpr_position = cfun->machine->local_size + + cfun->machine->out_args_size; + /* Emit fpu store instruction, using [$sp + offset] store + fpu registers. */ + nds32_emit_push_fpr_callee_saved (fpr_position); } - pop_topmost_sequence (); - /* Enable fp_as_gp optimization when potential gain is good enough. */ - if (good_gain) + /* Check frame_pointer_needed to see + if we shall emit fp adjustment instruction. */ + if (frame_pointer_needed) { - df_set_regs_ever_live (FP_REGNUM, 1); - return 1; - } - } - - /* By default we return 0. */ - return 0; -} - - -/* Function to generate PC relative jump table. - Refer to nds32.md for more details. - - The following is the sample for the case that diff value - can be presented in '.short' size. - - addi $r1, $r1, -(case_lower_bound) - slti $ta, $r1, (case_number) - beqz $ta, .L_skip_label - - la $ta, .L35 ! get jump table address - lh $r1, [$ta + $r1 << 1] ! load symbol diff from jump table entry - addi $ta, $r1, $ta - jr5 $ta - - ! 
jump table entry - L35: - .short .L25-.L35 - .short .L26-.L35 - .short .L27-.L35 - .short .L28-.L35 - .short .L29-.L35 - .short .L30-.L35 - .short .L31-.L35 - .short .L32-.L35 - .short .L33-.L35 - .short .L34-.L35 */ -const char * -nds32_output_casesi_pc_relative (rtx *operands) -{ - enum machine_mode mode; - rtx diff_vec; - - diff_vec = PATTERN (NEXT_INSN (operands[1])); - - gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); - - /* Step C: "t <-- operands[1]". */ - output_asm_insn ("la\t$ta, %l1", operands); - - /* Get the mode of each element in the difference vector. */ - mode = GET_MODE (diff_vec); + /* adjust $fp = $sp + 4 ($fp size) + + 4 ($gp size) + + 4 ($lp size) + + (4 * n) (callee-saved registers) + + sp_adjust ('push25 Re,imm8u') + Note: Since we use 'push25 Re,imm8u', + the position of stack pointer is further + changed after push instruction. + Hence, we need to take sp_adjust value + into consideration. */ + fp_adjust = cfun->machine->fp_size + + cfun->machine->gp_size + + cfun->machine->lp_size + + cfun->machine->callee_saved_gpr_regs_size + + sp_adjust; - /* Step D: "z <-- (mem (plus (operands[0] << m) t))", - where m is 0, 1, or 2 to load address-diff value from table. */ - switch (mode) - { - case QImode: - output_asm_insn ("lb\t%2, [$ta + %0 << 0]", operands); - break; - case HImode: - output_asm_insn ("lh\t%2, [$ta + %0 << 1]", operands); - break; - case SImode: - output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); - break; - default: - gcc_unreachable (); + nds32_emit_adjust_frame (hard_frame_pointer_rtx, + stack_pointer_rtx, + fp_adjust); + } } - - /* Step E: "t <-- z + t". - Add table label_ref with address-diff value to - obtain target case address. */ - output_asm_insn ("add\t$ta, %2, $ta", operands); - - /* Step F: jump to target with register t. */ - if (TARGET_16_BIT) - return "jr5\t$ta"; - else - return "jr\t$ta"; -} - -/* Function to generate normal jump table. */ -const char * -nds32_output_casesi (rtx *operands) -{ - /* Step C: "t <-- operands[1]". */ - output_asm_insn ("la\t$ta, %l1", operands); - - /* Step D: "z <-- (mem (plus (operands[0] << 2) t))". */ - output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); - - /* No need to perform Step E, which is only used for - pc relative jump table. */ - - /* Step F: jump to target with register z. */ - if (TARGET_16_BIT) - return "jr5\t%2"; else - return "jr\t%2"; -} - - -/* Function to return memory format. */ -enum nds32_16bit_address_type -nds32_mem_format (rtx op) -{ - enum machine_mode mode_test; - int val; - int regno; - - if (!TARGET_16_BIT) - return ADDRESS_NOT_16BIT_FORMAT; - - mode_test = GET_MODE (op); - - op = XEXP (op, 0); - - /* 45 format. */ - if (GET_CODE (op) == REG && (mode_test == SImode)) - return ADDRESS_REG; - - /* 333 format for QI/HImode. */ - if (GET_CODE (op) == REG && (REGNO (op) < R8_REGNUM)) - return ADDRESS_LO_REG_IMM3U; - - /* post_inc 333 format. */ - if ((GET_CODE (op) == POST_INC) && (mode_test == SImode)) { - regno = REGNO(XEXP (op, 0)); - - if (regno < 8) - return ADDRESS_POST_INC_LO_REG_IMM3U; - } - - /* post_inc 333 format. 
*/ - if ((GET_CODE (op) == POST_MODIFY) - && (mode_test == SImode) - && (REG_P (XEXP (XEXP (op, 1), 0))) - && (CONST_INT_P (XEXP (XEXP (op, 1), 1)))) - { - regno = REGNO (XEXP (XEXP (op, 1), 0)); - val = INTVAL (XEXP (XEXP (op, 1), 1)); - if (regno < 8 && val < 32) - return ADDRESS_POST_INC_LO_REG_IMM3U; - } - - if ((GET_CODE (op) == PLUS) - && (GET_CODE (XEXP (op, 0)) == REG) - && (GET_CODE (XEXP (op, 1)) == CONST_INT)) - { - val = INTVAL (XEXP (op, 1)); - - regno = REGNO(XEXP (op, 0)); - - if (regno > 7 - && regno != SP_REGNUM - && regno != FP_REGNUM) - return ADDRESS_NOT_16BIT_FORMAT; - - switch (mode_test) + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) { - case QImode: - /* 333 format. */ - if (val >= 0 && val < 8 && regno < 8) - return ADDRESS_LO_REG_IMM3U; - break; - - case HImode: - /* 333 format. */ - if (val >= 0 && val < 16 && (val % 2 == 0) && regno < 8) - return ADDRESS_LO_REG_IMM3U; - break; - - case SImode: - case SFmode: - case DFmode: - /* fp imply 37 format. */ - if ((regno == FP_REGNUM) && - (val >= 0 && val < 512 && (val % 4 == 0))) - return ADDRESS_FP_IMM7U; - /* sp imply 37 format. */ - else if ((regno == SP_REGNUM) && - (val >= 0 && val < 512 && (val % 4 == 0))) - return ADDRESS_SP_IMM7U; - /* 333 format. */ - else if (val >= 0 && val < 32 && (val % 4 == 0) && regno < 8) - return ADDRESS_LO_REG_IMM3U; - break; - - default: - break; + /* Calculate fpr space. */ + fpr_space = cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + + /* We have to use 'push25 Re, fpr_space', to pre-allocate + callee saved fpr registers space. */ + nds32_emit_stack_v3push (Rb, Re, fpr_space); + nds32_emit_push_fpr_callee_saved (0); } - } - - return ADDRESS_NOT_16BIT_FORMAT; -} - -/* Output 16-bit store. */ -const char * -nds32_output_16bit_store (rtx *operands, int byte) -{ - char pattern[100]; - char size; - rtx code = XEXP (operands[0], 0); - - size = nds32_byte_to_size (byte); - - switch (nds32_mem_format (operands[0])) - { - case ADDRESS_REG: - operands[0] = code; - output_asm_insn ("swi450\t%1, [%0]", operands); - break; - case ADDRESS_LO_REG_IMM3U: - snprintf (pattern, sizeof (pattern), "s%ci333\t%%1, %%0", size); - output_asm_insn (pattern, operands); - break; - case ADDRESS_POST_INC_LO_REG_IMM3U: - snprintf (pattern, sizeof (pattern), "s%ci333.bi\t%%1, %%0", size); - output_asm_insn (pattern, operands); - break; - case ADDRESS_FP_IMM7U: - output_asm_insn ("swi37\t%1, %0", operands); - break; - case ADDRESS_SP_IMM7U: - /* Get immediate value and set back to operands[1]. */ - operands[0] = XEXP (code, 1); - output_asm_insn ("swi37.sp\t%1, [ + (%0)]", operands); - break; - default: - break; - } - - return ""; -} - -/* Output 16-bit load. */ -const char * -nds32_output_16bit_load (rtx *operands, int byte) -{ - char pattern[100]; - unsigned char size; - rtx code = XEXP (operands[1], 0); - - size = nds32_byte_to_size (byte); - - switch (nds32_mem_format (operands[1])) - { - case ADDRESS_REG: - operands[1] = code; - output_asm_insn ("lwi450\t%0, [%1]", operands); - break; - case ADDRESS_LO_REG_IMM3U: - snprintf (pattern, sizeof (pattern), "l%ci333\t%%0, %%1", size); - output_asm_insn (pattern, operands); - break; - case ADDRESS_POST_INC_LO_REG_IMM3U: - snprintf (pattern, sizeof (pattern), "l%ci333.bi\t%%0, %%1", size); - output_asm_insn (pattern, operands); - break; - case ADDRESS_FP_IMM7U: - output_asm_insn ("lwi37\t%0, %1", operands); - break; - case ADDRESS_SP_IMM7U: - /* Get immediate value and set back to operands[0]. 
*/ - operands[1] = XEXP (code, 1); - output_asm_insn ("lwi37.sp\t%0, [ + (%1)]", operands); - break; - default: - break; - } - - return ""; -} - -/* Output 32-bit store. */ -const char * -nds32_output_32bit_store (rtx *operands, int byte) -{ - char pattern[100]; - unsigned char size; - rtx code = XEXP (operands[0], 0); - - size = nds32_byte_to_size (byte); - - switch (GET_CODE (code)) - { - case REG: - /* (mem (reg X)) - => access location by using register, - use "sbi / shi / swi" */ - snprintf (pattern, sizeof (pattern), "s%ci\t%%1, %%0", size); - break; + else + { + /* We have to use 'push25 Re,0' and + expand one more instruction to adjust $sp later. */ - case SYMBOL_REF: - case CONST: - /* (mem (symbol_ref X)) - (mem (const (...))) - => access global variables, - use "sbi.gp / shi.gp / swi.gp" */ - operands[0] = XEXP (operands[0], 0); - snprintf (pattern, sizeof (pattern), "s%ci.gp\t%%1, [ + %%0]", size); - break; + /* nds32_emit_stack_v3push(last_regno, sp_adjust), + the pattern 'stack_v3push' is implemented in nds32.md. */ + nds32_emit_stack_v3push (Rb, Re, 0); + } - case POST_INC: - /* (mem (post_inc reg)) - => access location by using register which will be post increment, - use "sbi.bi / shi.bi / swi.bi" */ - snprintf (pattern, sizeof (pattern), - "s%ci.bi\t%%1, %%0, %d", size, byte); - break; + /* Check frame_pointer_needed to see + if we shall emit fp adjustment instruction. */ + if (frame_pointer_needed) + { + /* adjust $fp = $sp + 4 ($fp size) + + 4 ($gp size) + + 4 ($lp size) + + (4 * n) (callee-saved registers) + Note: Since we use 'push25 Re,0', + the stack pointer is just at the position + after push instruction. + No need to take sp_adjust into consideration. */ + fp_adjust = cfun->machine->fp_size + + cfun->machine->gp_size + + cfun->machine->lp_size + + cfun->machine->callee_saved_gpr_regs_size; - case POST_DEC: - /* (mem (post_dec reg)) - => access location by using register which will be post decrement, - use "sbi.bi / shi.bi / swi.bi" */ - snprintf (pattern, sizeof (pattern), - "s%ci.bi\t%%1, %%0, -%d", size, byte); - break; + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + /* We use 'push25 Re, fpr_space', the $sp is + on callee saved fpr position, so need to consider + fpr space. 
*/ + fp_adjust = fp_adjust + fpr_space; + } - case POST_MODIFY: - switch (GET_CODE (XEXP (XEXP (code, 1), 1))) - { - case REG: - case SUBREG: - /* (mem (post_modify (reg) (plus (reg) (reg)))) - => access location by using register which will be - post modified with reg, - use "sb.bi/ sh.bi / sw.bi" */ - snprintf (pattern, sizeof (pattern), "s%c.bi\t%%1, %%0", size); - break; - case CONST_INT: - /* (mem (post_modify (reg) (plus (reg) (const_int)))) - => access location by using register which will be - post modified with const_int, - use "sbi.bi/ shi.bi / swi.bi" */ - snprintf (pattern, sizeof (pattern), "s%ci.bi\t%%1, %%0", size); - break; - default: - abort (); + nds32_emit_adjust_frame (hard_frame_pointer_rtx, + stack_pointer_rtx, + fp_adjust); } - break; - case PLUS: - switch (GET_CODE (XEXP (code, 1))) + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) { - case REG: - case SUBREG: - /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) reg)) - => access location by adding two registers, - use "sb / sh / sw" */ - snprintf (pattern, sizeof (pattern), "s%c\t%%1, %%0", size); - break; - case CONST_INT: - /* (mem (plus reg const_int)) - => access location by adding one register with const_int, - use "sbi / shi / swi" */ - snprintf (pattern, sizeof (pattern), "s%ci\t%%1, %%0", size); - break; - default: - abort (); + /* We use 'push25 Re, fpr_space', + the $sp is on callee saved fpr position, + no need to consider fpr space. */ + sp_adjust = sp_adjust - fpr_space; } - break; - - case LO_SUM: - operands[2] = XEXP (code, 1); - operands[0] = XEXP (code, 0); - snprintf (pattern, sizeof (pattern), - "s%ci\t%%1, [%%0 + lo12(%%2)]", size); - break; - default: - abort (); + /* Because we use 'push25 Re,0', + we need to expand one more instruction to adjust $sp. + using NEGATIVE value to tell that we are decreasing address. */ + nds32_emit_adjust_frame (stack_pointer_rtx, + stack_pointer_rtx, + -1 * sp_adjust); } - output_asm_insn (pattern, operands); - return ""; + /* Emit gp setup instructions for -fpic. */ + if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) + nds32_emit_load_gp (); + + /* Prevent the instruction scheduler from + moving instructions across the boundary. */ + emit_insn (gen_blockage ()); } -/* Output 32-bit load. */ -const char * -nds32_output_32bit_load (rtx *operands, int byte) +/* Function for v3pop epilogue. */ +void +nds32_expand_epilogue_v3pop (bool sibcall_p) { - char pattern[100]; - unsigned char size; - rtx code; + int sp_adjust; + unsigned Rb, Re; - code = XEXP (operands[1], 0); + /* Compute and setup stack frame size. + The result will be in cfun->machine. */ + nds32_compute_stack_frame (); - size = nds32_byte_to_size (byte); + /* Prevent the instruction scheduler from + moving instructions across the boundary. */ + emit_insn (gen_blockage ()); - switch (GET_CODE (code)) + /* If the function is 'naked', we do not have to generate + epilogue code fragment BUT 'ret' instruction. */ + if (cfun->machine->naked_p) { - case REG: - /* (mem (reg X)) - => access location by using register, - use "lbi / lhi / lwi" */ - snprintf (pattern, sizeof (pattern), "l%ci\t%%0, %%1", size); - break; + /* Generate return instruction by using 'return_internal' pattern. + Make sure this instruction is after gen_blockage(). + First we need to check this is a function without sibling call. */ + if (!sibcall_p) + { + /* We need to further check attributes to determine whether + there should be return instruction at epilogue. 
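/* Editorial note, not part of the patch: the fast path of this epilogue
   folds the whole stack teardown into a single 'pop25 Re,imm8u' only when
   the final adjustment fits the unsigned 8-bit immediate, stays 8-byte
   aligned, and the frame size is fixed (no alloca).  A standalone sketch of
   that predicate, using plain ints instead of the rtx constraint checks
   (the helper name is hypothetical):  */
static int
v3pop_can_fold_sp_adjust (int sp_adjust, int calls_alloca)
{
  /* Mirrors satisfies_constraint_Iu08, NDS32_DOUBLE_WORD_ALIGN_P and
     !cfun->calls_alloca from the code that follows.  */
  return sp_adjust >= 0 && sp_adjust <= 255
	 && (sp_adjust % 8) == 0
	 && !calls_alloca;
}
/* For example, local_size 16 + out_args_size 8 + padding 0 + fpr 0 gives
   sp_adjust 24, which passes the test, so 'pop25 Re,24' restores the saved
   registers and releases the frame in one instruction; otherwise $sp is
   recomputed and 'pop25 Re,0' is used.  */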
+ If the attribute naked exists but -mno-ret-in-naked-func + is issued, there is NO need to generate return instruction. */ + if (cfun->machine->attr_naked_p && !flag_ret_in_naked_func) + return; - case SYMBOL_REF: - case CONST: - /* (mem (symbol_ref X)) - (mem (const (...))) - => access global variables, - use "lbi.gp / lhi.gp / lwi.gp" */ - operands[1] = XEXP (operands[1], 0); - snprintf (pattern, sizeof (pattern), "l%ci.gp\t%%0, [ + %%1]", size); - break; - - case POST_INC: - /* (mem (post_inc reg)) - => access location by using register which will be post increment, - use "lbi.bi / lhi.bi / lwi.bi" */ - snprintf (pattern, sizeof (pattern), - "l%ci.bi\t%%0, %%1, %d", size, byte); - break; + emit_jump_insn (gen_return_internal ()); + } + return; + } - case POST_DEC: - /* (mem (post_dec reg)) - => access location by using register which will be post decrement, - use "lbi.bi / lhi.bi / lwi.bi" */ - snprintf (pattern, sizeof (pattern), - "l%ci.bi\t%%0, %%1, -%d", size, byte); - break; + /* Get callee_first_regno and callee_last_regno. */ + Rb = cfun->machine->callee_saved_first_gpr_regno; + Re = cfun->machine->callee_saved_last_gpr_regno; - case POST_MODIFY: - switch (GET_CODE (XEXP (XEXP (code, 1), 1))) - { - case REG: - case SUBREG: - /* (mem (post_modify (reg) (plus (reg) (reg)))) - => access location by using register which will be - post modified with reg, - use "lb.bi/ lh.bi / lw.bi" */ - snprintf (pattern, sizeof (pattern), "l%c.bi\t%%0, %%1", size); - break; - case CONST_INT: - /* (mem (post_modify (reg) (plus (reg) (const_int)))) - => access location by using register which will be - post modified with const_int, - use "lbi.bi/ lhi.bi / lwi.bi" */ - snprintf (pattern, sizeof (pattern), "l%ci.bi\t%%0, %%1", size); - break; - default: - abort (); - } - break; + /* Calculate sp_adjust first to test if 'pop25 Re,imm8u' is available, + where imm8u has to be 8-byte alignment. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; - case PLUS: - switch (GET_CODE (XEXP (code, 1))) + /* We have to consider alloca issue as well. + If the function does call alloca(), the stack pointer is not fixed. + In that case, we cannot use 'pop25 Re,imm8u' directly. + We have to caculate stack pointer from frame pointer + and then use 'pop25 Re,0'. + Of course, the frame_pointer_needed should be nonzero + if the function calls alloca(). */ + if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) + && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust) + && !cfun->calls_alloca) + { + /* Restore fpu registers. */ + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) { - case REG: - case SUBREG: - /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) reg)) - use "lb / lh / lw" */ - snprintf (pattern, sizeof (pattern), "l%c\t%%0, %%1", size); - break; - case CONST_INT: - /* (mem (plus reg const_int)) - => access location by adding one register with const_int, - use "lbi / lhi / lwi" */ - snprintf (pattern, sizeof (pattern), "l%ci\t%%0, %%1", size); - break; - default: - abort (); + int fpr_position = cfun->machine->local_size + + cfun->machine->out_args_size; + /* Emit fpu load instruction, using [$sp + offset] restore + fpu registers. 
*/ + nds32_emit_v3pop_fpr_callee_saved (fpr_position); } - break; - case LO_SUM: - operands[2] = XEXP (code, 1); - operands[1] = XEXP (code, 0); - snprintf (pattern, sizeof (pattern), - "l%ci\t%%0, [%%1 + lo12(%%2)]", size); - break; + /* We can use 'pop25 Re,imm8u'. */ - default: - abort (); + /* nds32_emit_stack_v3pop(last_regno, sp_adjust), + the pattern 'stack_v3pop' is implementad in nds32.md. */ + nds32_emit_stack_v3pop (Rb, Re, sp_adjust); } + else + { + /* We have to use 'pop25 Re,0', and prior to it, + we must expand one more instruction to adjust $sp. */ - output_asm_insn (pattern, operands); - return ""; -} + if (frame_pointer_needed) + { + /* adjust $sp = $fp - 4 ($fp size) + - 4 ($gp size) + - 4 ($lp size) + - (4 * n) (callee-saved registers) + Note: No need to adjust + cfun->machine->callee_saved_area_gpr_padding_bytes, + because we want to adjust stack pointer + to the position for pop instruction. */ + sp_adjust = cfun->machine->fp_size + + cfun->machine->gp_size + + cfun->machine->lp_size + + cfun->machine->callee_saved_gpr_regs_size; -/* Output 32-bit load with signed extension. */ -const char * -nds32_output_32bit_load_s (rtx *operands, int byte) -{ - char pattern[100]; - unsigned char size; - rtx code; + /* Restore fpu registers. */ + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + /* Set $sp to callee saved fpr position, we need to restore + fpr registers. */ + sp_adjust = sp_adjust + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; - code = XEXP (operands[1], 0); + nds32_emit_adjust_frame (stack_pointer_rtx, + hard_frame_pointer_rtx, + -1 * sp_adjust); - size = nds32_byte_to_size (byte); + /* Emit fpu load instruction, using [$sp + offset] restore + fpu registers. */ + nds32_emit_v3pop_fpr_callee_saved (0); + } + else + { + nds32_emit_adjust_frame (stack_pointer_rtx, + hard_frame_pointer_rtx, + -1 * sp_adjust); + } + } + else + { + /* If frame pointer is NOT needed, + we cannot calculate the sp adjustment from frame pointer. + Instead, we calculate the adjustment by local_size, + out_args_size, and callee_saved_area_padding_bytes. + Notice that such sp adjustment value may be out of range, + so we have to deal with it as well. */ - switch (GET_CODE (code)) - { - case REG: - /* (mem (reg X)) - => access location by using register, - use "lbsi / lhsi" */ - snprintf (pattern, sizeof (pattern), "l%csi\t%%0, %%1", size); - break; + /* Adjust $sp = $sp + local_size + out_args_size + + callee_saved_area_gpr_padding_bytes + + callee_saved_fpr_regs_size. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; - case SYMBOL_REF: - case CONST: - /* (mem (symbol_ref X)) - (mem (const (...))) - => access global variables, - use "lbsi.gp / lhsi.gp" */ - operands[1] = XEXP (operands[1], 0); - snprintf (pattern, sizeof (pattern), "l%csi.gp\t%%0, [ + %%1]", size); - break; + /* Restore fpu registers. */ + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + /* Set $sp to callee saved fpr position, we need to restore + fpr registers. 
*/ + sp_adjust = sp_adjust + - cfun->machine->callee_saved_area_gpr_padding_bytes + - cfun->machine->callee_saved_fpr_regs_size; - case POST_INC: - /* (mem (post_inc reg)) - => access location by using register which will be post increment, - use "lbsi.bi / lhsi.bi" */ - snprintf (pattern, sizeof (pattern), - "l%csi.bi\t%%0, %%1, %d", size, byte); - break; + nds32_emit_adjust_frame (stack_pointer_rtx, + stack_pointer_rtx, + sp_adjust); - case POST_DEC: - /* (mem (post_dec reg)) - => access location by using register which will be post decrement, - use "lbsi.bi / lhsi.bi" */ - snprintf (pattern, sizeof (pattern), - "l%csi.bi\t%%0, %%1, -%d", size, byte); - break; + /* Emit fpu load instruction, using [$sp + offset] restore + fpu registers. */ + nds32_emit_v3pop_fpr_callee_saved (0); + } + else + { + /* sp_adjust value may be out of range of the addi instruction, + create alternative add behavior with TA_REGNUM if necessary, + using POSITIVE value to tell that we are increasing + address. */ + nds32_emit_adjust_frame (stack_pointer_rtx, + stack_pointer_rtx, + sp_adjust); + } + } - case POST_MODIFY: - switch (GET_CODE (XEXP (XEXP (code, 1), 1))) + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) { - case REG: - case SUBREG: - /* (mem (post_modify (reg) (plus (reg) (reg)))) - => access location by using register which will be - post modified with reg, - use "lbs.bi/ lhs.bi" */ - snprintf (pattern, sizeof (pattern), "l%cs.bi\t%%0, %%1", size); - break; - case CONST_INT: - /* (mem (post_modify (reg) (plus (reg) (const_int)))) - => access location by using register which will be - post modified with const_int, - use "lbsi.bi/ lhsi.bi" */ - snprintf (pattern, sizeof (pattern), "l%csi.bi\t%%0, %%1", size); - break; - default: - abort (); + /* We have fpr need to restore, so $sp is set on callee saved fpr + position. And we use 'pop25 Re, fpr_space' to adjust $sp. */ + int fpr_space = cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + nds32_emit_stack_v3pop (Rb, Re, fpr_space); } - break; - - case PLUS: - switch (GET_CODE (XEXP (code, 1))) + else { - case REG: - case SUBREG: - /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) reg)) - use "lbs / lhs" */ - snprintf (pattern, sizeof (pattern), "l%cs\t%%0, %%1", size); - break; - case CONST_INT: - /* (mem (plus reg const_int)) - => access location by adding one register with const_int, - use "lbsi / lhsi" */ - snprintf (pattern, sizeof (pattern), "l%csi\t%%0, %%1", size); - break; - default: - abort (); + /* nds32_emit_stack_v3pop(last_regno, sp_adjust), + the pattern 'stack_v3pop' is implementad in nds32.md. */ + nds32_emit_stack_v3pop (Rb, Re, 0); } - break; - - case LO_SUM: - operands[2] = XEXP (code, 1); - operands[1] = XEXP (code, 0); - snprintf (pattern, sizeof (pattern), - "l%csi\t%%0, [%%1 + lo12(%%2)]", size); - break; - - default: - abort (); } - - output_asm_insn (pattern, operands); - return ""; + /* Generate return instruction. */ + emit_jump_insn (gen_pop25return ()); } -/* Function to output stack push operation. - We need to deal with normal stack push multiple or stack v3push. */ -const char * -nds32_output_stack_push (void) +/* Return nonzero if this function is known to have a null epilogue. + This allows the optimizer to omit jumps to jumps if no stack + was created. */ +int +nds32_can_use_return_insn (void) { - /* A string pattern for output_asm_insn(). */ - char pattern[100]; - /* The operands array which will be used in output_asm_insn(). 
*/ - rtx operands[3]; - /* Pick up callee-saved first regno and last regno for further use. */ - int rb_regno = cfun->machine->callee_saved_regs_first_regno; - int re_regno = cfun->machine->callee_saved_regs_last_regno; + int sp_adjust; - if (TARGET_V3PUSH) - { - /* For stack v3push: - operands[0]: Re - operands[1]: imm8u */ + /* Prior to reloading, we can't tell how many registers must be saved. + Thus we can not determine whether this function has null epilogue. */ + if (!reload_completed) + return 0; - /* This variable is to check if 'push25 Re,imm8u' is available. */ - int sp_adjust; + /* If attribute 'naked' appears but -mno-ret-in-naked-func is used, + we cannot use return instruction. */ + if (cfun->machine->attr_naked_p && !flag_ret_in_naked_func) + return 0; - /* Set operands[0]. */ - operands[0] = gen_rtx_REG (SImode, re_regno); + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + if (!cfun->machine->fp_as_gp_p + && satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) + && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust) + && !cfun->calls_alloca + && NDS32_V3PUSH_AVAILABLE_P + && !(TARGET_HARD_FLOAT + && (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM))) + return 1; - /* Check if we can generate 'push25 Re,imm8u', - otherwise, generate 'push25 Re,0'. */ - sp_adjust = cfun->machine->local_size - + cfun->machine->out_args_size - + cfun->machine->callee_saved_area_padding_bytes; - if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) - && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust)) - operands[1] = GEN_INT (sp_adjust); - else - operands[1] = GEN_INT (0); + /* If no stack was created, two conditions must be satisfied: + 1. This is a naked function. + So there is no callee-saved, local size, or outgoing size. + 2. This is NOT a variadic function. + So there is no pushing arguement registers into the stack. */ + return ((cfun->machine->naked_p && (cfun->machine->va_args_size == 0))); +} - /* Create assembly code pattern. */ - snprintf (pattern, sizeof (pattern), "push25\t%%0, %%1"); - } +enum machine_mode +nds32_case_vector_shorten_mode (int min_offset, int max_offset, + rtx body ATTRIBUTE_UNUSED) +{ + if (min_offset < 0 || max_offset >= 0x2000) + return SImode; else { - /* For normal stack push multiple: - operands[0]: Rb - operands[1]: Re - operands[2]: En4 */ - - /* This variable is used to check if we only need to generate En4 field. - As long as Rb==Re=SP_REGNUM, we set this variable to 1. */ - int push_en4_only_p = 0; - - /* Set operands[0] and operands[1]. */ - operands[0] = gen_rtx_REG (SImode, rb_regno); - operands[1] = gen_rtx_REG (SImode, re_regno); - - /* 'smw.adm $sp,[$sp],$sp,0' means push nothing. */ - if (!cfun->machine->fp_size - && !cfun->machine->gp_size - && !cfun->machine->lp_size - && REGNO (operands[0]) == SP_REGNUM - && REGNO (operands[1]) == SP_REGNUM) - { - /* No need to generate instruction. */ - return ""; - } - else - { - /* If Rb==Re=SP_REGNUM, we only need to generate En4 field. */ - if (REGNO (operands[0]) == SP_REGNUM - && REGNO (operands[1]) == SP_REGNUM) - push_en4_only_p = 1; - - /* Create assembly code pattern. - We need to handle the form: "Rb, Re, { $fp $gp $lp }". */ - snprintf (pattern, sizeof (pattern), - "push.s\t%s{%s%s%s }", - push_en4_only_p ? "" : "%0, %1, ", - cfun->machine->fp_size ? " $fp" : "", - cfun->machine->gp_size ? " $gp" : "", - cfun->machine->lp_size ? 
" $lp" : ""); - } - } - - /* We use output_asm_insn() to output assembly code by ourself. */ - output_asm_insn (pattern, operands); - return ""; -} - -/* Function to output stack pop operation. - We need to deal with normal stack pop multiple or stack v3pop. */ -const char * -nds32_output_stack_pop (void) -{ - /* A string pattern for output_asm_insn(). */ - char pattern[100]; - /* The operands array which will be used in output_asm_insn(). */ - rtx operands[3]; - /* Pick up callee-saved first regno and last regno for further use. */ - int rb_regno = cfun->machine->callee_saved_regs_first_regno; - int re_regno = cfun->machine->callee_saved_regs_last_regno; - - if (TARGET_V3PUSH) - { - /* For stack v3pop: - operands[0]: Re - operands[1]: imm8u */ - - /* This variable is to check if 'pop25 Re,imm8u' is available. */ - int sp_adjust; - - /* Set operands[0]. */ - operands[0] = gen_rtx_REG (SImode, re_regno); - - /* Check if we can generate 'pop25 Re,imm8u', - otherwise, generate 'pop25 Re,0'. - We have to consider alloca issue as well. - If the function does call alloca(), the stack pointer is not fixed. - In that case, we cannot use 'pop25 Re,imm8u' directly. - We have to caculate stack pointer from frame pointer - and then use 'pop25 Re,0'. */ - sp_adjust = cfun->machine->local_size - + cfun->machine->out_args_size - + cfun->machine->callee_saved_area_padding_bytes; - if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) - && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust) - && !cfun->calls_alloca) - operands[1] = GEN_INT (sp_adjust); + /* The jump table maybe need to 2 byte alignment, + so reserved 1 byte for check max_offset. */ + if (max_offset >= 0xff) + return HImode; else - operands[1] = GEN_INT (0); - - /* Create assembly code pattern. */ - snprintf (pattern, sizeof (pattern), "pop25\t%%0, %%1"); + return QImode; } - else - { - /* For normal stack pop multiple: - operands[0]: Rb - operands[1]: Re - operands[2]: En4 */ - - /* This variable is used to check if we only need to generate En4 field. - As long as Rb==Re=SP_REGNUM, we set this variable to 1. */ - int pop_en4_only_p = 0; - - /* Set operands[0] and operands[1]. */ - operands[0] = gen_rtx_REG (SImode, rb_regno); - operands[1] = gen_rtx_REG (SImode, re_regno); - - /* 'lmw.bim $sp,[$sp],$sp,0' means pop nothing. */ - if (!cfun->machine->fp_size - && !cfun->machine->gp_size - && !cfun->machine->lp_size - && REGNO (operands[0]) == SP_REGNUM - && REGNO (operands[1]) == SP_REGNUM) - { - /* No need to generate instruction. */ - return ""; - } - else - { - /* If Rb==Re=SP_REGNUM, we only need to generate En4 field. */ - if (REGNO (operands[0]) == SP_REGNUM - && REGNO (operands[1]) == SP_REGNUM) - pop_en4_only_p = 1; +} - /* Create assembly code pattern. - We need to handle the form: "Rb, Re, { $fp $gp $lp }". */ - snprintf (pattern, sizeof (pattern), - "pop.s\t%s{%s%s%s }", - pop_en4_only_p ? "" : "%0, %1, ", - cfun->machine->fp_size ? " $fp" : "", - cfun->machine->gp_size ? " $gp" : "", - cfun->machine->lp_size ? " $lp" : ""); - } - } +static bool +nds32_cannot_copy_insn_p (rtx insn) +{ + /* The hwloop_cfg insn cannot be copied. */ + if (recog_memoized (insn) == CODE_FOR_hwloop_cfg) + return true; - /* We use output_asm_insn() to output assembly code by ourself. */ - output_asm_insn (pattern, operands); - return ""; + return false; } -/* Return align 2 (log base 2) if the next instruction of LABEL is 4 byte. */ +/* Return alignment for the label. 
int
 nds32_target_alignment (rtx label)
 {
   rtx insn;
 
-  if (optimize_size)
+  if (!NDS32_ALIGN_P ())
     return 0;
 
   insn = next_active_insn (label);
 
-  if (insn == 0)
+  /* Always align to 4 bytes when the first instruction after the label is
+     a jump instruction, since its length might still change; always
+     aligning here makes sure we do not lose any performance.  */
+  if (insn == 0
+      || (get_attr_length (insn) == 2
+	  && !JUMP_P (insn) && !CALL_P (insn)))
     return 0;
-  else if ((get_attr_length (insn) % 4) == 0)
+  else
     return 2;
+}
+
+/* Return alignment for data.  */
+unsigned int
+nds32_data_alignment (tree data,
+		      unsigned int basic_align)
+{
+  if ((basic_align < BITS_PER_WORD)
+      && (TREE_CODE (data) == ARRAY_TYPE
+	 || TREE_CODE (data) == UNION_TYPE
+	 || TREE_CODE (data) == RECORD_TYPE))
+    return BITS_PER_WORD;
   else
-    return 0;
+    return basic_align;
+}
+
+/* Return alignment for constant value.  */
+unsigned int
+nds32_constant_alignment (tree constant,
+			  unsigned int basic_align)
+{
+  /* Make string literals and constructor constants word-aligned.  */
+  if (((TREE_CODE (constant) == STRING_CST
+	|| TREE_CODE (constant) == CONSTRUCTOR
+	|| TREE_CODE (constant) == UNION_TYPE
+	|| TREE_CODE (constant) == RECORD_TYPE
+	|| TREE_CODE (constant) == ARRAY_TYPE)
+       && basic_align < BITS_PER_WORD))
+    return BITS_PER_WORD;
+  else
+    return basic_align;
+}
+
+/* Return alignment for local variable.  */
+unsigned int
+nds32_local_alignment (tree local ATTRIBUTE_UNUSED,
+		       unsigned int basic_align)
+{
+  bool at_least_align_to_word = false;
+  /* Make local arrays, structs and unions at least word-aligned so that
+     memcpy can be unrolled when they are initialized by constants.  */
+  switch (TREE_CODE (local))
+    {
+    case ARRAY_TYPE:
+    case RECORD_TYPE:
+    case UNION_TYPE:
+      at_least_align_to_word = true;
+      break;
+    default:
+      at_least_align_to_word = false;
+      break;
+    }
+  if (at_least_align_to_word
+      && (basic_align < BITS_PER_WORD))
+    return BITS_PER_WORD;
+  else
+    return basic_align;
 }
 
 /* ------------------------------------------------------------------------ */
 
-/* PART 5: Initialize target hook structure and definitions.  */
+/* PART 6: Initialize target hook structure and definitions.  */
 
 /* Controlling the Compilation Driver.  */
 
@@ -5445,6 +5977,9 @@
 #define TARGET_PROMOTE_FUNCTION_MODE \
   default_promote_function_mode_always_promote
 
+#undef TARGET_EXPAND_TO_RTL_HOOK
+#define TARGET_EXPAND_TO_RTL_HOOK nds32_expand_to_rtl_hook
+
 
 /* Layout of Source Language Data Types.  */
 
@@ -5453,6 +5988,9 @@
 
 /* -- Basic Characteristics of Registers.  */
 
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE nds32_conditional_register_usage
+
 /* -- Order of Allocation of Registers.  */
 
 /* -- How Values Fit in Registers.  */
 
@@ -5464,6 +6002,9 @@
 
 /* Register Classes. 
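/* Editorial note, not part of the patch: the alignment hooks above only
   ever raise alignment for aggregates and string constants, never lower
   it.  A minimal, hypothetical illustration (assuming BITS_PER_WORD is 32
   and a char array's natural alignment is 8 bits):  */
void
alignment_example_sketch (void)
{
  static char table[6];   /* nds32_data_alignment: 8 -> 32 bits.  */
  char buf[6];            /* nds32_local_alignment: 8 -> 32 bits, so a
                             constant-sized copy into it can be unrolled
                             into word accesses.  */
  (void) table;
  (void) buf;
}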
*/ +#undef TARGET_PREFERRED_RENAME_CLASS +#define TARGET_PREFERRED_RENAME_CLASS nds32_preferred_rename_class + #undef TARGET_CLASS_MAX_NREGS #define TARGET_CLASS_MAX_NREGS nds32_class_max_nregs @@ -5499,12 +6040,21 @@ #undef TARGET_FUNCTION_ARG #define TARGET_FUNCTION_ARG nds32_function_arg +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK nds32_must_pass_in_stack + +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES nds32_arg_partial_bytes + #undef TARGET_FUNCTION_ARG_ADVANCE #define TARGET_FUNCTION_ARG_ADVANCE nds32_function_arg_advance #undef TARGET_FUNCTION_ARG_BOUNDARY #define TARGET_FUNCTION_ARG_BOUNDARY nds32_function_arg_boundary +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P nds32_vector_mode_supported_p + /* -- How Scalar Function Values Are Returned. */ #undef TARGET_FUNCTION_VALUE @@ -5518,6 +6068,9 @@ /* -- How Large Values Are Returned. */ +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY nds32_return_in_memory + /* -- Caller-Saves Register Allocation. */ /* -- Function Entry and Exit. */ @@ -5544,6 +6097,9 @@ /* -- Permitting tail calls. */ +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL nds32_function_ok_for_sibcall + #undef TARGET_WARN_FUNC_RETURN #define TARGET_WARN_FUNC_RETURN nds32_warn_func_return @@ -5552,6 +6108,9 @@ /* Implementing the Varargs Macros. */ +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS nds32_setup_incoming_varargs + #undef TARGET_STRICT_ARGUMENT_NAMING #define TARGET_STRICT_ARGUMENT_NAMING nds32_strict_argument_naming @@ -5573,6 +6132,21 @@ #undef TARGET_LEGITIMATE_ADDRESS_P #define TARGET_LEGITIMATE_ADDRESS_P nds32_legitimate_address_p +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS nds32_legitimize_address + +#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P nds32_legitimate_constant_p + +#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE nds32_vectorize_preferred_simd_mode + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM nds32_cannot_force_const_mem + +#undef TARGET_DELEGITIMIZE_ADDRESS +#define TARGET_DELEGITIMIZE_ADDRESS nds32_delegitimize_address + /* Anchored Addresses. */ @@ -5583,6 +6157,9 @@ /* -- Representation of condition codes using registers. */ +#undef TARGET_CANONICALIZE_COMPARISON +#define TARGET_CANONICALIZE_COMPARISON nds32_canonicalize_comparison + /* -- Macros to control conditional execution. */ @@ -5603,9 +6180,15 @@ /* Adjusting the Instruction Scheduler. */ +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST nds32_sched_adjust_cost + /* Dividing the Output into Sections (Texts, Data, . . . ). */ +#undef TARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO nds32_encode_section_info + /* Position Independent Code. */ @@ -5627,6 +6210,9 @@ #undef TARGET_ASM_ALIGNED_SI_OP #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" +#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA +#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA nds32_asm_output_addr_const_extra + /* -- Output of Uninitialized Variables. */ /* -- Output and Generation of Labels. */ @@ -5649,6 +6235,9 @@ /* -- Assembler Commands for Exception Regions. */ +#undef TARGET_DWARF_REGISTER_SPAN +#define TARGET_DWARF_REGISTER_SPAN nds32_dwarf_register_span + /* -- Assembler Commands for Alignment. */ @@ -5664,6 +6253,11 @@ /* -- Macros for SDB and DWARF Output. 
*/ +/* Variable tracking should be run after all optimizations which + change order of insns. It also needs a valid CFG. */ +#undef TARGET_DELAY_VARTRACK +#define TARGET_DELAY_VARTRACK true + /* -- Macros for VMS Debug Format. */ @@ -5693,6 +6287,9 @@ /* Emulating TLS. */ +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS TARGET_LINUX_ABI + /* Defining coprocessor specifics for MIPS targets. */ @@ -5708,12 +6305,36 @@ /* Miscellaneous Parameters. */ +#undef TARGET_MD_ASM_CLOBBERS +#define TARGET_MD_ASM_CLOBBERS nds32_md_asm_clobbers + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG nds32_machine_dependent_reorg + #undef TARGET_INIT_BUILTINS #define TARGET_INIT_BUILTINS nds32_init_builtins +#undef TARGET_BUILTIN_DECL +#define TARGET_BUILTIN_DECL nds32_builtin_decl + #undef TARGET_EXPAND_BUILTIN #define TARGET_EXPAND_BUILTIN nds32_expand_builtin +#undef TARGET_HAVE_CONDITIONAL_EXECUTION +#define TARGET_HAVE_CONDITIONAL_EXECUTION nds32_have_conditional_execution + +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS nds32_init_libfuncs + +#undef TARGET_CAN_USE_DOLOOP_P +#define TARGET_CAN_USE_DOLOOP_P nds32_can_use_doloop_p + +#undef TARGET_INVALID_WITHIN_DOLOOP +#define TARGET_INVALID_WITHIN_DOLOOP nds32_invalid_within_doloop + +#undef TARGET_CANNOT_COPY_INSN_P +#define TARGET_CANNOT_COPY_INSN_P nds32_cannot_copy_insn_p + /* ------------------------------------------------------------------------ */ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-cost.c gcc-4.9.4/gcc/config/nds32/nds32-cost.c --- gcc-4.9.4.orig/gcc/config/nds32/nds32-cost.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-cost.c 2016-08-08 20:37:45.498269782 +0200 @@ -0,0 +1,734 @@ +/* Subroutines used for calculate rtx costs of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2015 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* ------------------------------------------------------------------------ */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" /* Required by recog.h. */ +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" /* For DFA state_t. */ +#include "insn-codes.h" /* For CODE_FOR_xxx. */ +#include "reload.h" /* For push_reload(). */ +#include "flags.h" +#include "function.h" +#include "expr.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "df.h" +#include "tm_p.h" +#include "tm-constrs.h" +#include "optabs.h" /* For GEN_FCN. */ +#include "target.h" +#include "target-def.h" +#include "langhooks.h" /* For add_builtin_function(). 
*/ +#include "ggc.h" +#include "tree-pass.h" + +/* ------------------------------------------------------------------------ */ + +typedef bool (*rtx_cost_func) (rtx, int, int, int, int*); + +struct rtx_cost_model_t { + rtx_cost_func speed_prefer; + rtx_cost_func size_prefer; +}; + +static rtx_cost_model_t rtx_cost_model; + +static int insn_size_16bit; /* Initial at nds32_init_rtx_costs. */ +static const int insn_size_32bit = 4; + +static bool +nds32_rtx_costs_speed_prefer (rtx x ATTRIBUTE_UNUSED, + int code, + int outer_code ATTRIBUTE_UNUSED, + int opno ATTRIBUTE_UNUSED, + int *total) +{ + rtx op0; + rtx op1; + enum machine_mode mode = GET_MODE (x); + /* Scale cost by mode size. */ + int cost = COSTS_N_INSNS (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode)); + + switch (code) + { + case USE: + /* Used in combine.c as a marker. */ + *total = 0; + return true; + + case CONST_INT: + /* When not optimizing for size, we care more about the cost + of hot code, and hot code is often in a loop. If a constant + operand needs to be forced into a register, we will often be + able to hoist the constant load out of the loop, so the load + should not contribute to the cost. */ + if (outer_code == SET || outer_code == PLUS) + *total = satisfies_constraint_Is20 (x) ? 0 : 4; + else if (outer_code == AND || outer_code == IOR || outer_code == XOR + || outer_code == MINUS) + *total = satisfies_constraint_Iu15 (x) ? 0 : 4; + else if (outer_code == ASHIFT || outer_code == ASHIFTRT + || outer_code == LSHIFTRT) + *total = satisfies_constraint_Iu05 (x) ? 0 : 4; + else if (GET_RTX_CLASS (outer_code) == RTX_COMPARE + || GET_RTX_CLASS (outer_code) == RTX_COMM_COMPARE) + *total = satisfies_constraint_Is16 (x) ? 0 : 4; + else + *total = COSTS_N_INSNS (1); + return true; + + case CONST: + case LO_SUM: + case HIGH: + case SYMBOL_REF: + *total = COSTS_N_INSNS (1); + return true; + + case MEM: + *total = COSTS_N_INSNS (1); + return true; + + case SET: + op0 = SET_DEST (x); + op1 = SET_SRC (x); + mode = GET_MODE (op0); + /* Scale cost by mode size. */ + cost = COSTS_N_INSNS (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode)); + + switch (GET_CODE (op1)) + { + case REG: + case SUBREG: + /* Register move and Store instructions. */ + if ((REG_P (op0) || MEM_P (op0)) + && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (DImode)) + *total = COSTS_N_INSNS (1); + else + *total = cost; + return true; + + case MEM: + /* Load instructions. */ + if (REG_P (op0) && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (DImode)) + *total = COSTS_N_INSNS (1); + else + *total = cost; + return true; + + case CONST_INT: + /* movi instruction. */ + if (REG_P (op0) && GET_MODE_SIZE (mode) < GET_MODE_SIZE (DImode)) + { + if (satisfies_constraint_Is20 (op1)) + *total = COSTS_N_INSNS (1) - 1; + else + *total = COSTS_N_INSNS (2); + } + else + *total = cost; + return true; + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + /* la instruction. 
*/ + if (REG_P (op0) && GET_MODE_SIZE (mode) < GET_MODE_SIZE (DImode)) + *total = COSTS_N_INSNS (1) - 1; + else + *total = cost; + return true; + + default: + *total = cost; + return true; + } + + case PLUS: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) + *total = cost; + else if (GET_CODE (op0) == MULT || GET_CODE (op0) == LSHIFTRT + || GET_CODE (op1) == MULT || GET_CODE (op1) == LSHIFTRT) + /* ALU_SHIFT */ + *total = COSTS_N_INSNS (2); + else if ((GET_CODE (op1) == CONST_INT + && satisfies_constraint_Is15 (op1)) + || REG_P (op1)) + /* ADD instructions */ + *total = COSTS_N_INSNS (1); + else + /* ADD instructions: IMM out of range. */ + *total = COSTS_N_INSNS (2); + return true; + + case MINUS: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) + *total = cost; + else if (GET_CODE (op0) == MULT || GET_CODE (op0) == LSHIFTRT + || GET_CODE (op1) == MULT || GET_CODE (op1) == LSHIFTRT) + /* ALU_SHIFT */ + *total = COSTS_N_INSNS (2); + else if ((GET_CODE (op0) == CONST_INT + && satisfies_constraint_Is15 (op0)) + || REG_P (op0)) + /* SUB instructions */ + *total = COSTS_N_INSNS (1); + else + /* SUB instructions: IMM out of range. */ + *total = COSTS_N_INSNS (2); + return true; + + case TRUNCATE: + /* TRUNCATE and AND behavior is same. */ + *total = COSTS_N_INSNS (1); + return true; + + case AND: + case IOR: + case XOR: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + if (NDS32_EXT_DSP_P ()) + { + /* We prefer (and (ior) (ior)) than (ior (and) (and)) for + synthetize pk** and insb instruction. */ + if (code == AND && GET_CODE (op0) == IOR && GET_CODE (op1) == IOR) + return COSTS_N_INSNS (1); + + if (code == IOR && GET_CODE (op0) == AND && GET_CODE (op1) == AND) + return COSTS_N_INSNS (10); + } + + if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) + *total = cost; + else if (GET_CODE (op0) == ASHIFT || GET_CODE (op0) == LSHIFTRT) + /* ALU_SHIFT */ + *total = COSTS_N_INSNS (2); + else if ((GET_CODE (op1) == CONST_INT + && satisfies_constraint_Iu15 (op1)) + || REG_P (op1)) + /* AND, OR, XOR instructions */ + *total = COSTS_N_INSNS (1); + else if (code == AND || GET_CODE (op0) == NOT) + /* BITC instruction */ + *total = COSTS_N_INSNS (1); + else + /* AND, OR, XOR instructions: IMM out of range. */ + *total = COSTS_N_INSNS (2); + return true; + + case MULT: + if (GET_MODE (x) == DImode + || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND + || GET_CODE (XEXP (x, 1)) == ZERO_EXTEND) + /* MUL instructions */ + *total = COSTS_N_INSNS (1); + else if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) + *total = cost; + else if (outer_code == PLUS || outer_code == MINUS) + /* ALU_SHIFT */ + *total = COSTS_N_INSNS (2); + else if ((GET_CODE (XEXP (x, 1)) == CONST_INT + && satisfies_constraint_Iu05 (XEXP (x, 1))) + || REG_P (XEXP (x, 1))) + /* MUL instructions */ + *total = COSTS_N_INSNS (1); + else + /* MUL instructions: IMM out of range. */ + *total = COSTS_N_INSNS (2); + + if (TARGET_MUL_SLOW) + *total += COSTS_N_INSNS (4); + + return true; + + case LSHIFTRT: + if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) + *total = cost; + else if (outer_code == PLUS || outer_code == MINUS + || outer_code == AND || outer_code == IOR + || outer_code == XOR) + /* ALU_SHIFT */ + *total = COSTS_N_INSNS (2); + else if ((GET_CODE (XEXP (x, 1)) == CONST_INT + && satisfies_constraint_Iu05 (XEXP (x, 1))) + || REG_P (XEXP (x, 1))) + /* SRL instructions */ + *total = COSTS_N_INSNS (1); + else + /* SRL instructions: IMM out of range. 
*/ + *total = COSTS_N_INSNS (2); + return true; + + case ASHIFT: + if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) + *total = cost; + else if (outer_code == AND || outer_code == IOR + || outer_code == XOR) + /* ALU_SHIFT */ + *total += COSTS_N_INSNS (2); + else if ((GET_CODE (XEXP (x, 1)) == CONST_INT + && satisfies_constraint_Iu05 (XEXP (x, 1))) + || REG_P (XEXP (x, 1))) + /* SLL instructions */ + *total = COSTS_N_INSNS (1); + else + /* SLL instructions: IMM out of range. */ + *total = COSTS_N_INSNS (2); + return true; + + case ASHIFTRT: + case ROTATERT: + if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) + *total = cost; + else if ((GET_CODE (XEXP (x, 1)) == CONST_INT + && satisfies_constraint_Iu05 (XEXP (x, 1))) + || REG_P (XEXP (x, 1))) + /* ROTR, SLL instructions */ + *total = COSTS_N_INSNS (1); + else + /* ROTR, SLL instructions: IMM out of range. */ + *total = COSTS_N_INSNS (2); + return true; + + case LT: + case LTU: + if (outer_code == SET) + { + if ((GET_CODE (XEXP (x, 1)) == CONST_INT + && satisfies_constraint_Iu15 (XEXP (x, 1))) + || REG_P (XEXP (x, 1))) + /* SLT, SLTI instructions */ + *total = COSTS_N_INSNS (1); + else + /* SLT, SLT instructions: IMM out of range. */ + *total = COSTS_N_INSNS (2); + } + else + /* branch */ + *total = COSTS_N_INSNS (2); + return true; + + case EQ: + case NE: + case GE: + case LE: + case GT: + /* branch */ + *total = COSTS_N_INSNS (2); + return true; + + case IF_THEN_ELSE: + if (GET_CODE (XEXP (x, 1)) == LABEL_REF) + /* branch */ + *total = COSTS_N_INSNS (2); + else + /* cmovz, cmovn instructions */ + *total = COSTS_N_INSNS (1); + return true; + + case LABEL_REF: + if (outer_code == IF_THEN_ELSE) + /* branch */ + *total = COSTS_N_INSNS (2); + else + *total = COSTS_N_INSNS (1); + return true; + + case ZERO_EXTEND: + case SIGN_EXTEND: + if (MEM_P (XEXP (x, 0))) + /* Using memory access. */ + *total = COSTS_N_INSNS (1); + else + /* Zero extend and sign extend instructions. */ + *total = COSTS_N_INSNS (1); + return true; + + case NEG: + case NOT: + *total = COSTS_N_INSNS (1); + return true; + + case DIV: + case UDIV: + case MOD: + case UMOD: + *total = COSTS_N_INSNS (20); + return true; + + case CALL: + *total = COSTS_N_INSNS (2); + return true; + + case CLZ: + case SMIN: + case SMAX: + case ZERO_EXTRACT: + if (TARGET_EXT_PERF) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (3); + return true; + + default: + *total = COSTS_N_INSNS (3); + return true; + } +} + +static bool +nds32_rtx_costs_size_prefer (rtx x, + int code, + int outer_code, + int opno ATTRIBUTE_UNUSED, + int *total) +{ + /* In gcc/rtl.h, the default value of COSTS_N_INSNS(N) is N*4. + We treat it as 4-byte cost for each instruction + under code size consideration. */ + switch (code) + { + case SET: + /* For 'SET' rtx, we need to return false + so that it can recursively calculate costs. */ + return false; + + case USE: + /* Used in combine.c as a marker. */ + *total = 0; + break; + + case CONST_INT: + /* All instructions involving constant operation + need to be considered for cost evaluation. */ + if (outer_code == SET) + { + /* (set X imm5s), use movi55, 2-byte cost. + (set X imm20s), use movi, 4-byte cost. + (set X BIG_INT), use sethi/ori, 8-byte cost. */ + if (satisfies_constraint_Is05 (x)) + *total = insn_size_16bit; + else if (satisfies_constraint_Is20 (x)) + *total = insn_size_32bit; + else + *total = insn_size_32bit * 2; + } + else if (outer_code == PLUS || outer_code == MINUS) + { + /* Possible addi333/subi333 or subi45/addi45, 2-byte cost. 
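/* Editorial note, not part of the patch: a worked example of the
   size-preferred constant costs computed above.  Materializing a constant
   in a SET is charged by its encoding size:
     fits Is05 (-16..15)        -> movi55       -> 2 bytes
     fits Is20 (20-bit signed)  -> movi         -> 4 bytes
     anything larger            -> sethi + ori  -> 8 bytes
   so (const_int 7) costs 2, (const_int 0x12345) costs 4 and
   (const_int 0x12345678) costs 8; the 2-byte figure assumes 16-bit
   instructions are enabled, otherwise insn_size_16bit is 4.  */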
+ General case, cost 1 instruction with 4-byte. */ + if (satisfies_constraint_Iu05 (x)) + *total = insn_size_16bit; + else + *total = insn_size_32bit; + } + else if (outer_code == ASHIFT) + { + /* Possible slli333, 2-byte cost. + General case, cost 1 instruction with 4-byte. */ + if (satisfies_constraint_Iu03 (x)) + *total = insn_size_16bit; + else + *total = insn_size_32bit; + } + else if (outer_code == ASHIFTRT || outer_code == LSHIFTRT) + { + /* Possible srai45 or srli45, 2-byte cost. + General case, cost 1 instruction with 4-byte. */ + if (satisfies_constraint_Iu05 (x)) + *total = insn_size_16bit; + else + *total = insn_size_32bit; + } + else + { + /* For other cases, simply set it 4-byte cost. */ + *total = insn_size_32bit; + } + break; + + case CONST_DOUBLE: + /* It requires high part and low part processing, set it 8-byte cost. */ + *total = insn_size_32bit * 2; + break; + + case CONST: + case SYMBOL_REF: + *total = insn_size_32bit * 2; + break; + + default: + /* For other cases, generally we set it 4-byte cost + and stop resurively traversing. */ + *total = insn_size_32bit; + break; + } + + return true; +} + +void +nds32_init_rtx_costs (void) +{ + rtx_cost_model.speed_prefer = nds32_rtx_costs_speed_prefer; + rtx_cost_model.size_prefer = nds32_rtx_costs_size_prefer; + + if (TARGET_16_BIT) + insn_size_16bit = 2; + else + insn_size_16bit = 4; +} + +/* This target hook describes the relative costs of RTL expressions. + Return 'true' when all subexpressions of x have been processed. + Return 'false' to sum the costs of sub-rtx, plus cost of this operation. + Refer to gcc/rtlanal.c for more information. */ +bool +nds32_rtx_costs_impl (rtx x, + int code, + int outer_code, + int opno, + int *total, + bool speed) +{ + /* According to 'speed', use suitable cost model section. */ + if (speed) + return rtx_cost_model.speed_prefer(x, code, outer_code, opno, total); + else + return rtx_cost_model.size_prefer(x, code, outer_code, opno, total); +} + + +int nds32_address_cost_speed_prefer (rtx address) +{ + rtx plus0, plus1; + enum rtx_code code; + + code = GET_CODE (address); + + switch (code) + { + case POST_MODIFY: + case POST_INC: + case POST_DEC: + /* We encourage that rtx contains + POST_MODIFY/POST_INC/POST_DEC behavior. */ + return COSTS_N_INSNS (1) - 2; + + case SYMBOL_REF: + /* We can have gp-relative load/store for symbol_ref. + Have it 4-byte cost. */ + return COSTS_N_INSNS (2); + + case CONST: + /* It is supposed to be the pattern (const (plus symbol_ref const_int)). + Have it 4-byte cost. */ + return COSTS_N_INSNS (2); + + case REG: + /* Simply return 4-byte costs. */ + return COSTS_N_INSNS (1) - 2; + + case PLUS: + /* We do not need to check if the address is a legitimate address, + because this hook is never called with an invalid address. + But we better check the range of + const_int value for cost, if it exists. */ + plus0 = XEXP (address, 0); + plus1 = XEXP (address, 1); + + if (REG_P (plus0) && CONST_INT_P (plus1)) + return COSTS_N_INSNS (1) - 2; + else if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1)) + return COSTS_N_INSNS (1) - 1; + else if (REG_P (plus0) && REG_P (plus1)) + return COSTS_N_INSNS (1); + + /* For other 'plus' situation, make it cost 4-byte. 
*/ + return COSTS_N_INSNS (1); + + default: + break; + } + + return COSTS_N_INSNS (4); + +} + +int nds32_address_cost_speed_fwprop (rtx address) +{ + rtx plus0, plus1; + enum rtx_code code; + + code = GET_CODE (address); + + switch (code) + { + case POST_MODIFY: + case POST_INC: + case POST_DEC: + /* We encourage that rtx contains + POST_MODIFY/POST_INC/POST_DEC behavior. */ + return 0; + + case SYMBOL_REF: + /* We can have gp-relative load/store for symbol_ref. + Have it 4-byte cost. */ + return COSTS_N_INSNS (2); + + case CONST: + /* It is supposed to be the pattern (const (plus symbol_ref const_int)). + Have it 4-byte cost. */ + return COSTS_N_INSNS (2); + + case REG: + /* Simply return 4-byte costs. */ + return COSTS_N_INSNS (1); + + case PLUS: + /* We do not need to check if the address is a legitimate address, + because this hook is never called with an invalid address. + But we better check the range of + const_int value for cost, if it exists. */ + plus0 = XEXP (address, 0); + plus1 = XEXP (address, 1); + + if (REG_P (plus0) && CONST_INT_P (plus1)) + { + /* If it is possible to be lwi333/swi333 form, + make it 2-byte cost. */ + if (satisfies_constraint_Iu03 (plus1)) + return (COSTS_N_INSNS (1) - 2); + else + return COSTS_N_INSNS (1); + } + if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1)) + return COSTS_N_INSNS (1) - 2; + else if (REG_P (plus0) && REG_P (plus1)) + return COSTS_N_INSNS (1); + + /* For other 'plus' situation, make it cost 4-byte. */ + return COSTS_N_INSNS (1); + + default: + break; + } + + return COSTS_N_INSNS (4); +} + + +int nds32_address_cost_size_prefer (rtx address) +{ + rtx plus0, plus1; + enum rtx_code code; + + code = GET_CODE (address); + + switch (code) + { + case POST_MODIFY: + case POST_INC: + case POST_DEC: + /* We encourage that rtx contains + POST_MODIFY/POST_INC/POST_DEC behavior. */ + return 0; + + case SYMBOL_REF: + /* We can have gp-relative load/store for symbol_ref. + Have it 4-byte cost. */ + return COSTS_N_INSNS (2); + + case CONST: + /* It is supposed to be the pattern (const (plus symbol_ref const_int)). + Have it 4-byte cost. */ + return COSTS_N_INSNS (2); + + case REG: + /* Simply return 4-byte costs. */ + return COSTS_N_INSNS (1) - 1; + + case PLUS: + /* We do not need to check if the address is a legitimate address, + because this hook is never called with an invalid address. + But we better check the range of + const_int value for cost, if it exists. */ + plus0 = XEXP (address, 0); + plus1 = XEXP (address, 1); + + if (REG_P (plus0) && CONST_INT_P (plus1)) + { + /* If it is possible to be lwi333/swi333 form, + make it 2-byte cost. */ + if (satisfies_constraint_Iu03 (plus1)) + return (COSTS_N_INSNS (1) - 2); + else + return COSTS_N_INSNS (1) - 1; + } + + /* (plus (reg) (mult (reg) (const))) */ + if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1)) + return (COSTS_N_INSNS (1) - 1); + + /* For other 'plus' situation, make it cost 4-byte. 
*/ + return COSTS_N_INSNS (1); + + default: + break; + } + + return COSTS_N_INSNS (4); + +} + +int nds32_address_cost_impl (rtx address, + enum machine_mode mode ATTRIBUTE_UNUSED, + addr_space_t as ATTRIBUTE_UNUSED, + bool speed_p) +{ + if (speed_p) + { + if (current_pass->tv_id == TV_FWPROP) + return nds32_address_cost_speed_fwprop (address); + else + return nds32_address_cost_speed_prefer (address); + } + else + return nds32_address_cost_size_prefer (address); +} + +/* ------------------------------------------------------------------------ */ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-doubleword.md gcc-4.9.4/gcc/config/nds32/nds32-doubleword.md --- gcc-4.9.4.orig/gcc/config/nds32/nds32-doubleword.md 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-doubleword.md 2016-08-08 20:37:45.498269782 +0200 @@ -1,5 +1,5 @@ ;; DImode/DFmode patterns description of Andes NDS32 cpu for GNU compiler -;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; Copyright (C) 2012-2015 Free Software Foundation, Inc. ;; Contributed by Andes Technology Corporation. ;; ;; This file is part of GCC. @@ -23,7 +23,8 @@ ;; Move DImode/DFmode instructions. ;; ------------------------------------------------------------- - +;; Do *NOT* try to split DI/DFmode before reload since LRA seem +;; still buggy for such behavior at least at gcc 4.8.2... (define_expand "movdi" [(set (match_operand:DI 0 "general_operand" "") (match_operand:DI 1 "general_operand" ""))] @@ -46,144 +47,77 @@ (define_insn "move_" - [(set (match_operand:DIDF 0 "nonimmediate_operand" "=r, r, r, m") - (match_operand:DIDF 1 "general_operand" " r, i, m, r"))] - "" + [(set (match_operand:DIDF 0 "nonimmediate_operand" "=r, r, r, r, Da, m, f, Q, f, r, f") + (match_operand:DIDF 1 "general_operand" " r, i, Da, m, r, r, Q, f, f, f, r"))] + "register_operand(operands[0], mode) + || register_operand(operands[1], mode)" { - rtx addr; - rtx otherops[5]; - switch (which_alternative) { case 0: return "movd44\t%0, %1"; - case 1: /* reg <- const_int, we ask gcc to split instruction. */ return "#"; - case 2: - /* Refer to nds32_legitimate_address_p() in nds32.c, - we only allow "reg", "symbol_ref", "const", and "reg + const_int" - as address rtx for DImode/DFmode memory access. */ - addr = XEXP (operands[1], 0); - - otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0])); - otherops[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); - otherops[2] = addr; - - if (REG_P (addr)) - { - /* (reg) <- (mem (reg)) */ - output_asm_insn ("lmw.bi\t%0, [%2], %1, 0", otherops); - } - else if (GET_CODE (addr) == PLUS) - { - /* (reg) <- (mem (plus (reg) (const_int))) */ - rtx op0 = XEXP (addr, 0); - rtx op1 = XEXP (addr, 1); - - if (REG_P (op0)) - { - otherops[2] = op0; - otherops[3] = op1; - otherops[4] = gen_int_mode (INTVAL (op1) + 4, SImode); - } - else - { - otherops[2] = op1; - otherops[3] = op0; - otherops[4] = gen_int_mode (INTVAL (op0) + 4, SImode); - } - - /* To avoid base overwrite when REGNO(%0) == REGNO(%2). 
*/ - if (REGNO (otherops[0]) != REGNO (otherops[2])) - { - output_asm_insn ("lwi\t%0, [%2 + (%3)]", otherops); - output_asm_insn ("lwi\t%1, [%2 + (%4)]", otherops); - } - else - { - output_asm_insn ("lwi\t%1, [%2 + (%4)]", otherops); - output_asm_insn ("lwi\t%0,[ %2 + (%3)]", otherops); - } - } - else - { - /* (reg) <- (mem (symbol_ref ...)) - (reg) <- (mem (const ...)) */ - output_asm_insn ("lwi.gp\t%0, [ + %2]", otherops); - output_asm_insn ("lwi.gp\t%1, [ + %2 + 4]", otherops); - } - - /* We have already used output_asm_insn() by ourself, - so return an empty string. */ - return ""; - + /* The memory format is (mem (reg)), + we can generate 'lmw.bi' instruction. */ + return nds32_output_double (operands, true); case 3: - /* Refer to nds32_legitimate_address_p() in nds32.c, - we only allow "reg", "symbol_ref", "const", and "reg + const_int" - as address rtx for DImode/DFmode memory access. */ - addr = XEXP (operands[0], 0); - - otherops[0] = gen_rtx_REG (SImode, REGNO (operands[1])); - otherops[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1); - otherops[2] = addr; - - if (REG_P (addr)) - { - /* (mem (reg)) <- (reg) */ - output_asm_insn ("smw.bi\t%0, [%2], %1, 0", otherops); - } - else if (GET_CODE (addr) == PLUS) - { - /* (mem (plus (reg) (const_int))) <- (reg) */ - rtx op0 = XEXP (addr, 0); - rtx op1 = XEXP (addr, 1); - - if (REG_P (op0)) - { - otherops[2] = op0; - otherops[3] = op1; - otherops[4] = gen_int_mode (INTVAL (op1) + 4, SImode); - } - else - { - otherops[2] = op1; - otherops[3] = op0; - otherops[4] = gen_int_mode (INTVAL (op0) + 4, SImode); - } - - /* To avoid base overwrite when REGNO(%0) == REGNO(%2). */ - if (REGNO (otherops[0]) != REGNO (otherops[2])) - { - output_asm_insn ("swi\t%0, [%2 + (%3)]", otherops); - output_asm_insn ("swi\t%1, [%2 + (%4)]", otherops); - } - else - { - output_asm_insn ("swi\t%1, [%2 + (%4)]", otherops); - output_asm_insn ("swi\t%0, [%2 + (%3)]", otherops); - } - } - else - { - /* (mem (symbol_ref ...)) <- (reg) - (mem (const ...)) <- (reg) */ - output_asm_insn ("swi.gp\t%0, [ + %2]", otherops); - output_asm_insn ("swi.gp\t%1, [ + %2 + 4]", otherops); - } - - /* We have already used output_asm_insn() by ourself, - so return an empty string. */ - return ""; - + /* We haven't 64-bit load instruction, + we split this pattern to two SImode pattern. */ + return "#"; + case 4: + /* The memory format is (mem (reg)), + we can generate 'smw.bi' instruction. */ + return nds32_output_double (operands, false); + case 5: + /* We haven't 64-bit store instruction, + we split this pattern to two SImode pattern. 
*/ + return "#"; + case 6: + return nds32_output_float_load (operands); + case 7: + return nds32_output_float_store (operands); + case 8: + return "fcpysd\t%0, %1, %1"; + case 9: + return "fmfdr\t%0, %1"; + case 10: + return "fmtdr\t%1, %0"; default: gcc_unreachable (); } } - [(set_attr "type" "move,move,move,move") - (set_attr "length" " 4, 16, 8, 8")]) + [(set_attr "type" "alu,alu,load,load,store,store,unknown,unknown,unknown,unknown,unknown") + (set_attr_alternative "length" + [ + ;; Alternative 0 + (if_then_else (match_test "!TARGET_16_BIT") + (const_int 4) + (const_int 2)) + ;; Alternative 1 + (const_int 16) + ;; Alternative 2 + (const_int 4) + ;; Alternative 3 + (const_int 8) + ;; Alternative 4 + (const_int 4) + ;; Alternative 5 + (const_int 8) + ;; Alternative 6 + (const_int 4) + ;; Alternative 7 + (const_int 4) + ;; Alternative 8 + (const_int 4) + ;; Alternative 9 + (const_int 4) + ;; Alternative 10 + (const_int 4) + ]) + (set_attr "feature" " v1, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu")]) (define_split [(set (match_operand:DIDF 0 "register_operand" "") @@ -217,7 +151,9 @@ [(set (match_operand:DIDF 0 "register_operand" "") (match_operand:DIDF 1 "register_operand" ""))] "reload_completed - && (TARGET_ISA_V2 || !TARGET_16_BIT)" + && (TARGET_ISA_V2 || !TARGET_16_BIT) + && NDS32_IS_GPR_REGNUM (REGNO (operands[0])) + && NDS32_IS_GPR_REGNUM (REGNO (operands[1]))" [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))] { @@ -239,6 +175,28 @@ } }) +(define_split + [(set (match_operand:DIDF 0 "nds32_general_register_operand" "") + (match_operand:DIDF 1 "memory_operand" ""))] + "reload_completed + && !satisfies_constraint_Da (operands[1])" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + nds32_spilt_doubleword (operands, true); +}) + +(define_split + [(set (match_operand:DIDF 0 "memory_operand" "") + (match_operand:DIDF 1 "nds32_general_register_operand" ""))] + "reload_completed + && !satisfies_constraint_Da (operands[0])" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + nds32_spilt_doubleword (operands, false); +}) + ;; ------------------------------------------------------------- ;; Boolean DImode instructions. ;; ------------------------------------------------------------- diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-dspext.md gcc-4.9.4/gcc/config/nds32/nds32-dspext.md --- gcc-4.9.4.orig/gcc/config/nds32/nds32-dspext.md 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-dspext.md 2016-08-08 20:37:45.498269782 +0200 @@ -0,0 +1,5177 @@ +;; Machine description of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2015 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
+ +(define_expand "mov" + [(set (match_operand:VQIHI 0 "general_operand" "") + (match_operand:VQIHI 1 "general_operand" ""))] + "NDS32_EXT_DSP_P ()" +{ + /* Need to force register if mem <- !reg. */ + if (MEM_P (operands[0]) && !REG_P (operands[1])) + operands[1] = force_reg (mode, operands[1]); + + /* If operands[1] is a large constant and cannot be performed + by a single instruction, we need to split it. */ + if (GET_CODE (operands[1]) == CONST_VECTOR + && !satisfies_constraint_CVs2 (operands[1]) + && !satisfies_constraint_CVhi (operands[1])) + { + HOST_WIDE_INT ival = const_vector_to_hwint (operands[1]); + rtx tmp_rtx; + + tmp_rtx = can_create_pseudo_p () + ? gen_reg_rtx (SImode) + : simplify_gen_subreg (SImode, operands[0], mode, 0); + + emit_move_insn (tmp_rtx, gen_int_mode (ival, SImode)); + convert_move (operands[0], tmp_rtx, false); + DONE; + } + + if (REG_P (operands[0]) && SYMBOLIC_CONST_P (operands[1])) + { + if (nds32_tls_referenced_p (operands [1])) + { + nds32_expand_tls_move (operands); + DONE; + } + else if (flag_pic) + { + nds32_expand_pic_move (operands); + DONE; + } + } +}) + +(define_insn "*mov" + [(set (match_operand:VQIHI 0 "nonimmediate_operand" "=r, r,$U45,$U33,$U37,$U45, m,$ l,$ l,$ l,$ d, d, r,$ d, r, r, r, *f, *f, r, *f, Q, A") + (match_operand:VQIHI 1 "nds32_vmove_operand" " r, r, l, l, l, d, r, U45, U33, U37, U45,Ufe, m, CVp5, CVs5, CVs2, CVhi, *f, r, *f, Q, *f, r"))] + "NDS32_EXT_DSP_P () + && (register_operand(operands[0], mode) + || register_operand(operands[1], mode))" +{ + switch (which_alternative) + { + case 0: + return "mov55\t%0, %1"; + case 1: + return "ori\t%0, %1, 0"; + case 2: + case 3: + case 4: + case 5: + return nds32_output_16bit_store (operands, ); + case 6: + return nds32_output_32bit_store (operands, ); + case 7: + case 8: + case 9: + case 10: + case 11: + return nds32_output_16bit_load (operands, ); + case 12: + return nds32_output_32bit_load (operands, ); + case 13: + return "movpi45\t%0, %1"; + case 14: + return "movi55\t%0, %1"; + case 15: + return "movi\t%0, %1"; + case 16: + return "sethi\t%0, hi20(%1)"; + case 17: + if (TARGET_FPU_SINGLE) + return "fcpyss\t%0, %1, %1"; + else + return "#"; + case 18: + return "fmtsr\t%1, %0"; + case 19: + return "fmfsr\t%0, %1"; + case 20: + return nds32_output_float_load (operands); + case 21: + return nds32_output_float_store (operands); + case 22: + return "mtusr\t%1, %0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu,alu,store,store,store,store,store,load,load,load,load,load,load,alu,alu,alu,alu,unknown,unknown,unknown,unknown,unknown, alu") + (set_attr "length" " 2, 4, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 4, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4") + (set_attr "feature" " v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v3m, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu, v1")]) + +(define_expand "movv2si" + [(set (match_operand:V2SI 0 "general_operand" "") + (match_operand:V2SI 1 "general_operand" ""))] + "NDS32_EXT_DSP_P ()" +{ + /* Need to force register if mem <- !reg. 
*/ + if (MEM_P (operands[0]) && !REG_P (operands[1])) + operands[1] = force_reg (V2SImode, operands[1]); +}) + +(define_insn "*movv2si" + [(set (match_operand:V2SI 0 "nonimmediate_operand" "=r, r, r, r, Da, m, f, Q, f, r, f") + (match_operand:V2SI 1 "general_operand" " r, i, Da, m, r, r, Q, f, f, f, r"))] + "NDS32_EXT_DSP_P () + && (register_operand(operands[0], V2SImode) + || register_operand(operands[1], V2SImode))" +{ + switch (which_alternative) + { + case 0: + return "movd44\t%0, %1"; + case 1: + /* reg <- const_int, we ask gcc to split instruction. */ + return "#"; + case 2: + /* The memory format is (mem (reg)), + we can generate 'lmw.bi' instruction. */ + return nds32_output_double (operands, true); + case 3: + /* We haven't 64-bit load instruction, + we split this pattern to two SImode pattern. */ + return "#"; + case 4: + /* The memory format is (mem (reg)), + we can generate 'smw.bi' instruction. */ + return nds32_output_double (operands, false); + case 5: + /* We haven't 64-bit store instruction, + we split this pattern to two SImode pattern. */ + return "#"; + case 6: + return nds32_output_float_load (operands); + case 7: + return nds32_output_float_store (operands); + case 8: + return "fcpysd\t%0, %1, %1"; + case 9: + return "fmfdr\t%0, %1"; + case 10: + return "fmtdr\t%1, %0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu,alu,load,load,store,store,unknown,unknown,unknown,unknown,unknown") + (set_attr_alternative "length" + [ + ;; Alternative 0 + (if_then_else (match_test "!TARGET_16_BIT") + (const_int 4) + (const_int 2)) + ;; Alternative 1 + (const_int 16) + ;; Alternative 2 + (const_int 4) + ;; Alternative 3 + (const_int 8) + ;; Alternative 4 + (const_int 4) + ;; Alternative 5 + (const_int 8) + ;; Alternative 6 + (const_int 4) + ;; Alternative 7 + (const_int 4) + ;; Alternative 8 + (const_int 4) + ;; Alternative 9 + (const_int 4) + ;; Alternative 10 + (const_int 4) + ]) + (set_attr "feature" " v1, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu")]) + +(define_expand "movmisalign" + [(set (match_operand:VQIHI 0 "general_operand" "") + (match_operand:VQIHI 1 "general_operand" ""))] + "NDS32_EXT_DSP_P ()" +{ + rtx addr; + if (MEM_P (operands[0]) && !REG_P (operands[1])) + operands[1] = force_reg (mode, operands[1]); + + if (MEM_P (operands[0])) + { + addr = force_reg (Pmode, XEXP (operands[0], 0)); + emit_insn (gen_unaligned_store (addr, operands[1])); + } + else + { + addr = force_reg (Pmode, XEXP (operands[1], 0)); + emit_insn (gen_unaligned_load (operands[0], addr)); + } + DONE; +}) + +(define_expand "unaligned_load" + [(set (match_operand:VQIHI 0 "register_operand" "=r") + (unspec:VQIHI [(mem:VQIHI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_W))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_ISA_V3M) + nds32_expand_unaligned_load (operands, mode); + else + emit_insn (gen_unaligned_load_w (operands[0], gen_rtx_MEM (mode, operands[1]))); + DONE; +}) + +(define_insn "unaligned_load_w" + [(set (match_operand:VQIHI 0 "register_operand" "= r") + (unspec:VQIHI [(match_operand:VQIHI 1 "nds32_lmw_smw_base_operand" " Umw")] UNSPEC_UALOAD_W))] + "NDS32_EXT_DSP_P ()" +{ + return nds32_output_lmw_single_word (operands); +} + [(set_attr "type" "load") + (set_attr "length" "4")] +) + +(define_expand "unaligned_store" + [(set (mem:VQIHI (match_operand:SI 0 "register_operand" "r")) + (unspec:VQIHI [(match_operand:VQIHI 1 "register_operand" "r")] UNSPEC_UASTORE_W))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_ISA_V3M) + nds32_expand_unaligned_store (operands, mode); + 
else + emit_insn (gen_unaligned_store_w (gen_rtx_MEM (mode, operands[0]), operands[1])); + DONE; +}) + +(define_insn "unaligned_store_w" + [(set (match_operand:VQIHI 0 "nds32_lmw_smw_base_operand" "=Umw") + (unspec:VQIHI [(match_operand:VQIHI 1 "register_operand" " r")] UNSPEC_UASTORE_W))] + "NDS32_EXT_DSP_P ()" +{ + return nds32_output_smw_single_word (operands); +} + [(set_attr "type" "store") + (set_attr "length" "4")] +) + +(define_insn "add3" + [(set (match_operand:VQIHIDI 0 "register_operand" "=r") + (all_plus:VQIHIDI (match_operand:VQIHIDI 1 "register_operand" " r") + (match_operand:VQIHIDI 2 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "add %0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn "raddv4qi3" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (truncate:V4QI + (ashiftrt:V4HI + (plus:V4HI (sign_extend:V4HI (match_operand:V4QI 1 "register_operand" " r")) + (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "radd8 %0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + + +(define_insn "uraddv4qi3" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (truncate:V4QI + (lshiftrt:V4HI + (plus:V4HI (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" " r")) + (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "uradd8 %0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn "raddv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (truncate:V2HI + (ashiftrt:V2SI + (plus:V2SI (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" " r")) + (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "radd16 %0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn "uraddv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (truncate:V2HI + (lshiftrt:V2SI + (plus:V2SI (zero_extend:V2SI (match_operand:V2HI 1 "register_operand" " r")) + (zero_extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "uradd16 %0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn "radddi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (ashiftrt:TI + (plus:TI (sign_extend:TI (match_operand:DI 1 "register_operand" " r")) + (sign_extend:TI (match_operand:DI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "radd64 %0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + + +(define_insn "uradddi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (plus:TI (zero_extend:TI (match_operand:DI 1 "register_operand" " r")) + (zero_extend:TI (match_operand:DI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "uradd64 %0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn "sub3" + [(set (match_operand:VQIHIDI 0 "register_operand" "=r") + (all_minus:VQIHIDI (match_operand:VQIHIDI 1 "register_operand" " r") + (match_operand:VQIHIDI 2 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "sub %0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + 
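+;; The radd*/uradd* patterns above and the rsub*/ursub* patterns below
+;; share one RTL shape: widen each lane, add or subtract, shift right by
+;; one (arithmetic for the signed forms, logical for the unsigned forms),
+;; then truncate back to the lane width.  A minimal C model of one signed
+;; 16-bit lane (the function names here are illustrative only, not helpers
+;; from this port; it assumes GCC's arithmetic right shift of negative
+;; integers):
+;;
+;;   #include <stdint.h>
+;;   #include <stdio.h>
+;;
+;;   /* Halving add: the sum is formed in 32 bits, so it cannot wrap
+;;      before the shift narrows it back into int16_t range.  */
+;;   static int16_t radd16_lane (int16_t a, int16_t b)
+;;   {
+;;     return (int16_t) (((int32_t) a + (int32_t) b) >> 1);
+;;   }
+;;
+;;   static int16_t rsub16_lane (int16_t a, int16_t b)
+;;   {
+;;     return (int16_t) (((int32_t) a - (int32_t) b) >> 1);
+;;   }
+;;
+;;   int main (void)
+;;   {
+;;     /* 32767 + 32767 halves to 32767; no saturation is needed.  */
+;;     printf ("%d %d\n", radd16_lane (32767, 32767), rsub16_lane (3, 8));
+;;     return 0;
+;;   }
+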
+(define_insn "rsubv4qi3" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (truncate:V4QI + (ashiftrt:V4HI + (minus:V4HI (sign_extend:V4HI (match_operand:V4QI 1 "register_operand" " r")) + (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "rsub8 %0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "ursubv4qi3" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (truncate:V4QI + (lshiftrt:V4HI + (minus:V4HI (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" " r")) + (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "ursub8 %0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rsubv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (truncate:V2HI + (ashiftrt:V2SI + (minus:V2SI (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" " r")) + (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "rsub16 %0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "ursubv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (truncate:V2HI + (lshiftrt:V2SI + (minus:V2SI (zero_extend:V2SI (match_operand:V2HI 1 "register_operand" " r")) + (zero_extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "ursub16 %0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rsubdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (ashiftrt:TI + (minus:TI (sign_extend:TI (match_operand:DI 1 "register_operand" " r")) + (sign_extend:TI (match_operand:DI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "rsub64 %0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + + +(define_insn "ursubdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (minus:TI (zero_extend:TI (match_operand:DI 1 "register_operand" " r")) + (zero_extend:TI (match_operand:DI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "ursub64 %0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_expand "cras16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_cras16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_cras16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "cras16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (plus:HI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "cras16\t%0, %1, %2" +) + +(define_insn "cras16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + 
(parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (plus:HI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])) + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "cras16\t%0, %1, %2" +) + +(define_expand "kcras16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kcras16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_kcras16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "kcras16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (ss_plus:HI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "kcras16\t%0, %1, %2" +) + +(define_insn "kcras16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (ss_plus:HI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])) + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "kcras16\t%0, %1, %2" +) + +(define_expand "ukcras16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_ukcras16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_ukcras16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "ukcras16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (us_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (us_plus:HI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "ukcras16\t%0, %1, %2" +) + +(define_insn "ukcras16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (us_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (us_plus:HI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])) + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "ukcras16\t%0, %1, %2" +) + +(define_expand "crsa16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + 
(match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_crsa16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_crsa16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "crsa16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (plus:HI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "crsa16\t%0, %1, %2" +) + +(define_insn "crsa16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (plus:HI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])) + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "crsa16\t%0, %1, %2" +) + +(define_expand "kcrsa16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kcrsa16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_kcrsa16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "kcrsa16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (ss_plus:HI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "kcrsa16\t%0, %1, %2" +) + +(define_insn "kcrsa16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (ss_plus:HI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])) + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "kcrsa16\t%0, %1, %2" +) + +(define_expand "ukcrsa16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_ukcrsa16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_ukcrsa16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "ukcrsa16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (us_minus:HI + (vec_select:HI + 
(match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (us_plus:HI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "ukcrsa16\t%0, %1, %2" +) + +(define_insn "ukcrsa16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (us_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (us_plus:HI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])) + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "ukcrsa16\t%0, %1, %2" +) + +(define_expand "rcras16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_rcras16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_rcras16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "rcras16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (minus:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (plus:SI + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 1)))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "rcras16\t%0, %1, %2" +) + +(define_insn "rcras16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (minus:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (plus:SI + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 1)))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "rcras16\t%0, %1, %2" +) + +(define_expand "urcras16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_urcras16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_urcras16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "urcras16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (minus:SI + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 1 
"register_operand" " r") + (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (plus:SI + (zero_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 1)))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "urcras16\t%0, %1, %2" +) + +(define_insn "urcras16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (minus:SI + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (plus:SI + (zero_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 1)))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "urcras16\t%0, %1, %2" +) + +(define_expand "rcrsa16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_rcrsa16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_rcrsa16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "rcrsa16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (minus:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (plus:SI + (sign_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))) + (const_int 1)))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "rcrsa16\t%0, %1, %2" +) + +(define_insn "rcrsa16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (minus:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (plus:SI + (sign_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))) + (const_int 1)))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "rcrsa16\t%0, %1, %2" +) + +(define_expand "urcrsa16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_urcrsa16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_urcrsa16_1_le (operands[0], operands[1], 
operands[2])); + DONE; +}) + +(define_insn "urcrsa16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (minus:SI + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (plus:SI + (zero_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))) + (const_int 1)))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "urcrsa16\t%0, %1, %2" +) + +(define_insn "urcrsa16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (minus:SI + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (plus:SI + (zero_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))) + (const_int 1)))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "urcrsa16\t%0, %1, %2" +) + +(define_expand "v2hi3" + [(set (match_operand:V2HI 0 "register_operand" "") + (shifts:V2HI (match_operand:V2HI 1 "register_operand" "") + (match_operand:SI 2 "nds32_rimm4u_operand" "")))] + "NDS32_EXT_DSP_P ()" +{ + if (operands[2] == const0_rtx) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } +}) + +(define_insn "*ashlv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (ashift:V2HI (match_operand:V2HI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))] + "NDS32_EXT_DSP_P ()" + "@ + slli16\t%0, %1, %2 + sll16\t%0, %1, %2" + [(set_attr "type" "alu,alu") + (set_attr "length" " 4, 4")]) + +(define_insn "kslli16" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (ss_ashift:V2HI (match_operand:V2HI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))] + "NDS32_EXT_DSP_P ()" + "@ + kslli16\t%0, %1, %2 + ksll16\t%0, %1, %2" + [(set_attr "type" "alu,alu") + (set_attr "length" " 4, 4")]) + +(define_insn "*ashrv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))] + "NDS32_EXT_DSP_P ()" + "@ + srai16\t%0, %1, %2 + sra16\t%0, %1, %2" + [(set_attr "type" "alu,alu") + (set_attr "length" " 4, 4")]) + +(define_insn "sra16_round" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (unspec:V2HI [(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r"))] + UNSPEC_ROUND))] + "NDS32_EXT_DSP_P ()" + "@ + srai16.u\t%0, %1, %2 + sra16.u\t%0, %1, %2" + [(set_attr "type" "alu,alu") + (set_attr "length" " 4, 4")]) + +(define_insn "*lshrv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (lshiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))] + "NDS32_EXT_DSP_P ()" + "@ + srli16\t%0, %1, %2 + 
srl16\t%0, %1, %2" + [(set_attr "type" "alu,alu") + (set_attr "length" " 4, 4")]) + +(define_insn "srl16_round" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (unspec:V2HI [(lshiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r"))] + UNSPEC_ROUND))] + "NDS32_EXT_DSP_P ()" + "@ + srli16.u\t%0, %1, %2 + srl16.u\t%0, %1, %2" + [(set_attr "type" "alu,alu") + (set_attr "length" " 4, 4")]) + +(define_insn "kslra16" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (if_then_else:V2HI + (lt:SI (match_operand:SI 2 "register_operand" " r") + (const_int 0)) + (ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r") + (neg:SI (match_dup 2))) + (ashift:V2HI (match_dup 1) + (match_dup 2))))] + "NDS32_EXT_DSP_P ()" + "kslra16\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "kslra16_round" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (if_then_else:V2HI + (lt:SI (match_operand:SI 2 "register_operand" " r") + (const_int 0)) + (unspec:V2HI [(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r") + (neg:SI (match_dup 2)))] + UNSPEC_ROUND) + (ashift:V2HI (match_dup 1) + (match_dup 2))))] + "NDS32_EXT_DSP_P ()" + "kslra16.u\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "cmpeq" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(eq:SI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r"))] + UNSPEC_VEC_COMPARE))] + "NDS32_EXT_DSP_P ()" + "cmpeq\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "scmplt" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(lt:SI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r"))] + UNSPEC_VEC_COMPARE))] + "NDS32_EXT_DSP_P ()" + "scmplt\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "scmple" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(le:SI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r"))] + UNSPEC_VEC_COMPARE))] + "NDS32_EXT_DSP_P ()" + "scmple\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "ucmplt" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(ltu:SI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r"))] + UNSPEC_VEC_COMPARE))] + "NDS32_EXT_DSP_P ()" + "ucmplt\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "ucmple" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(leu:SI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r"))] + UNSPEC_VEC_COMPARE))] + "NDS32_EXT_DSP_P ()" + "ucmple\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "sclip16" + [(set (match_operand:V2HI 0 "register_operand" "= r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" " r") + (match_operand:SI 2 "nds32_imm4u_operand" " Iu04")] + UNSPEC_CLIPS))] + "NDS32_EXT_DSP_P ()" + "sclip16\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "uclip16" + [(set (match_operand:V2HI 0 "register_operand" "= r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" " r") + (match_operand:SI 2 "nds32_imm4u_operand" " Iu04")] + UNSPEC_CLIP))] + "NDS32_EXT_DSP_P ()" + "uclip16\t%0, %1, %2" + [(set_attr "type" 
"alu") + (set_attr "length" "4")]) + +(define_insn "khm16" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" " r") + (match_operand:V2HI 2 "register_operand" " r")] + UNSPEC_KHM))] + "NDS32_EXT_DSP_P ()" + "khm16\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "khmx16" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" " r") + (match_operand:V2HI 2 "register_operand" " r")] + UNSPEC_KHMX))] + "NDS32_EXT_DSP_P ()" + "khmx16\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_expand "vec_setv4qi" + [(match_operand:V4QI 0 "register_operand" "") + (match_operand:QI 1 "register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + HOST_WIDE_INT pos = INTVAL (operands[2]); + if (pos > 4) + gcc_unreachable (); + HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << pos; + emit_insn (gen_vec_setv4qi_internal (operands[0], operands[1], + operands[0], GEN_INT (elem))); + DONE; +}) + +(define_expand "insb" + [(match_operand:V4QI 0 "register_operand" "") + (match_operand:V4QI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (INTVAL (operands[3]) > 3 || INTVAL (operands[3]) < 0) + gcc_unreachable (); + + rtx src = gen_reg_rtx (QImode); + + convert_move (src, operands[2], false); + + HOST_WIDE_INT selector_index; + /* Big endian need reverse index. */ + if (TARGET_BIG_ENDIAN) + selector_index = 4 - INTVAL (operands[3]) - 1; + else + selector_index = INTVAL (operands[3]); + rtx selector = gen_int_mode (1 << selector_index, SImode); + emit_insn (gen_vec_setv4qi_internal (operands[0], src, + operands[1], selector)); + DONE; +}) + +(define_expand "insvsi" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "nds32_insv_operand" "")) + (match_operand:SI 3 "register_operand" ""))] + "NDS32_EXT_DSP_P ()" +{ + if (INTVAL (operands[1]) != 8) + FAIL; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + + +(define_insn "insvsi_internal" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r") + (const_int 8) + (match_operand:SI 1 "nds32_insv_operand" "i")) + (match_operand:SI 2 "register_operand" "r"))] + "NDS32_EXT_DSP_P ()" + "insb\t%0, %2, %v1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "insvsiqi_internal" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r") + (const_int 8) + (match_operand:SI 1 "nds32_insv_operand" "i")) + (zero_extend:SI (match_operand:QI 2 "register_operand" "r")))] + "NDS32_EXT_DSP_P ()" + "insb\t%0, %2, %v1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +;; Intermedium pattern for synthetize insvsiqi_internal +;; v0 = ((v1 & 0xff) << 8) +(define_insn_and_split "and0xff_s8" + [(set (match_operand:SI 0 "register_operand" "=r") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") + (const_int 8)) + (const_int 65280)))] + "NDS32_EXT_DSP_P () && !reload_completed" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_ashlsi3 (tmp, operands[1], gen_int_mode (8, SImode))); + emit_insn (gen_andsi3 (operands[0], tmp, gen_int_mode (0xffff, SImode))); + DONE; +}) + +;; v0 = (v1 & 0xff00ffff) | ((v2 << 16) | 0xff0000) +(define_insn_and_split "insbsi2" + [(set 
(match_operand:SI 0 "register_operand" "=r") + (ior:SI (and:SI (match_operand:SI 1 "register_operand" "0") + (const_int -16711681)) + (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") + (const_int 16)) + (const_int 16711680))))] + "NDS32_EXT_DSP_P () && !reload_completed" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (SImode); + emit_move_insn (tmp, operands[1]); + emit_insn (gen_insvsi_internal (tmp, gen_int_mode(16, SImode), operands[2])); + emit_move_insn (operands[0], tmp); + DONE; +}) + +;; v0 = (v1 & 0xff00ffff) | v2 +(define_insn_and_split "ior_and0xff00ffff_reg" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int -16711681)) + (match_operand:SI 2 "register_operand" "r")))] + "NDS32_EXT_DSP_P () && !reload_completed" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_andsi3 (tmp, operands[1], gen_int_mode (0xff00ffff, SImode))); + emit_insn (gen_iorsi3 (operands[0], tmp, operands[2])); + DONE; +}) + +(define_insn "vec_setv4qi_internal" + [(set (match_operand:V4QI 0 "register_operand" "= r, r, r, r") + (vec_merge:V4QI + (vec_duplicate:V4QI + (match_operand:QI 1 "register_operand" " r, r, r, r")) + (match_operand:V4QI 2 "register_operand" " 0, 0, 0, 0") + (match_operand:SI 3 "nds32_imm_1_2_4_8_operand" " Iv01, Iv02, Iv04, Iv08")))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "insb\t%0, %1, 3", + "insb\t%0, %1, 2", + "insb\t%0, %1, 1", + "insb\t%0, %1, 0" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "insb\t%0, %1, 0", + "insb\t%0, %1, 1", + "insb\t%0, %1, 2", + "insb\t%0, %1, 3" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "vec_setv4qi_internal_vec" + [(set (match_operand:V4QI 0 "register_operand" "= r, r, r, r") + (vec_merge:V4QI + (vec_duplicate:V4QI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r, r, r, r") + (parallel [(const_int 0)]))) + (match_operand:V4QI 2 "register_operand" " 0, 0, 0, 0") + (match_operand:SI 3 "nds32_imm_1_2_4_8_operand" " Iv01, Iv02, Iv04, Iv08")))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + insb\t%0, %1, 0 + insb\t%0, %1, 1 + insb\t%0, %1, 2 + insb\t%0, %1, 3" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "vec_mergev4qi_and_cv0_1" + [(set (match_operand:V4QI 0 "register_operand" "=$l,r") + (vec_merge:V4QI + (vec_duplicate:V4QI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " l,r") + (parallel [(const_int 0)]))) + (const_vector:V4QI [ + (const_int 0) + (const_int 0) + (const_int 0) + (const_int 0)]) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeb33\t%0, %1 + zeb\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_mergev4qi_and_cv0_2" + [(set (match_operand:V4QI 0 "register_operand" "=$l,r") + (vec_merge:V4QI + (const_vector:V4QI [ + (const_int 0) + (const_int 0) + (const_int 0) + (const_int 0)]) + (vec_duplicate:V4QI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " l,r") + (parallel [(const_int 0)]))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeb33\t%0, %1 + zeb\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_mergeqi_and_cv0_1" + [(set (match_operand:V4QI 0 "register_operand" "=$l,r") + (vec_merge:V4QI + 
(vec_duplicate:V4QI (match_operand:QI 1 "register_operand" " l,r")) + (const_vector:V4QI [ + (const_int 0) + (const_int 0) + (const_int 0) + (const_int 0)]) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeb33\t%0, %1 + zeb\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_mergeqi_and_cv0_2" + [(set (match_operand:V4QI 0 "register_operand" "=$l,r") + (vec_merge:V4QI + (const_vector:V4QI [ + (const_int 0) + (const_int 0) + (const_int 0) + (const_int 0)]) + (vec_duplicate:V4QI (match_operand:QI 1 "register_operand" " l,r")) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeb33\t%0, %1 + zeb\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_expand "vec_setv2hi" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:HI 1 "register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + HOST_WIDE_INT pos = INTVAL (operands[2]); + if (pos > 2) + gcc_unreachable (); + HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << pos; + emit_insn (gen_vec_setv2hi_internal (operands[0], operands[1], + operands[0], GEN_INT (elem))); + DONE; +}) + +(define_insn "vec_setv2hi_internal" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (match_operand:HI 1 "register_operand" " r, r")) + (match_operand:V2HI 2 "register_operand" " r, r") + (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "pkbb16\t%0, %1, %2", + "pktb16\t%0, %2, %1" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "pktb16\t%0, %2, %1", + "pkbb16\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "vec_mergev2hi_and_cv0_1" + [(set (match_operand:V2HI 0 "register_operand" "=$l,r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " l,r") + (parallel [(const_int 0)]))) + (const_vector:V2HI [ + (const_int 0) + (const_int 0)]) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeh33\t%0, %1 + zeh\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_mergev2hi_and_cv0_2" + [(set (match_operand:V2HI 0 "register_operand" "=$l,r") + (vec_merge:V2HI + (const_vector:V2HI [ + (const_int 0) + (const_int 0)]) + (vec_duplicate:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " l,r") + (parallel [(const_int 0)]))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeh33\t%0, %1 + zeh\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_mergehi_and_cv0_1" + [(set (match_operand:V2HI 0 "register_operand" "=$l,r") + (vec_merge:V2HI + (vec_duplicate:V2HI (match_operand:HI 1 "register_operand" " l,r")) + (const_vector:V2HI [ + (const_int 0) + (const_int 0)]) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeh33\t%0, %1 + zeh\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_mergehi_and_cv0_2" + [(set (match_operand:V2HI 0 "register_operand" "=$l,r") + (vec_merge:V2HI + (const_vector:V2HI [ + (const_int 0) + (const_int 0)]) + (vec_duplicate:V2HI (match_operand:HI 1 "register_operand" " l,r")) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeh33\t%0, %1 + zeh\t%0, %1" + [(set_attr "type" "alu,alu") + 
(set_attr "length" " 2, 4")]) + +(define_expand "pkbb" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V2HI 1 "register_operand") + (match_operand:V2HI 2 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (1), GEN_INT (1))); + } + else + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (2), GEN_INT (0), GEN_INT (0))); + } + DONE; +}) + +(define_insn "pkbbsi_1" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int 65535)) + (ashift:SI (match_operand:SI 2 "register_operand" "r") + (const_int 16))))] + "NDS32_EXT_DSP_P ()" + "pkbb16\t%0, %2, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "pkbbsi_2" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") + (const_int 16)) + (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int 65535))))] + "NDS32_EXT_DSP_P ()" + "pkbb16\t%0, %2, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "pkbbsi_3" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "r")) + (ashift:SI (match_operand:SI 2 "register_operand" "r") + (const_int 16))))] + "NDS32_EXT_DSP_P ()" + "pkbb16\t%0, %2, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "pkbbsi_4" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") + (const_int 16)) + (zero_extend:SI (match_operand:HI 1 "register_operand" "r"))))] + "NDS32_EXT_DSP_P ()" + "pkbb16\t%0, %2, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +;; v0 = (v1 & 0xffff0000) | (v2 & 0xffff) +(define_insn "pktbsi_1" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int -65536)) + (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))] + "NDS32_EXT_DSP_P ()" + "pktb16\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "pktbsi_2" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int -65536)) + (and:SI (match_operand:SI 2 "register_operand" "r") + (const_int 65535))))] + "NDS32_EXT_DSP_P ()" + "pktb16\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "pktbsi_3" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r") + (const_int 16 ) + (const_int 0)) + (match_operand:SI 1 "register_operand" " r"))] + "NDS32_EXT_DSP_P ()" + "pktb16\t%0, %0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "pktbsi_4" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r") + (const_int 16 ) + (const_int 0)) + (zero_extend:SI (match_operand:HI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "pktb16\t%0, %0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "pkttsi" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (and:SI (match_operand:SI 1 "register_operand" " r") + (const_int -65536)) + (lshiftrt:SI (match_operand:SI 2 "register_operand" " r") + (const_int 16))))] + "NDS32_EXT_DSP_P ()" + "pktt16\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_expand "pkbt" + [(match_operand:V2HI 0 
"register_operand") + (match_operand:V2HI 1 "register_operand") + (match_operand:V2HI 2 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (1), GEN_INT (0))); + } + else + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (2), GEN_INT (0), GEN_INT (1))); + } + DONE; +}) + +(define_expand "pktt" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V2HI 1 "register_operand") + (match_operand:V2HI 2 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (0), GEN_INT (0))); + } + else + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (2), GEN_INT (1), GEN_INT (1))); + } + DONE; +}) + +(define_expand "pktb" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V2HI 1 "register_operand") + (match_operand:V2HI 2 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (0), GEN_INT (1))); + } + else + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (2), GEN_INT (1), GEN_INT (0))); + } + DONE; +}) + +(define_insn "vec_mergerr" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (match_operand:HI 1 "register_operand" " r, r")) + (vec_duplicate:V2HI + (match_operand:HI 2 "register_operand" " r, r")) + (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + pkbb16\t%0, %2, %1 + pkbb16\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + + +(define_insn "vec_merge" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (vec_merge:V2HI + (match_operand:V2HI 1 "register_operand" " r, r") + (match_operand:V2HI 2 "register_operand" " r, r") + (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "pktb16\t%0, %1, %2", + "pktb16\t%0, %2, %1" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "pktb16\t%0, %2, %1", + "pktb16\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "vec_mergerv" + [(set (match_operand:V2HI 0 "register_operand" "= r, r, r, r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (match_operand:HI 1 "register_operand" " r, r, r, r")) + (vec_duplicate:V2HI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv00, Iv01")]))) + (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv01, Iv02, Iv02")))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + pkbb16\t%0, %2, %1 + pktb16\t%0, %2, %1 + pkbb16\t%0, %1, %2 + pkbt16\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "vec_mergevr" + [(set (match_operand:V2HI 0 "register_operand" "= r, r, r, r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv00, Iv01")]))) + (vec_duplicate:V2HI + (match_operand:HI 2 "register_operand" " r, r, r, r")) + (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv01, Iv02, Iv02")))] + 
"NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + pkbb16\t%0, %2, %1 + pkbt16\t%0, %2, %1 + pkbb16\t%0, %1, %2 + pktb16\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "vec_mergevv" + [(set (match_operand:V2HI 0 "register_operand" "= r, r, r, r, r, r, r, r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r, r, r, r, r, r, r, r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01, Iv00, Iv00, Iv01, Iv01")]))) + (vec_duplicate:V2HI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r, r, r, r, r, r, r") + (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00, Iv00, Iv01, Iv01, Iv00")]))) + (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv01, Iv01, Iv01, Iv02, Iv02, Iv02, Iv02")))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "pktt16\t%0, %1, %2", + "pktb16\t%0, %1, %2", + "pkbb16\t%0, %1, %2", + "pkbt16\t%0, %1, %2", + "pktt16\t%0, %2, %1", + "pkbt16\t%0, %2, %1", + "pkbb16\t%0, %2, %1", + "pktb16\t%0, %2, %1" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "pkbb16\t%0, %2, %1", + "pktb16\t%0, %2, %1", + "pktt16\t%0, %2, %1", + "pkbt16\t%0, %2, %1", + "pkbb16\t%0, %1, %2", + "pkbt16\t%0, %1, %2", + "pktt16\t%0, %1, %2", + "pktb16\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_expand "vec_extractv4qi" + [(set (match_operand:QI 0 "register_operand" "") + (vec_select:QI + (match_operand:V4QI 1 "nonimmediate_operand" "") + (parallel [(match_operand:SI 2 "const_int_operand" "")])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + if (INTVAL (operands[2]) != 0 + && INTVAL (operands[2]) != 1 + && INTVAL (operands[2]) != 2 + && INTVAL (operands[2]) != 3) + gcc_unreachable (); + + if (INTVAL (operands[2]) != 0 && MEM_P (operands[0])) + FAIL; +}) + +(define_insn "vec_extractv4qi0" + [(set (match_operand:QI 0 "register_operand" "=l,r,r") + (vec_select:QI + (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m") + (parallel [(const_int 0)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + switch (which_alternative) + { + case 0: + return "zeb33\t%0, %1"; + case 1: + return "zeb\t%0, %1"; + case 2: + return nds32_output_32bit_load (operands, 1); + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "vec_extractv4qi0_ze" + [(set (match_operand:SI 0 "register_operand" "=l,r,r") + (zero_extend:SI + (vec_select:QI + (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m") + (parallel [(const_int 0)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + switch (which_alternative) + { + case 0: + return "zeb33\t%0, %1"; + case 1: + return "zeb\t%0, %1"; + case 2: + return nds32_output_32bit_load (operands, 1); + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "vec_extractv4qi0_se" + [(set (match_operand:SI 0 "register_operand" "=l,r,r") + (sign_extend:SI + (vec_select:QI + (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m") + (parallel [(const_int 0)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + switch (which_alternative) + { + case 0: + return "seb33\t%0, %1"; + case 1: + return "seb\t%0, %1"; + case 2: + return nds32_output_32bit_load_se (operands, 1); + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn_and_split 
"vec_extractv4qi1" + [(set (match_operand:QI 0 "register_operand" "=r") + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1)])))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (V4QImode); + emit_insn (gen_rotrv4qi_1 (tmp, operands[1])); + emit_insn (gen_vec_extractv4qi0 (operands[0], tmp)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn_and_split "vec_extractv4qi2" + [(set (match_operand:QI 0 "register_operand" "=r") + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2)])))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (V4QImode); + emit_insn (gen_rotrv4qi_2 (tmp, operands[1])); + emit_insn (gen_vec_extractv4qi0 (operands[0], tmp)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn_and_split "vec_extractv4qi3" + [(set (match_operand:QI 0 "register_operand" "=r") + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)])))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (V4QImode); + emit_insn (gen_rotrv4qi_3 (tmp, operands[1])); + emit_insn (gen_vec_extractv4qi0 (operands[0], tmp)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "vec_extractv4qi3_se" + [(set (match_operand:SI 0 "register_operand" "=$d,r") + (sign_extend:SI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " 0,r") + (parallel [(const_int 3)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + srai45\t%0, 24 + srai\t%0, %1, 24" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_extractv4qi3_ze" + [(set (match_operand:SI 0 "register_operand" "=$d,r") + (zero_extend:SI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " 0,r") + (parallel [(const_int 3)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + srli45\t%0, 24 + srli\t%0, %1, 24" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn_and_split "vec_extractv4qihi0" + [(set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (QImode); + emit_insn (gen_vec_extractv4qi0 (tmp, operands[1])); + emit_insn (gen_extendqihi2 (operands[0], tmp)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn_and_split "vec_extractv4qihi1" + [(set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (QImode); + emit_insn (gen_vec_extractv4qi1 (tmp, operands[1])); + emit_insn (gen_extendqihi2 (operands[0], tmp)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn_and_split "vec_extractv4qihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI + 
(vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (QImode); + emit_insn (gen_vec_extractv4qi2 (tmp, operands[1])); + emit_insn (gen_extendqihi2 (operands[0], tmp)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn_and_split "vec_extractv4qihi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (QImode); + emit_insn (gen_vec_extractv4qi3 (tmp, operands[1])); + emit_insn (gen_extendqihi2 (operands[0], tmp)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_expand "vec_extractv2hi" + [(set (match_operand:HI 0 "register_operand" "") + (vec_select:HI + (match_operand:V2HI 1 "nonimmediate_operand" "") + (parallel [(match_operand:SI 2 "const_int_operand" "")])))] + "NDS32_EXT_DSP_P ()" +{ + if (INTVAL (operands[2]) != 0 + && INTVAL (operands[2]) != 1) + gcc_unreachable (); + + if (INTVAL (operands[2]) != 0 && MEM_P (operands[0])) + FAIL; +}) + +(define_insn "vec_extractv2hi0" + [(set (match_operand:HI 0 "register_operand" "=$l,r,r") + (vec_select:HI + (match_operand:V2HI 1 "nonimmediate_operand" " l,r,m") + (parallel [(const_int 0)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + switch (which_alternative) + { + case 0: + return "seh33\t%0, %1"; + case 1: + return "seh\t%0, %1"; + case 2: + return nds32_output_32bit_load_se (operands, 2); + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu,alu,load") + (set_attr "length" " 2, 4, 4")]) + +(define_insn "vec_extractv2hi0_be" + [(set (match_operand:HI 0 "register_operand" "=$d,r") + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " 0,r") + (parallel [(const_int 0)])))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "@ + srai45\t%0, 16 + srai\t%0, %1, 16" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_extractv2hi1" + [(set (match_operand:HI 0 "register_operand" "=$d,r") + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " 0,r") + (parallel [(const_int 1)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + srai45\t%0, 16 + srai\t%0, %1, 16" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_extractv2hi1_be" + [(set (match_operand:HI 0 "register_operand" "=$l,r,r") + (vec_select:HI + (match_operand:V2HI 1 "nonimmediate_operand" " l,r,m") + (parallel [(const_int 1)])))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" +{ + switch (which_alternative) + { + case 0: + return "seh33\t%0, %1"; + case 1: + return "seh\t%0, %1"; + case 2: + return nds32_output_32bit_load_se (operands, 2); + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu,alu,load") + (set_attr "length" " 2, 4, 4")]) + +(define_insn "mul16" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (mult:V2SI (extend:V2SI (match_operand:V2HI 1 "register_operand" "%r")) + (extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))))] + "NDS32_EXT_DSP_P ()" + "mul16\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")]) + +(define_insn "mulx16" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (vec_merge:V2SI + 
(vec_duplicate:V2SI + (mult:SI + (extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))))) + (vec_duplicate:V2SI + (mult:SI + (extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)]))) + (extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))) + (const_int 1)))] + "NDS32_EXT_DSP_P ()" + "mulx16\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")]) + +(define_insn "rotrv2hi_1" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_select:V2HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1) (const_int 0)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 16" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rotrv2hi_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_select:V2HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0) (const_int 1)])))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 16" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rotrv4qi_1" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1) (const_int 2) (const_int 3) (const_int 0)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 8" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rotrv4qi_1_be" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2) (const_int 1) (const_int 0) (const_int 3)])))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 8" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rotrv4qi_2" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2) (const_int 3) (const_int 0) (const_int 1)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 16" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rotrv4qi_2_be" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)])))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 16" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rotrv4qi_3" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3) (const_int 0) (const_int 1) (const_int 2)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 24" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rotrv4qi_3_be" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0) (const_int 3) (const_int 2) (const_int 1)])))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 24" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "v4qi_dup_10" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0) (const_int 1) (const_int 0) (const_int 1)])))] + "NDS32_EXT_DSP_P () && 
!TARGET_BIG_ENDIAN" + "pkbb\t%0, %1, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "v4qi_dup_32" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2) (const_int 3) (const_int 2) (const_int 3)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "pktt\t%0, %1, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_expand "vec_unpacks_lo_v4qi" + [(match_operand:V2HI 0 "register_operand" "=r") + (match_operand:V4QI 1 "register_operand" " r")] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + emit_insn (gen_sunpkd810 (operands[0], operands[1])); + DONE; +}) + +(define_expand "sunpkd810" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_sunpkd810_imp_be (operands[0], operands[1])); + else + emit_insn (gen_sunpkd810_imp (operands[0], operands[1])); + DONE; +}) + +(define_insn "unpkd810_imp" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd810\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "unpkd810_imp_inv" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd810\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "unpkd810_imp_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 3)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd810\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "unpkd810_imp_inv_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 2)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd810\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_expand "sunpkd820" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_sunpkd820_imp_be (operands[0], operands[1])); + else + emit_insn (gen_sunpkd820_imp (operands[0], operands[1])); + DONE; +}) + +(define_insn "unpkd820_imp" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + 
(parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd820\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "unpkd820_imp_inv" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 2)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd820\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "unpkd820_imp_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 3)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd820\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "unpkd820_imp_inv_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd820\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_expand "sunpkd830" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_sunpkd830_imp_be (operands[0], operands[1])); + else + emit_insn (gen_sunpkd830_imp (operands[0], operands[1])); + DONE; +}) + +(define_insn "unpkd830_imp" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd830\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "unpkd830_imp_inv" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 3)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd830\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "unpkd830_imp_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 3)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd830\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "unpkd830_imp_inv_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 
"register_operand" " r") + (parallel [(const_int 3)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd830\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_expand "sunpkd831" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_sunpkd831_imp_be (operands[0], operands[1])); + else + emit_insn (gen_sunpkd831_imp (operands[0], operands[1])); + DONE; +}) + +(define_insn "unpkd831_imp" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd831\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "unpkd831_imp_inv" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 3)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd831\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "unpkd831_imp_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 2)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd831\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "unpkd831_imp_inv_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd831\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_expand "zunpkd810" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_zunpkd810_imp_be (operands[0], operands[1])); + else + emit_insn (gen_zunpkd810_imp (operands[0], operands[1])); + DONE; +}) + +(define_expand "zunpkd820" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_zunpkd820_imp_be (operands[0], operands[1])); + else + emit_insn (gen_zunpkd820_imp (operands[0], operands[1])); + DONE; +}) + +(define_expand "zunpkd830" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_zunpkd830_imp_be (operands[0], operands[1])); + else + emit_insn (gen_zunpkd830_imp (operands[0], operands[1])); + DONE; +}) + +(define_expand "zunpkd831" + [(match_operand:V2HI 0 "register_operand") + 
(match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_zunpkd831_imp_be (operands[0], operands[1])); + else + emit_insn (gen_zunpkd831_imp (operands[0], operands[1])); + DONE; +}) + +(define_expand "smbb" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (1))); + else + emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], + GEN_INT (0), GEN_INT (0))); + DONE; +}) + +(define_expand "smbt" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (0))); + else + emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], + GEN_INT (0), GEN_INT (1))); + DONE; +}) + +(define_expand "smtt" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], + GEN_INT (0), GEN_INT (0))); + else + emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (1))); + DONE; +}) + +(define_insn "mulhisi3v" + [(set (match_operand:SI 0 "register_operand" "= r, r, r, r") + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")])))))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "smtt\t%0, %1, %2", + "smbt\t%0, %2, %1", + "smbb\t%0, %1, %2", + "smbt\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "smbb\t%0, %1, %2", + "smbt\t%0, %1, %2", + "smtt\t%0, %1, %2", + "smbt\t%0, %2, %1" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "mul") + (set_attr "length" "4")]) + +(define_expand "kmabb" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], + GEN_INT (1), GEN_INT (1), + operands[1])); + else + emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], + GEN_INT (0), GEN_INT (0), + operands[1])); + DONE; +}) + +(define_expand "kmabt" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], + GEN_INT (1), GEN_INT (0), + operands[1])); + else + emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], + GEN_INT (0), GEN_INT (1), + operands[1])); + DONE; +}) + +(define_expand "kmatt" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" 
"") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], + GEN_INT (0), GEN_INT (0), + operands[1])); + else + emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], + GEN_INT (1), GEN_INT (1), + operands[1])); + DONE; +}) + +(define_insn "kma_internal" + [(set (match_operand:SI 0 "register_operand" "= r, r, r, r") + (ss_plus:SI + (match_operand:SI 5 "register_operand" " 0, 0, 0, 0") + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")]))))))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "kmatt\t%0, %1, %2", + "kmabt\t%0, %2, %1", + "kmabb\t%0, %1, %2", + "kmabt\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "kmabb\t%0, %1, %2", + "kmabt\t%0, %1, %2", + "kmatt\t%0, %1, %2", + "kmabt\t%0, %2, %1" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_expand "smds" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smds_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_smds_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "smds_le" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))))] + "NDS32_EXT_DSP_P ()" +{ +}) + +(define_expand "smds_be" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))))))] + "NDS32_EXT_DSP_P ()" +{ +}) + +(define_expand "smdrs" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smdrs_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_smdrs_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "smdrs_le" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI 
(vec_select:HI + (match_dup 1) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))))))] + "NDS32_EXT_DSP_P ()" +{ +}) + +(define_expand "smdrs_be" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))))] + "NDS32_EXT_DSP_P ()" +{ +}) + +(define_expand "smxdsv" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smxdsv_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_smxdsv_le (operands[0], operands[1], operands[2])); + DONE; +}) + + +(define_expand "smxdsv_le" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))))))] + "NDS32_EXT_DSP_P ()" +{ +}) + +(define_expand "smxdsv_be" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))))] + "NDS32_EXT_DSP_P ()" +{ +}) + +(define_insn "smal1" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" " r") + (sign_extend:DI + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))))))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "smal2" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" " r") + (mult:DI + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:DI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))))))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "smal3" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" " r") + (sign_extend:DI + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))))))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn 
"smal4" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" " r") + (mult:DI + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:DI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "smal5" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (sign_extend:DI + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))))) + (match_operand:DI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "smal6" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:DI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))) + (match_operand:DI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "smal7" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (sign_extend:DI + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))) + (match_operand:DI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "smal8" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:DI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))) + (match_operand:DI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +;; We need this dummy pattern for smal +(define_insn_and_split "extendsidi2" + [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI (match_operand:SI 1 "nds32_move_operand" "")))] + "NDS32_EXT_DSP_P ()" + "#" + "NDS32_EXT_DSP_P ()" + [(const_int 0)] +{ + rtx high_part_dst, low_part_dst; + + low_part_dst = nds32_di_low_part_subreg (operands[0]); + high_part_dst = nds32_di_high_part_subreg (operands[0]); + + emit_move_insn (low_part_dst, operands[1]); + emit_insn (gen_ashrsi3 (high_part_dst, low_part_dst, GEN_INT (31))); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +;; We need this dummy pattern for usmar64/usmsr64 +(define_insn_and_split "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI (match_operand:SI 1 "nds32_move_operand" "")))] + "NDS32_EXT_DSP_P ()" + "#" + "NDS32_EXT_DSP_P ()" + [(const_int 0)] +{ + rtx high_part_dst, low_part_dst; + + low_part_dst = nds32_di_low_part_subreg (operands[0]); + high_part_dst = nds32_di_high_part_subreg (operands[0]); + + emit_move_insn (low_part_dst, operands[1]); + emit_move_insn (high_part_dst, const0_rtx); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn_and_split "extendhidi2" + [(set (match_operand:DI 0 "register_operand" "") + 
(sign_extend:DI (match_operand:HI 1 "nonimmediate_operand" "")))] + "NDS32_EXT_DSP_P ()" + "#" + "NDS32_EXT_DSP_P ()" + [(const_int 0)] +{ + rtx high_part_dst, low_part_dst; + + low_part_dst = nds32_di_low_part_subreg (operands[0]); + high_part_dst = nds32_di_high_part_subreg (operands[0]); + + + emit_insn (gen_extendhisi2 (low_part_dst, operands[1])); + emit_insn (gen_ashrsi3 (high_part_dst, low_part_dst, GEN_INT (31))); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI (match_operand:QI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "sunpkd820\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "smulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) + (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))) + (const_int 32))))] + "NDS32_EXT_DSP_P ()" + "smmul\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")]) + +(define_insn "smmul_round" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (unspec:DI [(mult:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) + (sign_extend:DI (match_operand:SI 2 "register_operand" " r")))] + UNSPEC_ROUND) + (const_int 32))))] + "NDS32_EXT_DSP_P ()" + "smmul.u\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")]) + +(define_insn "kmmac" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI (match_operand:SI 1 "register_operand" " 0") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI (match_operand:SI 3 "register_operand" " r"))) + (const_int 32)))))] + "NDS32_EXT_DSP_P ()" + "kmmac\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "kmmac_round" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI (match_operand:SI 1 "register_operand" " 0") + (truncate:SI + (lshiftrt:DI + (unspec:DI [(mult:DI + (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI (match_operand:SI 3 "register_operand" " r")))] + UNSPEC_ROUND) + (const_int 32)))))] + "NDS32_EXT_DSP_P ()" + "kmmac.u\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "kmmsb" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_minus:SI (match_operand:SI 1 "register_operand" " 0") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI (match_operand:SI 3 "register_operand" " r"))) + (const_int 32)))))] + "NDS32_EXT_DSP_P ()" + "kmmsb\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "kmmsb_round" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_minus:SI (match_operand:SI 1 "register_operand" " 0") + (truncate:SI + (lshiftrt:DI + (unspec:DI [(mult:DI + (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI (match_operand:SI 3 "register_operand" " r")))] + UNSPEC_ROUND) + (const_int 32)))))] + "NDS32_EXT_DSP_P ()" + "kmmsb.u\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "kwmmul" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (ss_mult:DI + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) (const_int 2)) + (mult:DI (sign_extend:DI 
(match_operand:SI 2 "register_operand" " r")) (const_int 2))) + (const_int 32))))] + "NDS32_EXT_DSP_P ()" + "kwmmul\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")]) + +(define_insn "kwmmul_round" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (unspec:DI [ + (ss_mult:DI + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) (const_int 2)) + (mult:DI (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) (const_int 2)))] + UNSPEC_ROUND) + (const_int 32))))] + "NDS32_EXT_DSP_P ()" + "kwmmul.u\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")]) + +(define_expand "smmwb" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (1))); + else + emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (0))); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_expand "smmwt" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (0))); + else + emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (1))); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + + +(define_insn "smulhisi3_highpart_1" + [(set (match_operand:SI 0 "register_operand" "= r, r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" " r, r")) + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")])))) + (const_int 16))))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "smmwt\t%0, %1, %2", + "smmwb\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "smmwb\t%0, %1, %2", + "smmwt\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "mul") + (set_attr "length" "4")]) + +(define_insn "smulhisi3_highpart_2" + [(set (match_operand:SI 0 "register_operand" "= r, r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r, r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))) + (sign_extend:DI (match_operand:SI 2 "register_operand" " r, r"))) + (const_int 16))))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "smmwt\t%0, %1, %2", + "smmwb\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "smmwb\t%0, %1, %2", + "smmwt\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "mul") + (set_attr "length" "4")]) + +(define_expand "smmwb_round" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (1))); + else + emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (0))); + DONE; +} + [(set_attr "type" "mul") + (set_attr "length" "4")]) + 
+(define_expand "smmwt_round" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (0))); + else + emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (1))); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "smmw_round_internal" + [(set (match_operand:SI 0 "register_operand" "= r, r") + (truncate:SI + (lshiftrt:DI + (unspec:DI + [(mult:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" " r, r")) + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))] + UNSPEC_ROUND) + (const_int 16))))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "smmwt.u\t%0, %1, %2", + "smmwb.u\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "smmwb.u\t%0, %1, %2", + "smmwt.u\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "mul") + (set_attr "length" "4")]) + +(define_expand "kmmawb" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1])); + else + emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1])); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_expand "kmmawt" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1])); + else + emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1])); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "kmmaw_internal" + [(set (match_operand:SI 0 "register_operand" "= r, r") + (ss_plus:SI + (match_operand:SI 4 "register_operand" " 0, 0") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" " r, r")) + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")])))) + (const_int 16)))))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "kmmawt\t%0, %1, %2", + "kmmawb\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "kmmawb\t%0, %1, %2", + "kmmawt\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_expand "kmmawb_round" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1])); + else + emit_insn (gen_kmmaw_round_internal (operands[0], 
operands[2], operands[3], GEN_INT (0), operands[1])); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_expand "kmmawt_round" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1])); + else + emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1])); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + + +(define_insn "kmmaw_round_internal" + [(set (match_operand:SI 0 "register_operand" "= r, r") + (ss_plus:SI + (match_operand:SI 4 "register_operand" " 0, 0") + (truncate:SI + (lshiftrt:DI + (unspec:DI + [(mult:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" " r, r")) + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))] + UNSPEC_ROUND) + (const_int 16)))))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "kmmawt.u\t%0, %1, %2", + "kmmawb.u\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "kmmawb.u\t%0, %1, %2", + "kmmawt.u\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_expand "smalbb" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smaddhidi (operands[0], operands[2], + operands[3], operands[1], + GEN_INT (1), GEN_INT (1))); + else + emit_insn (gen_smaddhidi (operands[0], operands[2], + operands[3], operands[1], + GEN_INT (0), GEN_INT (0))); + DONE; +}) + +(define_expand "smalbt" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smaddhidi (operands[0], operands[2], + operands[3], operands[1], + GEN_INT (1), GEN_INT (0))); + else + emit_insn (gen_smaddhidi (operands[0], operands[2], + operands[3], operands[1], + GEN_INT (0), GEN_INT (1))); + DONE; +}) + +(define_expand "smaltt" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smaddhidi (operands[0], operands[2], + operands[3], operands[1], + GEN_INT (0), GEN_INT (0))); + else + emit_insn (gen_smaddhidi (operands[0], operands[2], + operands[3], operands[1], + GEN_INT (1), GEN_INT (1))); + DONE; +}) + +(define_insn "smaddhidi" + [(set (match_operand:DI 0 "register_operand" "= r, r, r, r") + (plus:DI + (match_operand:DI 3 "register_operand" " 0, 0, 0, 0") + (mult:DI + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")]))) + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, 
Iv00")]))))))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "smaltt\t%0, %1, %2", + "smalbt\t%0, %2, %1", + "smalbb\t%0, %1, %2", + "smalbt\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "smalbb\t%0, %1, %2", + "smalbt\t%0, %1, %2", + "smaltt\t%0, %1, %2", + "smalbt\t%0, %2, %1" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "smaddhidi2" + [(set (match_operand:DI 0 "register_operand" "= r, r, r, r") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")]))) + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")])))) + (match_operand:DI 3 "register_operand" " 0, 0, 0, 0")))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "smaltt\t%0, %1, %2", + "smalbt\t%0, %2, %1", + "smalbb\t%0, %1, %2", + "smalbt\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "smalbb\t%0, %1, %2", + "smalbt\t%0, %1, %2", + "smaltt\t%0, %1, %2", + "smalbt\t%0, %2, %1" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_expand "smalda1" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" " r") + (match_operand:V2HI 3 "register_operand" " r")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smalda1_be (operands[0], operands[1], operands[2], operands[3])); + else + emit_insn (gen_smalda1_le (operands[0], operands[1], operands[2], operands[3])); + DONE; +}) + +(define_expand "smalds1" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" " r") + (match_operand:V2HI 3 "register_operand" " r")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smalds1_be (operands[0], operands[1], operands[2], operands[3])); + else + emit_insn (gen_smalds1_le (operands[0], operands[1], operands[2], operands[3])); + DONE; +}) + +(define_insn "smalda1_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 0)]))))))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "smalda\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "smalds1_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + 
(sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 0)]))))))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "smalds\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "smalda1_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)]))))))))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "smalda\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "smalds1_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)]))))))))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "smalds\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_expand "smaldrs3" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" " r") + (match_operand:V2HI 3 "register_operand" " r")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smaldrs3_be (operands[0], operands[1], operands[2], operands[3])); + else + emit_insn (gen_smaldrs3_le (operands[0], operands[1], operands[2], operands[3])); + DONE; +}) + +(define_insn "smaldrs3_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)]))))))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "smaldrs\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "smaldrs3_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 0)]))))))))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "smaldrs\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_expand "smalxda1" + [(match_operand:DI 0 
"register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" " r") + (match_operand:V2HI 3 "register_operand" " r")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smalxda1_be (operands[0], operands[1], operands[2], operands[3])); + else + emit_insn (gen_smalxda1_le (operands[0], operands[1], operands[2], operands[3])); + DONE; +}) + +(define_expand "smalxds1" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" " r") + (match_operand:V2HI 3 "register_operand" " r")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smalxds1_be (operands[0], operands[1], operands[2], operands[3])); + else + emit_insn (gen_smalxds1_le (operands[0], operands[1], operands[2], operands[3])); + DONE; +}) + +(define_insn "smalxd1_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (plus_minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)]))))))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "smalxd\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + + +(define_insn "smalxd1_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (plus_minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 0)]))))))))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "smalxd\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "smslda1" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI + (minus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 1)])))))) + (sign_extend:DI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 0)])))))))] + "NDS32_EXT_DSP_P ()" + "smslda\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "smslxda1" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI + (minus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))))) + (sign_extend:DI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + 
(match_dup 3) + (parallel [(const_int 1)])))))))] + "NDS32_EXT_DSP_P ()" + "smslxda\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +;; mada for synthetize smalda +(define_insn_and_split "mada1" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" "r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" "r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))] + "NDS32_EXT_DSP_P () && !reload_completed" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx result0 = gen_reg_rtx (SImode); + rtx result1 = gen_reg_rtx (SImode); + emit_insn (gen_mulhisi3v (result0, operands[1], operands[2], + operands[3], operands[4])); + emit_insn (gen_mulhisi3v (result1, operands[1], operands[2], + operands[5], operands[6])); + emit_insn (gen_addsi3 (operands[0], result0, result1)); + DONE; +}) + +(define_insn_and_split "mada2" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" "r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" "r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))] + "NDS32_EXT_DSP_P () && !reload_completed" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx result0 = gen_reg_rtx (SImode); + rtx result1 = gen_reg_rtx (SImode); + emit_insn (gen_mulhisi3v (result0, operands[1], operands[2], + operands[3], operands[4])); + emit_insn (gen_mulhisi3v (result1, operands[1], operands[2], + operands[6], operands[5])); + emit_insn (gen_addsi3 (operands[0], result0, result1)); + DONE; +}) + +;; sms for synthetize smalds +(define_insn_and_split "sms1" + [(set (match_operand:SI 0 "register_operand" "= r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))] + "NDS32_EXT_DSP_P () + && (!reload_completed + || !nds32_need_split_sms_p (operands[3], operands[4], + operands[5], operands[6]))" + +{ + return nds32_output_sms (operands[3], operands[4], + operands[5], operands[6]); +} + "NDS32_EXT_DSP_P () + && !reload_completed + && nds32_need_split_sms_p (operands[3], operands[4], + operands[5], operands[6])" + [(const_int 1)] +{ + nds32_split_sms (operands[0], operands[1], operands[2], + operands[3], 
operands[4], + operands[5], operands[6]); + DONE; +} + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn_and_split "sms2" + [(set (match_operand:SI 0 "register_operand" "= r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))] + "NDS32_EXT_DSP_P () + && (!reload_completed + || !nds32_need_split_sms_p (operands[3], operands[4], + operands[6], operands[5]))" +{ + return nds32_output_sms (operands[3], operands[4], + operands[6], operands[5]); +} + "NDS32_EXT_DSP_P () + && !reload_completed + && nds32_need_split_sms_p (operands[3], operands[4], + operands[6], operands[5])" + [(const_int 1)] +{ + nds32_split_sms (operands[0], operands[1], operands[2], + operands[3], operands[4], + operands[6], operands[5]); + DONE; +} + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "kmda" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" "r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" "r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))))] + "NDS32_EXT_DSP_P ()" + "kmda\t%0, %1, %2" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "kmxda" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" "r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" "r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))))))] + "NDS32_EXT_DSP_P ()" + "kmxda\t%0, %1, %2" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "kmada" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (match_operand:SI 1 "register_operand" " 0") + (ss_plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 0)])))))))] + "NDS32_EXT_DSP_P ()" + "kmada\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "kmada2" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (match_operand:SI 1 "register_operand" " 0") + (ss_plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") 
+ (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)])))))))] + "NDS32_EXT_DSP_P ()" + "kmada\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "kmaxda" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (match_operand:SI 1 "register_operand" " 0") + (ss_plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)])))))))] + "NDS32_EXT_DSP_P ()" + "kmaxda\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "kmads" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (match_operand:SI 1 "register_operand" " 0") + (ss_minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 0)])))))))] + "NDS32_EXT_DSP_P ()" + "kmads\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "kmadrs" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (match_operand:SI 1 "register_operand" " 0") + (ss_minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)])))))))] + "NDS32_EXT_DSP_P ()" + "kmadrs\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "kmaxds" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (match_operand:SI 1 "register_operand" " 0") + (ss_minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)])))))))] + "NDS32_EXT_DSP_P ()" + "kmaxds\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "kmsda" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_minus:SI + (match_operand:SI 1 "register_operand" " 0") + (ss_minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 0)])))))))] + 
"NDS32_EXT_DSP_P ()" + "kmsda\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "kmsxda" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_minus:SI + (match_operand:SI 1 "register_operand" " 0") + (ss_minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)])))))))] + "NDS32_EXT_DSP_P ()" + "kmsxda\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +;; smax[8|16] and umax[8|16] +(define_insn "3" + [(set (match_operand:VQIHI 0 "register_operand" "=r") + (sumax:VQIHI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +;; smin[8|16] and umin[8|16] +(define_insn "3" + [(set (match_operand:VQIHI 0 "register_operand" "=r") + (sumin:VQIHI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "3_bb" + [(set (match_operand: 0 "register_operand" "=r") + (sumin_max: (vec_select: + (match_operand:VQIHI 1 "register_operand" " r") + (parallel [(const_int 0)])) + (vec_select: + (match_operand:VQIHI 2 "register_operand" " r") + (parallel [(const_int 0)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn_and_split "3_tt" + [(set (match_operand: 0 "register_operand" "=r") + (sumin_max: (vec_select: + (match_operand:VQIHI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select: + (match_operand:VQIHI 2 "register_operand" " r") + (parallel [(const_int 1)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 0)] +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_3 (tmp, operands[1], operands[2])); + emit_insn (gen_rotr_1 (tmp, tmp)); + emit_move_insn (operands[0], simplify_gen_subreg (mode, tmp, mode, 0)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn_and_split "v4qi3_22" + [(set (match_operand:QI 0 "register_operand" "=r") + (sumin_max:QI (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2)])) + (vec_select:QI + (match_operand:V4QI 2 "register_operand" " r") + (parallel [(const_int 2)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 0)] +{ + rtx tmp = gen_reg_rtx (V4QImode); + emit_insn (gen_v4qi3 (tmp, operands[1], operands[2])); + emit_insn (gen_rotrv4qi_2 (tmp, tmp)); + emit_move_insn (operands[0], simplify_gen_subreg (QImode, tmp, V4QImode, 0)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn_and_split "v4qi3_33" + [(set (match_operand:QI 0 "register_operand" "=r") + (sumin_max:QI (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)])) + (vec_select:QI + (match_operand:V4QI 2 "register_operand" " r") + (parallel [(const_int 3)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && 
!TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 0)] +{ + rtx tmp = gen_reg_rtx (V4QImode); + emit_insn (gen_v4qi3 (tmp, operands[1], operands[2])); + emit_insn (gen_rotrv4qi_3 (tmp, tmp)); + emit_move_insn (operands[0], simplify_gen_subreg (QImode, tmp, V4QImode, 0)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn_and_split "v2hi3_bbtt" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (sumin_max:HI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (sumin_max:HI (vec_select:HI + (match_dup:V2HI 1) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup:HI 2) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P ()" + [(const_int 0)] +{ + emit_insn (gen_v2hi3 (operands[0], operands[1], operands[2])); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_expand "abs2" + [(set (match_operand:VQIHI 0 "register_operand" "=r") + (ss_abs:VQIHI (match_operand:VQIHI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P () && TARGET_HW_ABS && !flag_wrapv" +{ +}) + +(define_insn "kabs2" + [(set (match_operand:VQIHI 0 "register_operand" "=r") + (ss_abs:VQIHI (match_operand:VQIHI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "kabs\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "mar64_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (mult:DI + (extend:DI + (match_operand:SI 2 "register_operand" " r")) + (extend:DI + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "mar64\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "mar64_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (mult:DI + (extend:DI + (match_operand:SI 2 "register_operand" " r")) + (extend:DI + (match_operand:SI 3 "register_operand" " r"))) + (match_operand:DI 1 "register_operand" " 0")))] + "NDS32_EXT_DSP_P ()" + "mar64\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "mar64_3" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (extend:DI + (mult:SI + (match_operand:SI 2 "register_operand" " r") + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "mar64\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "mar64_4" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (extend:DI + (mult:SI + (match_operand:SI 2 "register_operand" " r") + (match_operand:SI 3 "register_operand" " r"))) + (match_operand:DI 1 "register_operand" " 0")))] + "NDS32_EXT_DSP_P ()" + "mar64\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "msr64" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI + (match_operand:DI 1 "register_operand" " 0") + (mult:DI + (extend:DI + (match_operand:SI 2 "register_operand" " r")) + (extend:DI + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "msr64\t%0, %2, %3" + [(set_attr "type" "mul") + (set_attr "length" "4")]) + +(define_insn "msr64_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI + (match_operand:DI 1 "register_operand" 
" 0") + (extend:DI + (mult:SI + (match_operand:SI 2 "register_operand" " r") + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "msr64\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +;; kmar64, kmsr64, ukmar64 and ukmsr64 +(define_insn "kmar64_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (ss_plus:DI + (match_operand:DI 1 "register_operand" " 0") + (mult:DI + (sign_extend:DI + (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "kmar64\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "kmar64_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (ss_plus:DI + (mult:DI + (sign_extend:DI + (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI + (match_operand:SI 3 "register_operand" " r"))) + (match_operand:DI 1 "register_operand" " 0")))] + "NDS32_EXT_DSP_P ()" + "kmar64\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "kmsr64" + [(set (match_operand:DI 0 "register_operand" "=r") + (ss_minus:DI + (match_operand:DI 1 "register_operand" " 0") + (mult:DI + (sign_extend:DI + (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "kmsr64\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "ukmar64_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (us_plus:DI + (match_operand:DI 1 "register_operand" " 0") + (mult:DI + (zero_extend:DI + (match_operand:SI 2 "register_operand" " r")) + (zero_extend:DI + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "ukmar64\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "ukmar64_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (us_plus:DI + (mult:DI + (zero_extend:DI + (match_operand:SI 2 "register_operand" " r")) + (zero_extend:DI + (match_operand:SI 3 "register_operand" " r"))) + (match_operand:DI 1 "register_operand" " 0")))] + "NDS32_EXT_DSP_P ()" + "ukmar64\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "ukmsr64" + [(set (match_operand:DI 0 "register_operand" "=r") + (us_minus:DI + (match_operand:DI 1 "register_operand" " 0") + (mult:DI + (zero_extend:DI + (match_operand:SI 2 "register_operand" " r")) + (zero_extend:DI + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "ukmsr64\t%0, %2, %3" + [(set_attr "type" "mac") + (set_attr "length" "4")]) + +(define_insn "bpick1" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI + (and:SI + (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 3 "register_operand" " r")) + (and:SI + (match_operand:SI 2 "register_operand" " r") + (not:SI (match_dup 3)))))] + "NDS32_EXT_DSP_P ()" + "bpick\t%0, %1, %2, %3" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "bpick2" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI + (and:SI + (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r")) + (and:SI + (not:SI (match_dup 2)) + (match_operand:SI 3 "register_operand" " r"))))] + "NDS32_EXT_DSP_P ()" + "bpick\t%0, %1, %3, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "bpick3" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI + (and:SI + (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 
"register_operand" " r")) + (and:SI + (match_operand:SI 3 "register_operand" " r") + (not:SI (match_dup 1)))))] + "NDS32_EXT_DSP_P ()" + "bpick\t%0, %2, %3, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "bpick4" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI + (and:SI + (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r")) + (and:SI + (not:SI (match_dup 1)) + (match_operand:SI 3 "register_operand" " r"))))] + "NDS32_EXT_DSP_P ()" + "bpick\t%0, %2, %3, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "bpick5" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI + (and:SI + (match_operand:SI 1 "register_operand" " r") + (not:SI (match_operand:SI 2 "register_operand" " r"))) + (and:SI + (match_operand:SI 3 "register_operand" " r") + (match_dup 2))))] + "NDS32_EXT_DSP_P ()" + "bpick\t%0, %3, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "bpick6" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI + (and:SI + (not:SI (match_operand:SI 1 "register_operand" " r")) + (match_operand:SI 2 "register_operand" " r")) + (and:SI + (match_operand:SI 3 "register_operand" " r") + (match_dup 1))))] + "NDS32_EXT_DSP_P ()" + "bpick\t%0, %3, %2, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "bpick7" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI + (and:SI + (match_operand:SI 1 "register_operand" " r") + (not:SI (match_operand:SI 2 "register_operand" " r"))) + (and:SI + (match_dup 2) + (match_operand:SI 3 "register_operand" " r"))))] + "NDS32_EXT_DSP_P ()" + "bpick\t%0, %3, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "bpick8" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI + (and:SI + (not:SI (match_operand:SI 1 "register_operand" " r")) + (match_operand:SI 2 "register_operand" " r")) + (and:SI + (match_dup 1) + (match_operand:SI 3 "register_operand" " r"))))] + "NDS32_EXT_DSP_P ()" + "bpick\t%0, %3, %2, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "sraiu" + [(set (match_operand:SI 0 "register_operand" "= r, r") + (unspec:SI [(ashiftrt:SI (match_operand:SI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r"))] + UNSPEC_ROUND))] + "NDS32_EXT_DSP_P ()" + "@ + srai.u\t%0, %1, %2 + sra.u\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "kssl" + [(set (match_operand:SI 0 "register_operand" "= r, r") + (ss_ashift:SI (match_operand:SI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r")))] + "NDS32_EXT_DSP_P ()" + "@ + kslli\t%0, %1, %2 + ksll\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "kslraw_round" + [(set (match_operand:SI 0 "register_operand" "=r") + (if_then_else:SI + (lt:SI (match_operand:SI 2 "register_operand" " r") + (const_int 0)) + (unspec:SI [(ashiftrt:SI (match_operand:SI 1 "register_operand" " r") + (neg:SI (match_dup 2)))] + UNSPEC_ROUND) + (ss_ashift:SI (match_dup 1) + (match_dup 2))))] + "NDS32_EXT_DSP_P ()" + "kslraw.u\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn_and_split "di3" + [(set (match_operand:DI 0 "register_operand" "") + (shift_rotate:DI (match_operand:DI 1 "register_operand" "") + (match_operand:SI 2 "nds32_rimm6u_operand" "")))] + "NDS32_EXT_DSP_P () && !reload_completed" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + 
[(const_int 0)] +{ + if (REGNO (operands[0]) == REGNO (operands[1])) + { + rtx tmp = gen_reg_rtx (DImode); + nds32_split_di3 (tmp, operands[1], operands[2]); + emit_move_insn (operands[0], tmp); + } + else + nds32_split_di3 (operands[0], operands[1], operands[2]); + DONE; +}) + +(define_insn "sclip32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIPS_OV))] + "NDS32_EXT_DSP_P ()" + "sclip32\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "uclip32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIP_OV))] + "NDS32_EXT_DSP_P ()" + "uclip32\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "bitrev" + [(set (match_operand:SI 0 "register_operand" "=r, r") + (unspec:SI [(match_operand:SI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm5u_operand" " r, Iu05")] + UNSPEC_BITREV))] + "" + "@ + bitrev\t%0, %1, %2 + bitrevi\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +;; wext, wexti +(define_insn "wext" + [(set (match_operand:SI 0 "register_operand" "=r, r") + (truncate:SI + (shiftrt:DI + (match_operand:DI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm5u_operand" " r,Iu05"))))] + "NDS32_EXT_DSP_P ()" + "@ + wext\t%0, %1, %2 + wexti\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +;; 32-bit add/sub instruction: raddw and rsubw. +(define_insn "rsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (ashiftrt:DI + (plus_minus:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) + (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "rw\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +;; 32-bit add/sub instruction: uraddw and ursubw. +(define_insn "ursi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (plus_minus:DI + (zero_extend:DI (match_operand:SI 1 "register_operand" " r")) + (zero_extend:DI (match_operand:SI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "urw\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-e8.md gcc-4.9.4/gcc/config/nds32/nds32-e8.md --- gcc-4.9.4.orig/gcc/config/nds32/nds32-e8.md 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-e8.md 2016-08-08 20:37:45.498269782 +0200 @@ -0,0 +1,284 @@ +;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2015 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
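+ +;; Reading aid for the reservations below (an informal summary derived from +;; the reservation strings themselves, not from Andes pipeline documents): +;; the E8 model is a simple two-stage II/EX automaton, and an N-word +;; load/store-multiple keeps II and EX busy together for N-1 cycles, e.g. +;; "e8_ii, (e8_ii+e8_ex)*3, e8_ex" models a 4-word LMW/SMW; the slow +;; multiplier configuration likewise occupies EX for 16 consecutive cycles.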
+ + +;; ------------------------------------------------------------------------ +;; Define E8 pipeline settings. +;; ------------------------------------------------------------------------ + +(define_automaton "nds32_e8_machine") + +(define_cpu_unit "e8_ii" "nds32_e8_machine") +(define_cpu_unit "e8_ex" "nds32_e8_machine") + +(define_insn_reservation "nds_e8_unknown" 1 + (and (eq_attr "type" "unknown") + (eq_attr "pipeline_model" "e8")) + "e8_ii, e8_ex") + +(define_insn_reservation "nds_e8_misc" 1 + (and (eq_attr "type" "misc") + (eq_attr "pipeline_model" "e8")) + "e8_ii, e8_ex") + +(define_insn_reservation "nds_e8_alu" 1 + (and (eq_attr "type" "alu") + (eq_attr "pipeline_model" "e8")) + "e8_ii, e8_ex") + +(define_insn_reservation "nds_e8_load" 1 + (and (match_test "nds32_load_single_p (insn)") + (eq_attr "pipeline_model" "e8")) + "e8_ii, e8_ex") + +(define_insn_reservation "nds_e8_store" 1 + (and (match_test "nds32_store_single_p (insn)") + (eq_attr "pipeline_model" "e8")) + "e8_ii, e8_ex") + +(define_insn_reservation "nds_e8_load_multiple_1" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "1")) + (eq_attr "pipeline_model" "e8")) + "e8_ii, e8_ex") + +(define_insn_reservation "nds_e8_load_multiple_2" 1 + (and (ior (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "2")) + (match_test "nds32_load_double_p (insn)")) + (eq_attr "pipeline_model" "e8")) + "e8_ii, e8_ii+e8_ex, e8_ex") + +(define_insn_reservation "nds_e8_load_multiple_3" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "3")) + (eq_attr "pipeline_model" "e8")) + "e8_ii, (e8_ii+e8_ex)*2, e8_ex") + +(define_insn_reservation "nds_e8_load_multiple_4" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "4")) + (eq_attr "pipeline_model" "e8")) + "e8_ii, (e8_ii+e8_ex)*3, e8_ex") + +(define_insn_reservation "nds_e8_load_multiple_5" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "5")) + (eq_attr "pipeline_model" "e8")) + "e8_ii, (e8_ii+e8_ex)*4, e8_ex") + +(define_insn_reservation "nds_e8_load_multiple_6" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "6")) + (eq_attr "pipeline_model" "e8")) + "e8_ii, (e8_ii+e8_ex)*5, e8_ex") + +(define_insn_reservation "nds_e8_load_multiple_7" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "7")) + (eq_attr "pipeline_model" "e8")) + "e8_ii, (e8_ii+e8_ex)*6, e8_ex") + +(define_insn_reservation "nds_e8_load_multiple_8" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "8")) + (eq_attr "pipeline_model" "e8")) + "e8_ii, (e8_ii+e8_ex)*7, e8_ex") + +(define_insn_reservation "nds_e8_load_multiple_12" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "12")) + (eq_attr "pipeline_model" "e8")) + "e8_ii, (e8_ii+e8_ex)*11, e8_ex") + +(define_insn_reservation "nds_e8_store_multiple_1" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "1")) + (eq_attr "pipeline_model" "e8")) + "e8_ii, e8_ex") + +(define_insn_reservation "nds_e8_store_multiple_2" 1 + (and (ior (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "2")) + (match_test "nds32_store_double_p (insn)")) + (eq_attr "pipeline_model" "e8")) + "e8_ii, e8_ii+e8_ex, e8_ex") + +(define_insn_reservation "nds_e8_store_multiple_3" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "3")) + (eq_attr "pipeline_model" "e8")) + "e8_ii, (e8_ii+e8_ex)*2, e8_ex") + +(define_insn_reservation "nds_e8_store_multiple_4" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "4")) + (eq_attr 
"pipeline_model" "e8")) + "e8_ii, (e8_ii+e8_ex)*3, e8_ex") + +(define_insn_reservation "nds_e8_store_multiple_5" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "5")) + (eq_attr "pipeline_model" "e8")) + "e8_ii, (e8_ii+e8_ex)*4, e8_ex") + +(define_insn_reservation "nds_e8_store_multiple_6" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "6")) + (eq_attr "pipeline_model" "e8")) + "e8_ii, (e8_ii+e8_ex)*5, e8_ex") + +(define_insn_reservation "nds_e8_store_multiple_7" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "7")) + (eq_attr "pipeline_model" "e8")) + "e8_ii, (e8_ii+e8_ex)*6, e8_ex") + +(define_insn_reservation "nds_e8_store_multiple_8" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "8")) + (eq_attr "pipeline_model" "e8")) + "e8_ii, (e8_ii+e8_ex)*7, e8_ex") + +(define_insn_reservation "nds_e8_store_multiple_12" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "12")) + (eq_attr "pipeline_model" "e8")) + "e8_ii, (e8_ii+e8_ex)*11, e8_ex") + +(define_insn_reservation "nds_e8_mul_fast" 1 + (and (match_test "nds32_mul_config != MUL_TYPE_SLOW") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "e8"))) + "e8_ii, e8_ex") + +(define_insn_reservation "nds_e8_mul_slow" 1 + (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "e8"))) + "e8_ii, e8_ex*16") + +(define_insn_reservation "nds_e8_mac_fast" 1 + (and (match_test "nds32_mul_config != MUL_TYPE_SLOW") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "e8"))) + "e8_ii, e8_ii+e8_ex, e8_ex") + +(define_insn_reservation "nds_e8_mac_slow" 1 + (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "e8"))) + "e8_ii, (e8_ii+e8_ex)*16, e8_ex") + +(define_insn_reservation "nds_e8_div" 1 + (and (eq_attr "type" "div") + (eq_attr "pipeline_model" "e8")) + "e8_ii, (e8_ii+e8_ex)*36, e8_ex") + +(define_insn_reservation "nds_e8_branch" 1 + (and (eq_attr "type" "branch") + (eq_attr "pipeline_model" "e8")) + "e8_ii, e8_ex") + +;; LD -> ADDR_IN_MOP(1) +(define_bypass 2 + "nds_e8_load" + "nds_e8_branch,\ + nds_e8_load, nds_e8_store,\ + nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\ + nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\ + nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12,\ + nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\ + nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\ + nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12" + "nds32_e8_load_to_ii_p" +) + +;; LD -> ALU, MUL, MAC, DIV, BR_COND, ST, SMW(N, 1) +(define_bypass 2 + "nds_e8_load" + "nds_e8_alu, + nds_e8_mul_fast, nds_e8_mul_slow,\ + nds_e8_mac_fast, nds_e8_mac_slow,\ + nds_e8_div,\ + nds_e8_branch,\ + nds_e8_store,\ + nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\ + nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\ + nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12" + "nds32_e8_load_to_ex_p" +) + +;; ALU, MOVD44, MUL, MAC, DIV_Rs, LD_bi, ADDR_OUT -> ADDR_IN_MOP(1) +(define_bypass 2 + "nds_e8_alu, + nds_e8_mul_fast, nds_e8_mul_slow,\ + nds_e8_mac_fast, nds_e8_mac_slow,\ + nds_e8_div,\ + nds_e8_load, nds_e8_store,\ + nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\ + nds_e8_load_multiple_4,nds_e8_load_multiple_5, 
nds_e8_load_multiple_6,\ + nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12,\ + nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\ + nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\ + nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12" + "nds_e8_branch,\ + nds_e8_load, nds_e8_store,\ + nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\ + nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\ + nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12,\ + nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\ + nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\ + nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12" + "nds32_e8_ex_to_ii_p" +) + +;; LMW(N, N) -> ADDR_IN_MOP(1) +(define_bypass 2 + "nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\ + nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\ + nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12" + "nds_e8_branch,\ + nds_e8_load, nds_e8_store,\ + nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\ + nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\ + nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12,\ + nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\ + nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\ + nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12" + "nds32_e8_last_load_to_ii_p" +) + +;; LMW(N, N) -> ALU, MUL, MAC, DIV, BR_COND, ST, SMW(N, 1) +(define_bypass 2 + "nds_e8_load_multiple_1,nds_e8_load_multiple_2, nds_e8_load_multiple_3,\ + nds_e8_load_multiple_4,nds_e8_load_multiple_5, nds_e8_load_multiple_6,\ + nds_e8_load_multiple_7,nds_e8_load_multiple_8, nds_e8_load_multiple_12" + "nds_e8_alu, + nds_e8_mul_fast, nds_e8_mul_slow,\ + nds_e8_mac_fast, nds_e8_mac_slow,\ + nds_e8_div,\ + nds_e8_branch,\ + nds_e8_store,\ + nds_e8_store_multiple_1,nds_e8_store_multiple_2, nds_e8_store_multiple_3,\ + nds_e8_store_multiple_4,nds_e8_store_multiple_5, nds_e8_store_multiple_6,\ + nds_e8_store_multiple_7,nds_e8_store_multiple_8, nds_e8_store_multiple_12" + "nds32_e8_last_load_to_ex_p" +) diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-elf.opt gcc-4.9.4/gcc/config/nds32/nds32-elf.opt --- gcc-4.9.4.orig/gcc/config/nds32/nds32-elf.opt 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-elf.opt 2016-08-08 20:37:45.498269782 +0200 @@ -0,0 +1,16 @@ +mcmodel= +Target RejectNegative Joined Enum(nds32_cmodel_type) Var(nds32_cmodel_option) Init(CMODEL_MEDIUM) +Specify the address generation strategy for code model. 
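+ +; The accepted values are listed by the nds32_cmodel_type Enum records +; below; CMODEL_MEDIUM is the default (see Init above). A sketch of typical +; use, with the exact effect of each model left to the backend rather than +; documented here: compile with e.g. "-O2 -mcmodel=large" when code or data +; may be placed out of reach of the shorter addressing sequences.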
+ +Enum +Name(nds32_cmodel_type) Type(enum nds32_cmodel_type) +Known cmodel types (for use with the -mcmodel= option): + +EnumValue +Enum(nds32_cmodel_type) String(small) Value(CMODEL_SMALL) + +EnumValue +Enum(nds32_cmodel_type) String(medium) Value(CMODEL_MEDIUM) + +EnumValue +Enum(nds32_cmodel_type) String(large) Value(CMODEL_LARGE) diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-fp-as-gp.c gcc-4.9.4/gcc/config/nds32/nds32-fp-as-gp.c --- gcc-4.9.4.orig/gcc/config/nds32/nds32-fp-as-gp.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-fp-as-gp.c 2016-08-08 20:37:45.502269936 +0200 @@ -0,0 +1,287 @@ +/* fp-as-gp pass of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2015 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* ------------------------------------------------------------------------ */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" /* Required by recog.h. */ +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" /* For DFA state_t. */ +#include "insn-codes.h" /* For CODE_FOR_xxx. */ +#include "reload.h" /* For push_reload(). */ +#include "flags.h" +#include "function.h" +#include "expr.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "df.h" +#include "tm_p.h" +#include "tm-constrs.h" +#include "target.h" +#include "target-def.h" +#include "langhooks.h" /* For add_builtin_function(). */ +#include "ggc.h" +#include "tree-pass.h" +#include "target-globals.h" +#include "ira-int.h" + +/* ------------------------------------------------------------------------ */ + +/* A helper function to check if this function should contain prologue. */ +static bool +nds32_have_prologue_p (void) +{ + int i; + + for (i = 0; i < 28; i++) + if (NDS32_REQUIRED_CALLEE_SAVED_P (i)) + return true; + + return (flag_pic + || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM) + || NDS32_REQUIRED_CALLEE_SAVED_P (LP_REGNUM)); +} + +static int +nds32_get_symbol_count (void) +{ + int symbol_count = 0; + rtx insn; + basic_block bb; + + FOR_EACH_BB_FN (bb, cfun) + { + FOR_BB_INSNS (bb, insn) + { + /* Counting the insn number which the addressing mode is symbol. */ + if (single_set (insn) && nds32_symbol_load_store_p (insn)) + { + rtx pattern = PATTERN (insn); + rtx mem; + gcc_assert (GET_CODE (pattern) == SET); + if (GET_CODE (SET_SRC (pattern)) == REG ) + mem = SET_DEST (pattern); + else + mem = SET_SRC (pattern); + + /* We have only lwi37 and swi37 for fp-as-gp optimization, + so don't count any other than SImode. + MEM for QImode and HImode will wrap by ZERO_EXTEND + or SIGN_EXTEND */ + if (GET_CODE (mem) == MEM) + symbol_count++; + } + } + } + + return symbol_count; +} + +/* Function to determine whether it is worth to do fp_as_gp optimization. 
+ Return false: It is NOT worth doing fp_as_gp optimization. + Return true: It is APPROXIMATELY worth doing fp_as_gp optimization. + Note that if it is worth doing fp_as_gp optimization, + we MUST mark FP_REGNUM as ever live in this function. */ +static bool +nds32_fp_as_gp_check_available (void) +{ + basic_block bb; + basic_block exit_bb; + edge_iterator ei; + edge e; + bool first_exit_blocks_p; + + /* If ANY of the following conditions holds, + we DO NOT perform fp_as_gp optimization: + 1. TARGET_FORBID_FP_AS_GP is set, + regardless of TARGET_FORCE_FP_AS_GP. + 2. The user explicitly uses the 'naked'/'no_prologue' attribute. + We use nds32_naked_function_p() to perform this check. + 3. Not optimizing for size. + 4. The frame pointer is needed. + 5. If $fp is already required to be saved, + it means $fp is already chosen by the register allocator. + Thus we had better not use it for fp_as_gp optimization. + 6. This function is a vararg function. + DO NOT apply fp_as_gp optimization on this function + because it may change and break the stack frame. + 7. The epilogue is empty. + This happens when the function uses exit() + or its attribute is no_return. + In that case, the compiler will not expand the epilogue, + so we have no chance to output the .omit_fp_end directive. */ + if (TARGET_FORBID_FP_AS_GP + || nds32_naked_function_p (current_function_decl) + || !optimize_size + || frame_pointer_needed + || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM) + || (cfun->stdarg == 1) + || (find_fallthru_edge (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) == NULL)) + return false; + + /* Disable fp_as_gp if there is any infinite loop, since $fp may be + reused inside infinite loops by register renaming. + To check for infinite loops, we make sure exit_bb post-dominates + all other basic blocks when there is no infinite loop. */ + first_exit_blocks_p = true; + exit_bb = NULL; + + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) + { + /* With more than one exit block we also do not perform fp_as_gp optimization. */ + if (!first_exit_blocks_p) + return false; + + exit_bb = e->src; + first_exit_blocks_p = false; + } + + /* No exit_bb found? Just abort fp_as_gp! */ + if (!exit_bb) + return false; + + /* Each bb should be post-dominated by exit_bb if there is no infinite loop! */ + FOR_EACH_BB_FN (bb, cfun) + { + if (!dominated_by_p (CDI_POST_DOMINATORS, + bb, + exit_bb)) + return false; + } + + /* Now we can check the possibility of using fp_as_gp optimization. */ + if (TARGET_FORCE_FP_AS_GP) + { + /* The user explicitly issues the -mforce-fp-as-gp option. */ + return true; + } + else + { + /* In the following we are going to evaluate whether + it is worth doing fp_as_gp optimization. */ + bool good_gain = false; + int symbol_count; + + int threshold; + + /* We check if a prologue is already required. + Note that $gp will be saved in the prologue for PIC code generation. + After that, we can set the threshold by the existence of the prologue. + Each fp-implied instruction saves 2 bytes of code size + compared with the gp-aware instruction, so we have the following heuristics. */ + if (flag_pic + || nds32_have_prologue_p ()) + { + /* Have-prologue: + The compiler already intends to generate prologue content, + so the fp_as_gp optimization will only insert + the 'la $fp,_FP_BASE_' instruction, which will be + converted into a 4-byte instruction at link time. + The threshold is "3" symbol accesses, 2 + 2 + 2 > 4. 
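+ (In other words each fp-implied access saves 2 bytes, so 3 accesses + save 6 bytes, which outweighs the 4-byte 'la' overhead, whereas 2 + accesses would only break even.)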
*/ + threshold = 3; + } + else + { + /* None-prologue: + Compiler originally does not generate prologue content, + so the fp_as_gp optimization will NOT ONLY insert + 'la $fp,_FP_BASE' instruction, but also causes + push/pop instructions. + If we are using v3push (push25/pop25), + the threshold is "5" symbol accesses, 5*2 > 4 + 2 + 2; + If we are using normal push (smw/lmw), + the threshold is "5+2" symbol accesses 7*2 > 4 + 4 + 4. */ + threshold = 5 + (TARGET_V3PUSH ? 0 : 2); + } + + symbol_count = nds32_get_symbol_count (); + + if (symbol_count >= threshold) + good_gain = true; + + /* Enable fp_as_gp optimization when potential gain is good enough. */ + return good_gain; + } +} + +static unsigned int +nds32_fp_as_gp (void) +{ + bool fp_as_gp_p; + calculate_dominance_info (CDI_POST_DOMINATORS); + fp_as_gp_p = nds32_fp_as_gp_check_available (); + + /* Here is a hack to IRA for enable/disable a hard register per function. + We *MUST* review this way after migrate gcc 4.9! */ + if (fp_as_gp_p) { + SET_HARD_REG_BIT(this_target_ira_int->x_no_unit_alloc_regs, FP_REGNUM); + df_set_regs_ever_live (FP_REGNUM, 1); + } else { + CLEAR_HARD_REG_BIT(this_target_ira_int->x_no_unit_alloc_regs, FP_REGNUM); + } + + cfun->machine->fp_as_gp_p = fp_as_gp_p; + + free_dominance_info (CDI_POST_DOMINATORS); + return 1; +} + +const pass_data pass_data_nds32_fp_as_gp = +{ + RTL_PASS, /* type */ + "fp_as_gp", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_verify_rtl_sharing, /* todo_flags_finish */ +}; + +class pass_nds32_fp_as_gp : public rtl_opt_pass +{ +public: + pass_nds32_fp_as_gp (gcc::context *ctxt) + : rtl_opt_pass (pass_data_nds32_fp_as_gp, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return !TARGET_LINUX_ABI && TARGET_16_BIT && optimize_size; } + unsigned int execute () { return nds32_fp_as_gp (); } +}; + +rtl_opt_pass * +make_pass_nds32_fp_as_gp (gcc::context *ctxt) +{ + return new pass_nds32_fp_as_gp (ctxt); +} + +/* ------------------------------------------------------------------------ */ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-fpu.md gcc-4.9.4/gcc/config/nds32/nds32-fpu.md --- gcc-4.9.4.orig/gcc/config/nds32/nds32-fpu.md 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-fpu.md 2016-08-08 20:37:45.502269936 +0200 @@ -0,0 +1,475 @@ +;; Machine description of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2015 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;SFmode moves + +(define_expand "movsf" + [(set (match_operand:SF 0 "general_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" +{ + /* Need to force register if mem <- !reg. 
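A minimal standalone sketch of the break-even arithmetic described in the
fp-as-gp comments above (the 2-byte and 4-byte figures come from those
comments; the function name here is illustrative only):

#include <stdbool.h>

/* Sketch of the fp-as-gp gain heuristic: each lwi37/swi37 access saves
   2 bytes over the gp-aware form, while enabling the optimization costs
   the 4-byte 'la $fp,_FP_BASE_' plus, when no prologue exists yet, the
   push/pop overhead (2 + 2 bytes with v3push, 4 + 4 bytes with smw/lmw).  */
bool
fp_as_gp_worthwhile (int symbol_count, bool have_prologue, bool v3push)
{
  int setup_cost = 4;                     /* la $fp,_FP_BASE_      */

  if (!have_prologue)
    setup_cost += v3push ? 4 : 8;         /* extra push/pop bytes  */

  return symbol_count * 2 > setup_cost;   /* thresholds 3, 5, 7    */
}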
*/ + if (MEM_P (operands[0]) && !REG_P (operands[1])) + operands[1] = force_reg (SFmode, operands[1]); + if (CONST_DOUBLE_P (operands[1]) + && !satisfies_constraint_Cs20 (operands[1])) + { + REAL_VALUE_TYPE r; + unsigned long l; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + REAL_VALUE_TO_TARGET_SINGLE (r, l); + + emit_move_insn (operands[0], gen_rtx_HIGH (SFmode, operands[1])); + + if ((l & 0xFFF) != 0) + emit_insn (gen_movsf_lo (operands[0], operands[0], operands[1])); + DONE; + } +}) + +(define_insn "movsf_lo" + [(set (match_operand:SF 0 "register_operand" "=r") + (lo_sum:SF (match_operand:SF 1 "register_operand" "r") + (match_operand:SF 2 "immediate_operand" "i")))] + "" + "ori\t%0, %1, lo12(%2)" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "*movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "=r, r, U45, U33, U37, U45, m, l, l, l, d, r, f, f, r, f, Q, r, r, r") + (match_operand:SF 1 "general_operand" " r, r, l, l, l, d, r, U45, U33, U37, U45, m, f, r, f, Q, f,Cs05,Cs20, Chig"))] + "(register_operand(operands[0], SFmode) + || register_operand(operands[1], SFmode))" +{ + switch (which_alternative) + { + case 0: + return "mov55\t%0, %1"; + case 1: + return "ori\t%0, %1, 0"; + case 2: + case 3: + case 4: + case 5: + return nds32_output_16bit_store (operands, 4); + case 6: + return nds32_output_32bit_store (operands, 4); + case 7: + case 8: + case 9: + case 10: + return nds32_output_16bit_load (operands, 4); + case 11: + return nds32_output_32bit_load (operands, 4); + case 12: + if (TARGET_FPU_SINGLE) + return "fcpyss\t%0, %1, %1"; + else + return "#"; + case 13: + return "fmtsr\t%1, %0"; + case 14: + return "fmfsr\t%0, %1"; + case 15: + return nds32_output_float_load (operands); + case 16: + return nds32_output_float_store (operands); + case 17: + return "movi55\t%0, %1"; + case 18: + return "movi\t%0, %1"; + case 19: + return "sethi\t%0, %1"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu,alu,store,store,store,store,store,load,load,load,load,load,unknown,unknown,unknown,unknown,unknown,alu,alu,alu") + (set_attr "length" " 2, 4, 2, 2, 2, 2, 4, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 2, 4, 4") + (set_attr "feature" " v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu, v1, v1, v1")]) + +;; Conditional Move Instructions + +(define_expand "movcc" + [(set (match_operand:ANYF 0 "register_operand" "") + (if_then_else:ANYF (match_operand 1 "nds32_float_comparison_operator" "") + (match_operand:ANYF 2 "register_operand" "") + (match_operand:ANYF 3 "register_operand" "")))] + "" +{ + if (nds32_cond_move_p (operands[1])) + { + /* Operands[1] condition code is UNORDERED or ORDERED, and + sub-operands[1] MODE isn't SFmode or SFmode, return FAIL + for gcc, because we don't using slt compare instruction + to generate UNORDERED and ORDERED condition. 
*/ + FAIL; + } + else + nds32_expand_float_movcc (operands); +}) + +(define_insn "fcmov_eq" + [(set (match_operand:ANYF 0 "register_operand" "=f, f") + (if_then_else:ANYF (eq (match_operand:SI 1 "register_operand" "f, f") + (const_int 0)) + (match_operand:ANYF 2 "register_operand" "f, 0") + (match_operand:ANYF 3 "register_operand" "0, f")))] + "" + "@ + fcmovz\t%0,%2,%1 + fcmovn\t%0,%3,%1" + [(set_attr "length" "4, 4")] +) + +(define_insn "fcmov_ne" + [(set (match_operand:ANYF 0 "register_operand" "=f, f") + (if_then_else:ANYF (ne (match_operand:SI 1 "register_operand" "f, f") + (const_int 0)) + (match_operand:ANYF 2 "register_operand" "f, 0") + (match_operand:ANYF 3 "register_operand" "0, f")))] + "" + "@ + fcmovn\t%0,%2,%1 + fcmovz\t%0,%3,%1" + [(set_attr "length" "4, 4")] +) + +;; Arithmetic instructions. + +(define_insn "add3" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (plus:ANYF (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] + "" + "fadd\t %0, %1, %2" + [(set_attr "length" "4")] +) + +(define_insn "sub3" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (minus:ANYF (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] + "" + "fsub\t %0, %1, %2" + [(set_attr "length" "4")] +) + +;; Multiplication insns. + +(define_insn "mul3" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (mult:ANYF (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] + "" + "fmul\t %0, %1, %2" + [(set_attr "length" "4")] +) + +(define_insn "fma4" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (fma:ANYF (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f") + (match_operand:ANYF 3 "register_operand" "0")))] + "TARGET_EXT_FPU_FMA" + "fmadd\t%0, %1, %2" + [(set_attr "length" "4")] +) + +(define_insn "fnma4" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")) + (match_operand:ANYF 2 "register_operand" "f") + (match_operand:ANYF 3 "register_operand" "0")))] + "TARGET_EXT_FPU_FMA" + "fmsub\t%0, %1, %2" + [(set_attr "length" "4")] +) + +(define_insn "fms4" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (fma:ANYF (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f") + (neg:ANYF (match_operand:ANYF 3 "register_operand" "0"))))] + "TARGET_EXT_FPU_FMA" + "fnmsub\t%0, %1, %2" + [(set_attr "length" "4")] +) + +(define_insn "fnms4" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")) + (match_operand:ANYF 2 "register_operand" "f") + (neg:ANYF (match_operand:ANYF 3 "register_operand" "0"))))] + "TARGET_EXT_FPU_FMA" + "fnmadd\t%0, %1, %2" + [(set_attr "length" "4")] +) + +;; Div Instructions. 
+ +(define_insn "div3" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (div:ANYF (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] + "" + "fdiv\t %0, %1, %2" + [(set_attr "length" "4")] +) + +(define_insn "sqrt2" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (sqrt:ANYF (match_operand:ANYF 1 "register_operand" "f")))] + "" + "fsqrt\t %0, %1" + [(set_attr "length" "4")] +) + +;; Conditional Branch patterns + +(define_expand "cstore4" + [(set (match_operand:SI 0 "register_operand" "") + (match_operator:SI 1 "nds32_float_comparison_operator" + [(match_operand:ANYF 2 "register_operand" "") + (match_operand:ANYF 3 "register_operand" "")]))] + "" +{ + nds32_expand_float_cstore (operands); + DONE; +}) + +(define_expand "cbranch4" + [(set (pc) + (if_then_else (match_operator 0 "nds32_float_comparison_operator" + [(match_operand:ANYF 1 "register_operand" "") + (match_operand:ANYF 2 "register_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + nds32_expand_float_cbranch (operands); + DONE; +}) + +;; Copysign Instructions. + +(define_insn "copysignsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (unspec:SF [(match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")] + UNSPEC_COPYSIGN))] + "TARGET_FPU_SINGLE" + "fcpyss\t%0,%1,%2" + [(set_attr "length" "4")] +) + +(define_insn "copysigndf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (unspec:DF [(match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f")] + UNSPEC_COPYSIGN))] + "TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE" + "fcpysd\t%0,%1,%2" + [(set_attr "length" "4")] +) + +(define_insn "*ncopysign3" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (neg:ANYF (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")] + UNSPEC_COPYSIGN)))] + "" + "fcpyns\t%0,%1,%2" + [(set_attr "length" "4")] +) + +;; Absolute Instructions + +(define_insn "abssf2" + [(set (match_operand:SF 0 "register_operand" "=f, r") + (abs:SF (match_operand:SF 1 "register_operand" "f, r")))] + "TARGET_FPU_SINGLE || TARGET_EXT_PERF" + "@ + fabss\t%0, %1 + bclr\t%0, %1, 31" + [(set_attr "length" "4") + (set_attr "feature" "fpu,pe1")] +) + +(define_insn "absdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (abs:DF (match_operand:DF 1 "register_operand" "f")))] + "TARGET_FPU_DOUBLE" + "fabsd\t%0, %1" + [(set_attr "length" "4")] +) + +;; Negation Instructions + +(define_insn "*negsf2" + [(set (match_operand:SF 0 "register_operand" "=f, r") + (neg:SF (match_operand:SF 1 "register_operand" "f, r")))] + "TARGET_FPU_SINGLE || TARGET_EXT_PERF" + "@ + fcpynss\t%0, %1, %1 + btgl\t%0, %1, 31" + [(set_attr "length" "4") + (set_attr "feature" "fpu,pe1")] +) + +(define_insn "*negdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (neg:DF (match_operand:DF 1 "register_operand" "f")))] + "TARGET_FPU_DOUBLE" + "fcpynsd\t%0, %1, %1" + [(set_attr "length" "4")] +) + +;; Data Format Conversion Instructions + +(define_insn "floatunssi2" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (unsigned_float:ANYF (match_operand:SI 1 "register_operand" "f")))] + "" + "fui2\t %0, %1" + [(set_attr "length" "4")] +) + +(define_insn "floatsi2" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (float:ANYF (match_operand:SI 1 "register_operand" "f")))] + "" + "fsi2\t %0, %1" + [(set_attr "length" "4")] +) + +(define_insn "fixuns_truncsi2" + [(set (match_operand:SI 0 
"register_operand" "=f") + (unsigned_fix:SI (fix:ANYF (match_operand:ANYF 1 "register_operand" "f"))))] + "" + "f2ui.z\t %0, %1" + [(set_attr "length" "4")] +) + +(define_insn "fix_truncsi2" + [(set (match_operand:SI 0 "register_operand" "=f") + (fix:SI (fix:ANYF (match_operand:ANYF 1 "register_operand" "f"))))] + "" + "f2si.z\t %0, %1" + [(set_attr "length" "4")] +) + +(define_insn "extendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float_extend:DF (match_operand:SF 1 "register_operand" "f")))] + "TARGET_FPU_SINGLE && TARGET_FPU_DOUBLE" + "fs2d\t%0, %1" + [(set_attr "length" "4")] +) + +(define_insn "truncdfsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))] + "TARGET_FPU_SINGLE && TARGET_FPU_DOUBLE" + "fd2s\t%0, %1" + [(set_attr "length" "4")] +) + +;; Compare Instructions + +(define_insn "cmp_eq" + [(set (match_operand:SI 0 "register_operand" "=f") + (eq:SI (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] + "" + { + if (NDS32_EXT_FPU_DOT_E) + return "fcmpeq.e %0, %1, %2"; + else + return "fcmpeq\t%0, %1, %2"; + } + [(set_attr "length" "4")] +) + +(define_insn "cmp_lt" + [(set (match_operand:SI 0 "register_operand" "=f") + (lt:SI (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] + "" +{ + if (NDS32_EXT_FPU_DOT_E) + return "fcmplt.e %0, %1, %2"; + else + return "fcmplt\t%0, %1, %2"; +} + [(set_attr "length" "4")] +) + +(define_insn "cmp_le" + [(set (match_operand:SI 0 "register_operand" "=f") + (le:SI (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] + "" +{ + if (NDS32_EXT_FPU_DOT_E) + return "fcmple.e %0, %1, %2"; + else + return "fcmple\t%0, %1, %2"; +} + [(set_attr "length" "4")] +) + +(define_insn "cmp_un" + [(set (match_operand:SI 0 "register_operand" "=f") + (unordered:SI (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] + "" +{ + if (NDS32_EXT_FPU_DOT_E) + return "fcmpun.e %0, %1, %2"; + else + return "fcmpun\t%0, %1, %2"; +} + [(set_attr "length" "4")] +) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "register_operand" ""))] + "!TARGET_FPU_SINGLE + && NDS32_IS_FPR_REGNUM (REGNO (operands[0])) + && NDS32_IS_FPR_REGNUM (REGNO (operands[1]))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] +{ + operands[2] = gen_rtx_REG (SFmode, TA_REGNUM); +}) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "const_double_operand" ""))] + "!satisfies_constraint_Cs20 (operands[1]) + && !satisfies_constraint_Chig (operands[1])" + [(set (match_dup 0) (high:SF (match_dup 1))) + (set (match_dup 0) (lo_sum:SF (match_dup 0) (match_dup 1)))]) +;; ---------------------------------------------------------------------------- diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-gcse.c gcc-4.9.4/gcc/config/nds32/nds32-gcse.c --- gcc-4.9.4.orig/gcc/config/nds32/nds32-gcse.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-gcse.c 2016-08-08 20:37:45.502269936 +0200 @@ -0,0 +1,650 @@ +/* Global CSE pass of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2015 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* ------------------------------------------------------------------------ */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "diagnostic-core.h" + +#include "hash-table.h" +#include "rtl.h" +#include "tree.h" +#include "tm_p.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "flags.h" +#include "insn-config.h" +#include "recog.h" +#include "basic-block.h" +#include "function.h" +#include "expr.h" +#include "except.h" +#include "intl.h" +#include "obstack.h" +#include "hashtab.h" +#include "params.h" +#include "target.h" +#include "tree-pass.h" +#include "dbgcnt.h" +#include "df.h" + +/* ------------------------------------------------------------------------ */ + +struct expr +{ + /* The expression. */ + rtx expr; + + /* The same hash for this entry. */ + hashval_t hash; + + struct occr *antic_occr; + /* The number of antic_occr. */ + unsigned int count; +}; + +struct occr +{ + /* Next occurrence of this expression. */ + struct occr *next; + /* The insn that computes the expression. */ + rtx insn; + /* Nonzero if this [anticipatable] occurrence has been deleted. */ + char deleted_p; +}; + +struct reg_avail_info +{ + basic_block last_bb; + int first_set; + int first_use; +}; + +/* Hashtable helpers. */ + +struct expr_hasher : typed_noop_remove +{ + typedef expr value_type; + typedef expr compare_type; + static inline hashval_t hash (const value_type *); + static inline bool equal (const value_type *, const compare_type *); +}; + +/* Callback for hashtab. + Return the hash value for expression EXP. We don't actually hash + here, we just return the cached hash value. */ + +inline hashval_t +expr_hasher::hash (const value_type *exp) +{ + return exp->hash; +} + +/* Callback for hashtab. + Return nonzero if exp1 is equivalent to exp2. */ + +inline bool +expr_hasher::equal (const value_type *exp1, const compare_type *exp2) +{ + int equiv_p = exp_equiv_p (exp1->expr, exp2->expr, 0, true); + + gcc_assert (!equiv_p || exp1->hash == exp2->hash); + return equiv_p; +} + +static hashval_t +hash_expr (rtx x, int *do_not_record_p) +{ + *do_not_record_p = 0; + return hash_rtx (x, GET_MODE (x), do_not_record_p, + NULL, /*have_reg_qty=*/false); +} + + +/* Helpers for memory allocation/freeing. */ +static void alloc_mem (void); +static void free_mem (void); +static void compute_hash_table (void); +/* Scan the pattern of INSN and add an entry to the hash TABLE. + After reload we are interested in loads/stores only. */ +static void hash_scan_set (rtx); +static void insert_expr_in_table (rtx, rtx); +static void dump_hash_table (FILE *); + +static struct obstack expr_obstack; +/* The table itself. */ +static hash_table expr_table; +static struct reg_avail_info *reg_avail_info; +static sbitmap *hoist_vbein; +static sbitmap *hoist_vbeout; + +/* Allocate memory for the CUID mapping array and register/memory + tracking tables. 
*/ + +static void +alloc_mem (void) +{ + /* Allocate the available expressions hash table. We don't want to + make the hash table too small, but unnecessarily making it too large + also doesn't help. The i/4 is a gcse.c relic, and seems like a + reasonable choice. */ + expr_table.create (MAX (get_max_insn_count () / 4, 13)); + + /* We allocate everything on obstacks because we often can roll back + the whole obstack to some point. Freeing obstacks is very fast. */ + gcc_obstack_init (&expr_obstack); +} + +/* Free memory allocated by alloc_mem. */ + +static void +free_mem (void) +{ + expr_table.dispose (); + + obstack_free (&expr_obstack, NULL); +} + + +/* Dump all expressions and occurrences that are currently in the + expression hash table to FILE. */ + +/* This helper is called via htab_traverse. */ +int +nds32_dump_expr_hash_table_entry (expr **slot, FILE *file) +{ + struct expr *exprs = *slot; + struct occr *occr; + + fprintf (file, "expr: "); + print_rtl (file, exprs->expr); + fprintf (file,"\nhashcode: %u\n", exprs->hash); + fprintf (file,"list of occurrences:\n"); + occr = exprs->antic_occr; + while (occr) + { + rtx insn = occr->insn; + print_rtl_single (file, insn); + fprintf (file, "\n"); + occr = occr->next; + } + fprintf (file, "\n"); + return 1; +} + +static void +dump_hash_table (FILE *file) +{ + fprintf (file, "\n\nexpression hash table\n"); + fprintf (file, "size %ld, %ld elements, %f collision/search ratio\n", + (long) expr_table.size (), + (long) expr_table.elements (), + expr_table.collisions ()); + if (expr_table.elements () > 0) + { + fprintf (file, "\n\ntable entries:\n"); + expr_table.traverse (file); + } + fprintf (file, "\n"); +} + +/* Insert expression X in INSN in the hash TABLE. + If it is already present, record it as the last occurrence in INSN's + basic block. */ + +static void +insert_expr_in_table (rtx x, rtx insn) +{ + int do_not_record_p; + hashval_t hash; + struct expr *cur_expr, **slot; + struct occr *antic_occr, *last_occr = NULL; + + hash = hash_expr (x, &do_not_record_p); + + /* Do not insert expression in the table if it contains volatile operands, + or if hash_expr determines the expression is something we don't want + to or can't handle. */ + if (do_not_record_p) + return; + + /* We anticipate that redundant expressions are rare, so for convenience + allocate a new hash table element here already and set its fields. + If we don't do this, we need a hack with a static struct expr. Anyway, + obstack_free is really fast and one more obstack_alloc doesn't hurt if + we're going to see more expressions later on. */ + cur_expr = (struct expr *) obstack_alloc (&expr_obstack, + sizeof (struct expr)); + cur_expr->expr = x; + cur_expr->hash = hash; + cur_expr->antic_occr = NULL; + + slot = expr_table.find_slot_with_hash (cur_expr, hash, INSERT); + + if (! (*slot)) + /* The expression isn't found, so insert it. */ + *slot = cur_expr; + else + { + /* The expression is already in the table, so roll back the + obstack and use the existing table entry. */ + obstack_free (&expr_obstack, cur_expr); + cur_expr = *slot; + } + + /* Search for another occurrence in the same basic block. */ + antic_occr = cur_expr->antic_occr; + cur_expr->count++; + while (antic_occr + && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn)) + { + /* If an occurrence isn't found, save a pointer to the end of + the list. */ + last_occr = antic_occr; + antic_occr = antic_occr->next; + } + + if (antic_occr) + /* Found another instance of the expression in the same basic block. 
+ Prefer this occurrence to the currently recorded one. We want + the last one in the block and the block is scanned from start + to end. */ + antic_occr->insn = insn; + else + { + /* First occurrence of this expression in this basic block. */ + antic_occr = (struct occr *) obstack_alloc (&expr_obstack, + sizeof (struct occr)); + + /* First occurrence of this expression in any block? */ + if (cur_expr->antic_occr == NULL) + cur_expr->antic_occr = antic_occr; + else + last_occr->next = antic_occr; + + antic_occr->insn = insn; + antic_occr->next = NULL; + antic_occr->deleted_p = 0; + } +} + +/* Check whether this instruction is supported format. */ + +static void +hash_scan_set (rtx insn) +{ + rtx pat = PATTERN (insn); + rtx src = SET_SRC (pat); + rtx dest = SET_DEST (pat); + int regno; + struct reg_avail_info *info; + + /* Don't mess with jumps and nops. */ + if (JUMP_P (insn) || set_noop_p (pat)) + return; + + /* TODO: support more format. */ + + /* Only consider locally anticipatable intructions currently. */ + if (REG_P (dest) && REGNO (dest) <= SP_REGNUM) + { + regno = REGNO (dest); + info = ®_avail_info[regno]; + + if (BLOCK_FOR_INSN (insn) == info->last_bb + && info->first_set == DF_INSN_LUID (insn) + && info->first_use >= info->first_set) + { + /* Only support immediate input currently because + this is bugzilla case. */ + if (CONST_INT_P (src) || CONST_DOUBLE_P (src)) + insert_expr_in_table (PATTERN (insn), insn); + } + } +} + +/* Record register first use information for REGNO in INSN. + + first_use records the first place in the block where the register + is used and is used to compute "anticipatability". + + last_bb records the block for which first_use is valid, + as a quick test to invalidate them. */ + +static void +record_first_reg_use_info (rtx insn, int regno) +{ + struct reg_avail_info *info = ®_avail_info[regno]; + int luid = DF_INSN_LUID (insn); + + if (info->last_bb != BLOCK_FOR_INSN (insn)) + { + info->last_bb = BLOCK_FOR_INSN (insn); + info->first_use = luid; + /* Set the value to record the using is former than setting. */ + info->first_set = luid + 1; + } +} + +/* Called from compute_hash_table via note_stores to handle one + SET or CLOBBER in an insn. DATA is really the instruction in which + the SET is taking place. */ + +static void +record_first_use_info (rtx *dest, void *data) +{ + rtx last_set_insn = (rtx) data; + int i, j; + enum rtx_code code; + const char *fmt; + rtx x = *dest; + + if (x == 0) + return; + + code = GET_CODE (x); + if (REG_P (x) && REGNO (x) <= SP_REGNUM) + { + record_first_reg_use_info (last_set_insn, REGNO (x)); + /* DF and DI mode may use two registers. */ + if (GET_MODE_SIZE (GET_MODE (x)) == 8) + record_first_reg_use_info (last_set_insn, REGNO (x) + 1); + } + + for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--) + { + if (fmt[i] == 'e') + record_first_use_info (&XEXP (x, i), data); + else if (fmt[i] == 'E') + for (j = 0; j < XVECLEN (x, i); j++) + record_first_use_info (&XVECEXP (x, i, j), data); + } +} + +/* Record register first/block set information for REGNO in INSN. + + first_set records the first place in the block where the register + is set and is used to compute "anticipatability". + + last_bb records the block for which first_set is valid, + as a quick test to invalidate them. 
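A self-contained restatement of the local-anticipatability test used by
hash_scan_set above (it assumes the reg_avail_info entry is already known to
be valid for the insn's block; the names here are illustrative only):

struct toy_reg_info
{
  int first_set;  /* LUID of the first insn in the block that sets the reg  */
  int first_use;  /* LUID of the first insn in the block that uses the reg  */
};

/* An insn is locally anticipatable for its destination register when it is
   that register's first set in the block and no earlier insn in the block
   uses the register, mirroring the condition checked in hash_scan_set.  */
int
locally_anticipatable_p (const struct toy_reg_info *info, int insn_luid)
{
  return info->first_set == insn_luid
         && info->first_use >= info->first_set;
}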
*/ + +static void +record_first_reg_set_info (rtx insn, int regno) +{ + struct reg_avail_info *info = ®_avail_info[regno]; + int luid = DF_INSN_LUID (insn); + + if (info->last_bb != BLOCK_FOR_INSN (insn)) + { + info->last_bb = BLOCK_FOR_INSN (insn); + info->first_set = luid; + /* Set the value to record the using is later than setting. */ + info->first_use = luid + 1; + } +} + +/* Called from compute_hash_table via note_stores to handle one + SET or CLOBBER in an insn. DATA is really the instruction in which + the SET is taking place. */ + +static void +record_first_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data) +{ + rtx last_set_insn = (rtx) data; + + if (GET_CODE (dest) == SUBREG) + dest = SUBREG_REG (dest); + + if (REG_P (dest) && REGNO (dest) <= SP_REGNUM) + { + record_first_reg_set_info (last_set_insn, REGNO (dest)); + if (GET_MODE_SIZE (GET_MODE (dest)) == 8) + record_first_reg_set_info (last_set_insn, REGNO (dest) + 1); + } +} + +/* Build hash table for supported format instructions. + Only consider if the instruction is anticipatable in the basic block here. + We postpone the def-use check until hoisting. */ + +static void +compute_hash_table (void) +{ + basic_block bb; + int i; + + /* We only take care hard registers. */ + reg_avail_info = + (struct reg_avail_info *) xmalloc (sizeof (struct reg_avail_info) * + (SP_REGNUM + 1)); + + for (i = 0; i < 32; i++) + reg_avail_info[i].last_bb = NULL; + + FOR_EACH_BB_FN (bb, cfun) + { + rtx insn; + + /* Do not hoist instrucion from block which has more + than one predecessor. */ + if (EDGE_COUNT (bb->preds) > 1) + continue; + + FOR_BB_INSNS (bb, insn) + { + if (!NONDEBUG_INSN_P (insn)) + continue; + + /* Construct a caller save register barrier. We cannot hoist the + instruction over a function call which sets caller save + registers. */ + if (CALL_P (insn)) + { + for (i = 0; i <= SP_REGNUM; i++) + if (call_used_regs[i]) + record_first_reg_use_info (insn, i); + continue; + } + + note_uses (&PATTERN (insn), record_first_use_info, insn); + note_stores (PATTERN (insn), record_first_set_info, insn); + } + + /* Build the hash table. */ + FOR_BB_INSNS (bb, insn) + if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SET) + hash_scan_set (insn); + } +} + +/* Hoist instructions in this slot if possible. */ +int +nds32_find_gcse_expr_table (expr **slot, void *data ATTRIBUTE_UNUSED) +{ + struct expr *exprs = *slot; + struct occr *occr; + rtx insn; + rtx last_insn; + basic_block bb; + edge e; + unsigned ix; + unsigned emit_done; + unsigned cover; + df_ref *use_rec; + + if (exprs->count < 2) + return 1; + + bitmap_vector_clear (hoist_vbeout, last_basic_block_for_fn (cfun)); + bitmap_vector_clear (hoist_vbein, last_basic_block_for_fn (cfun)); + + /* Set the bit for this slot. */ + occr = exprs->antic_occr; + while (occr) + { + insn = occr->insn; + bb = BLOCK_FOR_INSN (insn); + if (!occr->deleted_p) + bitmap_set_bit (hoist_vbein[bb->index], 0); + occr = occr->next; + } + + /* Try to hoist code for each basic block. 
*/ + FOR_EACH_BB_REVERSE_FN (bb, cfun) + { + if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun)) + bitmap_intersection_of_succs (hoist_vbeout[bb->index], hoist_vbein, bb); + + if (bitmap_bit_p (hoist_vbeout[bb->index], 0) + && EDGE_COUNT (bb->succs) > 1) + { + emit_done = 0; + cover = FALSE; + for (e = NULL, ix = 0; ix < EDGE_COUNT (bb->succs); ix++) + { + e = EDGE_SUCC (bb, ix); + if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) + continue; + occr = exprs->antic_occr; + while (occr) + { + insn = occr->insn; + if (!occr->deleted_p && e->dest == BLOCK_FOR_INSN (insn)) + break; + occr = occr->next; + } + + if (!emit_done) + { + last_insn = BB_END (bb); + /* Check the defined register is not used by the last + instruction of the previos block.*/ + for (use_rec = DF_INSN_USES (last_insn); *use_rec; use_rec++) + { + if (DF_REF_REGNO (*use_rec) + == REGNO (SET_DEST (PATTERN (insn)))) + { + cover = TRUE; + break; + } + } + + /* TODO: support more format. */ + if (cover) + break; + else if (JUMP_P (last_insn)) + { + emit_insn_before_noloc (PATTERN (insn), last_insn, bb); + emit_done = TRUE; + } + else + break; + } + + if (emit_done) + { + delete_insn (insn); + occr->deleted_p = TRUE; + } + } + } + } + return 1; +} + +static int +hoist_code (void) +{ + hoist_vbein = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), 1); + hoist_vbeout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), 1); + + expr_table.traverse (NULL); + + sbitmap_vector_free (hoist_vbein); + sbitmap_vector_free (hoist_vbeout); + + return 0; +} + + +static unsigned int +nds32_gcse_opt (void) +{ + + if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1) + return 0; + /* Allocate memory for this pass. + Also computes and initializes the insns' CUIDs. */ + alloc_mem (); + + df_chain_add_problem (DF_DU_CHAIN); + df_insn_rescan_all (); + df_analyze (); + + compute_hash_table (); + + if (dump_file) + dump_hash_table (dump_file); + + hoist_code (); + + df_insn_rescan_all (); + free_mem (); + return 0; +} + +const pass_data pass_data_nds32_gcse_opt = +{ + RTL_PASS, /* type */ + "gcse_opt", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + (TODO_df_finish | TODO_verify_rtl_sharing), /* todo_flags_finish */ +}; + +class pass_nds32_gcse_opt : public rtl_opt_pass +{ +public: + pass_nds32_gcse_opt (gcc::context *ctxt) + : rtl_opt_pass (pass_data_nds32_gcse_opt, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return TARGET_GCSE_OPT; } + unsigned int execute () { return nds32_gcse_opt (); } +}; + +rtl_opt_pass * +make_pass_nds32_gcse_opt (gcc::context *ctxt) +{ + return new pass_nds32_gcse_opt (ctxt); +} + +/* ------------------------------------------------------------------------ */ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32.h gcc-4.9.4/gcc/config/nds32/nds32.h --- gcc-4.9.4.orig/gcc/config/nds32/nds32.h 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32.h 2016-08-08 20:37:45.590273343 +0200 @@ -1,5 +1,5 @@ /* Definitions of target machine of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,69 +24,35 @@ /* The following are auxiliary macros or structure declarations that are used all over the nds32.c and nds32.h. 
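Schematically, the transformation the gcse_opt pass above aims for looks like
the following sketch (the register numbers, label, and constant are made up;
only immediate loads into hard registers are considered, and only when the
predecessor's final jump does not use the destination register):

/* before                              after
   bb1:   ...                          bb1:   ...
          beqz  $r0, .L2                      movi  $r1, 100
                                              beqz  $r0, .L2
   bb2:   movi  $r1, 100               bb2:
          ...                                 ...
   .L2:   movi  $r1, 100               .L2:
          ...                                 ...                        */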
*/ - -/* Computing the Length of an Insn. */ #define ADJUST_INSN_LENGTH(INSN, LENGTH) \ (LENGTH = nds32_adjust_insn_length (INSN, LENGTH)) -/* Check instruction LS-37-FP-implied form. - Note: actually its immediate range is imm9u - since it is used for lwi37/swi37 instructions. */ -#define NDS32_LS_37_FP_P(rt, ra, imm) \ - (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ - && REGNO (ra) == FP_REGNUM \ - && satisfies_constraint_Iu09 (imm)) - -/* Check instruction LS-37-SP-implied form. - Note: actually its immediate range is imm9u - since it is used for lwi37/swi37 instructions. */ -#define NDS32_LS_37_SP_P(rt, ra, imm) \ - (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ - && REGNO (ra) == SP_REGNUM \ - && satisfies_constraint_Iu09 (imm)) - - -/* Check load/store instruction form : Rt3, Ra3, imm3u. */ -#define NDS32_LS_333_P(rt, ra, imm, mode) nds32_ls_333_p (rt, ra, imm, mode) - -/* Check load/store instruction form : Rt4, Ra5, const_int_0. - Note: no need to check ra because Ra5 means it covers all registers. */ -#define NDS32_LS_450_P(rt, ra, imm) \ - ((imm == const0_rtx) \ - && (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ - || REGNO_REG_CLASS (REGNO (rt)) == MIDDLE_REGS)) - -/* Check instruction RRI-333-form. */ -#define NDS32_RRI_333_P(rt, ra, imm) \ - (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ - && REGNO_REG_CLASS (REGNO (ra)) == LOW_REGS \ - && satisfies_constraint_Iu03 (imm)) - -/* Check instruction RI-45-form. */ -#define NDS32_RI_45_P(rt, ra, imm) \ - (REGNO (rt) == REGNO (ra) \ - && (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ - || REGNO_REG_CLASS (REGNO (rt)) == MIDDLE_REGS) \ - && satisfies_constraint_Iu05 (imm)) - - -/* Check instruction RR-33-form. */ -#define NDS32_RR_33_P(rt, ra) \ - (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ - && REGNO_REG_CLASS (REGNO (ra)) == LOW_REGS) - -/* Check instruction RRR-333-form. */ -#define NDS32_RRR_333_P(rt, ra, rb) \ - (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ - && REGNO_REG_CLASS (REGNO (ra)) == LOW_REGS \ - && REGNO_REG_CLASS (REGNO (rb)) == LOW_REGS) - -/* Check instruction RR-45-form. - Note: no need to check rb because Rb5 means it covers all registers. */ -#define NDS32_RR_45_P(rt, ra, rb) \ - (REGNO (rt) == REGNO (ra) \ - && (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ - || REGNO_REG_CLASS (REGNO (rt)) == MIDDLE_REGS)) +/* Use SYMBOL_FLAG_MACH_DEP to define our own symbol_ref flag. + It is used in nds32_encode_section_info() to store flag in symbol_ref + in case the symbol should be placed in .rodata section. + So that we can check it in nds32_legitimate_address_p(). */ +#define NDS32_SYMBOL_FLAG_RODATA \ + (SYMBOL_FLAG_MACH_DEP << 0) +#define NDS32_SYMBOL_REF_RODATA_P(x) \ + ((SYMBOL_REF_FLAGS (x) & NDS32_SYMBOL_FLAG_RODATA) != 0) + +enum nds32_relax_insn_type +{ + RELAX_ORI, + RELAX_PLT_ADD, + RELAX_TLS_ADD_or_LW, + RELAX_TLS_ADD_LW, + RELAX_TLS_LW_JRAL, + RELAX_DONE +}; + +/* Classifies expand result for expand helper function. */ +enum nds32_expand_result_type +{ + EXPAND_DONE, + EXPAND_FAIL, + EXPAND_CREATE_TEMPLATE +}; /* Classifies address type to distinguish 16-bit/32-bit format. */ enum nds32_16bit_address_type @@ -97,6 +63,10 @@ ADDRESS_LO_REG_IMM3U, /* post_inc [lo_reg + imm3u]: 333 format address. */ ADDRESS_POST_INC_LO_REG_IMM3U, + /* post_modify [lo_reg + imm3u]: 333 format address. */ + ADDRESS_POST_MODIFY_LO_REG_IMM3U, + /* [$r8 + imm7u]: r8 imply address. */ + ADDRESS_R8_IMM7U, /* [$fp + imm7u]: fp imply address. */ ADDRESS_FP_IMM7U, /* [$sp + imm7u]: sp imply address. 
*/ @@ -105,18 +75,66 @@ ADDRESS_NOT_16BIT_FORMAT }; - /* ------------------------------------------------------------------------ */ /* Define maximum numbers of registers for passing arguments. */ -#define NDS32_MAX_REGS_FOR_ARGS 6 +#define NDS32_MAX_GPR_REGS_FOR_ARGS 6 +#define NDS32_MAX_FPR_REGS_FOR_ARGS 6 /* Define the register number for first argument. */ #define NDS32_GPR_ARG_FIRST_REGNUM 0 +#define NDS32_FPR_ARG_FIRST_REGNUM 34 /* Define the register number for return value. */ #define NDS32_GPR_RET_FIRST_REGNUM 0 +#define NDS32_FPR_RET_FIRST_REGNUM 34 + +/* Define the first integer register number. */ +#define NDS32_FIRST_GPR_REGNUM 0 +/* Define the last integer register number. */ +#define NDS32_LAST_GPR_REGNUM 31 + +#define NDS32_FIRST_CALLEE_SAVE_GPR_REGNUM 6 +#define NDS32_LAST_CALLEE_SAVE_GPR_REGNUM \ + (TARGET_REDUCED_REGS ? 10 : 14) + +/* Define the floating-point number of registers. */ +#define NDS32_FLOAT_REGISTER_NUMBER \ + (((nds32_fp_regnum == NDS32_CONFIG_FPU_0) \ + || (nds32_fp_regnum == NDS32_CONFIG_FPU_4)) ? 8 \ + : ((nds32_fp_regnum == NDS32_CONFIG_FPU_1) \ + || (nds32_fp_regnum == NDS32_CONFIG_FPU_5)) ? 16 \ + : ((nds32_fp_regnum == NDS32_CONFIG_FPU_2) \ + || (nds32_fp_regnum == NDS32_CONFIG_FPU_6)) ? 32 \ + : ((nds32_fp_regnum == NDS32_CONFIG_FPU_3) \ + || (nds32_fp_regnum == NDS32_CONFIG_FPU_7)) ? 64 \ + : 32) + +#define NDS32_EXT_FPU_DOT_E (nds32_fp_regnum >= 4) + +/* Define the first floating-point register number. */ +#define NDS32_FIRST_FPR_REGNUM 34 +/* Define the last floating-point register number. */ +#define NDS32_LAST_FPR_REGNUM \ + (NDS32_FIRST_FPR_REGNUM + NDS32_FLOAT_REGISTER_NUMBER - 1) + + +#define NDS32_IS_EXT_FPR_REGNUM(regno) \ + (((regno) >= NDS32_FIRST_FPR_REGNUM + 32) \ + && ((regno) < NDS32_FIRST_FPR_REGNUM + 64)) + +#define NDS32_IS_FPR_REGNUM(regno) \ + (((regno) >= NDS32_FIRST_FPR_REGNUM) \ + && ((regno) <= NDS32_LAST_FPR_REGNUM)) +#define NDS32_FPR_REGNO_OK_FOR_SINGLE(regno) \ + ((regno) <= NDS32_LAST_FPR_REGNUM) + +#define NDS32_FPR_REGNO_OK_FOR_DOUBLE(regno) \ + ((((regno) - NDS32_FIRST_FPR_REGNUM) & 1) == 0) + +#define NDS32_IS_GPR_REGNUM(regno) \ + (((regno) <= NDS32_LAST_GPR_REGNUM)) /* Define double word alignment bits. */ #define NDS32_DOUBLE_WORD_ALIGNMENT 64 @@ -126,6 +144,16 @@ #define NDS32_SINGLE_WORD_ALIGN_P(value) (((value) & 0x03) == 0) #define NDS32_DOUBLE_WORD_ALIGN_P(value) (((value) & 0x07) == 0) +/* Determine whether we would like to have code generation strictly aligned. + We set it strictly aligned when -malways-align is enabled. + Check gcc/common/config/nds32/nds32-common.c for the optimizations that + apply -malways-align. */ +#define NDS32_ALIGN_P() (TARGET_ALWAYS_ALIGN) + +#define NDS32_HW_LOOP_P() (TARGET_HWLOOP && !TARGET_FORCE_NO_HWLOOP) + +#define NDS32_EXT_DSP_P() (TARGET_EXT_DSP && !TARGET_FORCE_NO_EXT_DSP) + /* Get alignment according to mode or type information. When 'type' is nonnull, there is no need to look at 'mode'. */ #define NDS32_MODE_TYPE_ALIGN(mode, type) \ @@ -147,26 +175,51 @@ /* This macro is used to return the register number for passing argument. We need to obey the following rules: 1. If it is required MORE THAN one register, - we need to further check if it really needs to be - aligned on double words. - a) If double word alignment is necessary, - the register number must be even value. - b) Otherwise, the register number can be odd or even value. + we need to further check if it really needs to be + aligned on double words. 
+ a) If double word alignment is necessary, + the register number must be even value. + b) Otherwise, the register number can be odd or even value. 2. If it is required ONLY one register, - the register number can be odd or even value. */ -#define NDS32_AVAILABLE_REGNUM_FOR_ARG(reg_offset, mode, type) \ - ((NDS32_NEED_N_REGS_FOR_ARG (mode, type) > 1) \ - ? ((NDS32_MODE_TYPE_ALIGN (mode, type) > PARM_BOUNDARY) \ - ? (((reg_offset) + NDS32_GPR_ARG_FIRST_REGNUM + 1) & ~1) \ - : ((reg_offset) + NDS32_GPR_ARG_FIRST_REGNUM)) \ + the register number can be odd or even value. */ +#define NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG(reg_offset, mode, type) \ + ((NDS32_NEED_N_REGS_FOR_ARG (mode, type) > 1) \ + ? ((NDS32_MODE_TYPE_ALIGN (mode, type) > PARM_BOUNDARY) \ + ? (((reg_offset) + NDS32_GPR_ARG_FIRST_REGNUM + 1) & ~1) \ + : ((reg_offset) + NDS32_GPR_ARG_FIRST_REGNUM)) \ : ((reg_offset) + NDS32_GPR_ARG_FIRST_REGNUM)) -/* This macro is to check if there are still available registers - for passing argument. */ -#define NDS32_ARG_PASS_IN_REG_P(reg_offset, mode, type) \ - (((reg_offset) < NDS32_MAX_REGS_FOR_ARGS) \ - && ((reg_offset) + NDS32_NEED_N_REGS_FOR_ARG (mode, type) \ - <= NDS32_MAX_REGS_FOR_ARGS)) +#define NDS32_AVAILABLE_REGNUM_FOR_FPR_ARG(reg_offset, mode, type) \ + ((NDS32_NEED_N_REGS_FOR_ARG (mode, type) > 1) \ + ? ((NDS32_MODE_TYPE_ALIGN (mode, type) > PARM_BOUNDARY) \ + ? (((reg_offset) + NDS32_FPR_ARG_FIRST_REGNUM + 1) & ~1) \ + : ((reg_offset) + NDS32_FPR_ARG_FIRST_REGNUM)) \ + : ((reg_offset) + NDS32_FPR_ARG_FIRST_REGNUM)) + +/* These two macros are to check if there are still available registers + for passing argument, which must be entirely in registers. */ +#define NDS32_ARG_ENTIRE_IN_GPR_REG_P(reg_offset, mode, type) \ + ((NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (reg_offset, mode, type) \ + + NDS32_NEED_N_REGS_FOR_ARG (mode, type)) \ + <= (NDS32_GPR_ARG_FIRST_REGNUM \ + + NDS32_MAX_GPR_REGS_FOR_ARGS)) + +#define NDS32_ARG_ENTIRE_IN_FPR_REG_P(reg_offset, mode, type) \ + ((NDS32_AVAILABLE_REGNUM_FOR_FPR_ARG (reg_offset, mode, type) \ + + NDS32_NEED_N_REGS_FOR_ARG (mode, type)) \ + <= (NDS32_FPR_ARG_FIRST_REGNUM \ + + NDS32_MAX_FPR_REGS_FOR_ARGS)) + +/* These two macros are to check if there are still available registers + for passing argument, either entirely in registers or partially + in registers. */ +#define NDS32_ARG_PARTIAL_IN_GPR_REG_P(reg_offset, mode, type) \ + (NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG (reg_offset, mode, type) \ + < NDS32_GPR_ARG_FIRST_REGNUM + NDS32_MAX_GPR_REGS_FOR_ARGS) + +#define NDS32_ARG_PARTIAL_IN_FPR_REG_P(reg_offset, mode, type) \ + (NDS32_AVAILABLE_REGNUM_FOR_FPR_ARG (reg_offset, mode, type) \ + < NDS32_FPR_ARG_FIRST_REGNUM + NDS32_MAX_FPR_REGS_FOR_ARGS) /* This macro is to check if the register is required to be saved on stack. If call_used_regs[regno] == 0, regno is the callee-saved register. @@ -176,6 +229,19 @@ #define NDS32_REQUIRED_CALLEE_SAVED_P(regno) \ ((!call_used_regs[regno]) && (df_regs_ever_live_p (regno))) +/* This macro is to check if the push25/pop25 are available to be used + for code generation. Because pop25 also performs return behavior, + the instructions may not be available for some cases. + If we want to use push25/pop25, all the following conditions must + be satisfied: + 1. TARGET_V3PUSH is set. + 2. Current function is not an ISR function. + 3. 
Current function is not a variadic function.*/ +#define NDS32_V3PUSH_AVAILABLE_P \ + (TARGET_V3PUSH \ + && !nds32_isr_function_p (current_function_decl) \ + && (cfun->machine->va_args_size == 0)) + /* ------------------------------------------------------------------------ */ /* A C structure for machine-specific, per-function data. @@ -198,22 +264,44 @@ /* Number of bytes on the stack for saving $lp. */ int lp_size; - /* Number of bytes on the stack for saving callee-saved registers. */ - int callee_saved_regs_size; + /* Number of bytes on the stack for saving general purpose + callee-saved registers. */ + int callee_saved_gpr_regs_size; + + /* Number of bytes on the stack for saving floating-point + callee-saved registers. */ + int callee_saved_fpr_regs_size; + /* The padding bytes in callee-saved area may be required. */ - int callee_saved_area_padding_bytes; + int callee_saved_area_gpr_padding_bytes; - /* The first required register that should be saved on stack - for va_args (one named argument + nameless arguments). */ + /* The first required general purpose callee-saved register. */ + int callee_saved_first_gpr_regno; + /* The last required general purpose callee-saved register. */ + int callee_saved_last_gpr_regno; + + /* The first required floating-point callee-saved register. */ + int callee_saved_first_fpr_regno; + /* The last required floating-point callee-saved register. */ + int callee_saved_last_fpr_regno; + + /* The padding bytes in varargs area may be required. */ + int va_args_area_padding_bytes; + /* The first required register that should be saved on stack for va_args. */ int va_args_first_regno; - /* The last required register that should be saved on stack - for va_args (one named argument + nameless arguments). */ + /* The last required register that should be saved on stack for va_args. */ int va_args_last_regno; - /* The first required callee-saved register. */ - int callee_saved_regs_first_regno; - /* The last required callee-saved register. */ - int callee_saved_regs_last_regno; + /* Number of bytes on the stack for saving exception handling registers. */ + int eh_return_data_regs_size; + /* The first register of passing exception handling information. */ + int eh_return_data_first_regno; + /* The last register of passing exception handling information. */ + int eh_return_data_last_regno; + + /* Indicate that whether this function + calls __builtin_eh_return. */ + int use_eh_return_p; /* Indicate that whether this function needs prologue/epilogue code generation. */ @@ -221,12 +309,27 @@ /* Indicate that whether this function uses fp_as_gp optimization. */ int fp_as_gp_p; + /* Indicate that whether this function is under strictly aligned + situation for legitimate address checking. This flag informs + nds32_legitimate_address_p() how to treat offset alignment: + 1. The IVOPT phase needs to detect available range for memory access, + such as checking [base + 32767] ~ [base + (-32768)]. + For this case we do not want address to be strictly aligned. + 2. The rtl lowering and optimization are close to target code. + For this case we need address to be strictly aligned. */ + int strict_aligned_p; + + /* Record two similar attributes status. */ + int attr_naked_p; + int attr_no_prologue_p; + }; /* A C structure that contains the arguments information. 
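The rounding rule encoded in NDS32_AVAILABLE_REGNUM_FOR_GPR_ARG above, as a
self-contained sketch (assuming a 32-bit PARM_BOUNDARY; the function and
constant names are illustrative only):

unsigned int
gpr_arg_first_regnum (unsigned int reg_offset, unsigned int regs_needed,
                      unsigned int align_in_bits)
{
  const unsigned int gpr_arg_first = 0;   /* NDS32_GPR_ARG_FIRST_REGNUM  */
  const unsigned int parm_boundary = 32;  /* assumed word-size boundary  */

  /* An argument needing more than one register and stricter-than-word
     alignment must start in an even register: arriving at offset 1, it
     skips $r1 and is passed in $r2/$r3.  */
  if (regs_needed > 1 && align_in_bits > parm_boundary)
    return (reg_offset + gpr_arg_first + 1) & ~1U;

  return reg_offset + gpr_arg_first;
}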
*/ typedef struct { - unsigned int reg_offset; + unsigned int gpr_offset; + unsigned int fpr_offset; } nds32_cumulative_args; /* ------------------------------------------------------------------------ */ @@ -265,7 +368,8 @@ { NDS32_NESTED, NDS32_NOT_NESTED, - NDS32_NESTED_READY + NDS32_NESTED_READY, + NDS32_CRITICAL }; /* Define structure to record isr information. @@ -317,106 +421,615 @@ { NDS32_BUILTIN_ISYNC, NDS32_BUILTIN_ISB, + NDS32_BUILTIN_DSB, + NDS32_BUILTIN_MSYNC_ALL, + NDS32_BUILTIN_MSYNC_STORE, NDS32_BUILTIN_MFSR, NDS32_BUILTIN_MFUSR, NDS32_BUILTIN_MTSR, + NDS32_BUILTIN_MTSR_ISB, + NDS32_BUILTIN_MTSR_DSB, NDS32_BUILTIN_MTUSR, NDS32_BUILTIN_SETGIE_EN, - NDS32_BUILTIN_SETGIE_DIS + NDS32_BUILTIN_SETGIE_DIS, + NDS32_BUILTIN_FMFCFG, + NDS32_BUILTIN_FMFCSR, + NDS32_BUILTIN_FMTCSR, + NDS32_BUILTIN_FCPYNSS, + NDS32_BUILTIN_FCPYSS, + NDS32_BUILTIN_FCPYNSD, + NDS32_BUILTIN_FCPYSD, + NDS32_BUILTIN_FABSS, + NDS32_BUILTIN_FABSD, + NDS32_BUILTIN_FSQRTS, + NDS32_BUILTIN_FSQRTD, + NDS32_BUILTIN_ABS, + NDS32_BUILTIN_AVE, + NDS32_BUILTIN_BCLR, + NDS32_BUILTIN_BSET, + NDS32_BUILTIN_BTGL, + NDS32_BUILTIN_BTST, + NDS32_BUILTIN_CLIP, + NDS32_BUILTIN_CLIPS, + NDS32_BUILTIN_CLZ, + NDS32_BUILTIN_CLO, + NDS32_BUILTIN_MAX, + NDS32_BUILTIN_MIN, + NDS32_BUILTIN_PBSAD, + NDS32_BUILTIN_PBSADA, + NDS32_BUILTIN_BSE, + NDS32_BUILTIN_BSP, + NDS32_BUILTIN_FFB, + NDS32_BUILTIN_FFMISM, + NDS32_BUILTIN_FLMISM, + NDS32_BUILTIN_KADDW, + NDS32_BUILTIN_KSUBW, + NDS32_BUILTIN_KADDH, + NDS32_BUILTIN_KSUBH, + NDS32_BUILTIN_KDMBB, + NDS32_BUILTIN_V_KDMBB, + NDS32_BUILTIN_KDMBT, + NDS32_BUILTIN_V_KDMBT, + NDS32_BUILTIN_KDMTB, + NDS32_BUILTIN_V_KDMTB, + NDS32_BUILTIN_KDMTT, + NDS32_BUILTIN_V_KDMTT, + NDS32_BUILTIN_KHMBB, + NDS32_BUILTIN_V_KHMBB, + NDS32_BUILTIN_KHMBT, + NDS32_BUILTIN_V_KHMBT, + NDS32_BUILTIN_KHMTB, + NDS32_BUILTIN_V_KHMTB, + NDS32_BUILTIN_KHMTT, + NDS32_BUILTIN_V_KHMTT, + NDS32_BUILTIN_KSLRAW, + NDS32_BUILTIN_KSLRAW_U, + NDS32_BUILTIN_RDOV, + NDS32_BUILTIN_CLROV, + NDS32_BUILTIN_ROTR, + NDS32_BUILTIN_SVA, + NDS32_BUILTIN_SVS, + NDS32_BUILTIN_WSBH, + NDS32_BUILTIN_JR_ITOFF, + NDS32_BUILTIN_JR_TOFF, + NDS32_BUILTIN_JRAL_ITON, + NDS32_BUILTIN_JRAL_TON, + NDS32_BUILTIN_RET_ITOFF, + NDS32_BUILTIN_RET_TOFF, + NDS32_BUILTIN_STANDBY_NO_WAKE_GRANT, + NDS32_BUILTIN_STANDBY_WAKE_GRANT, + NDS32_BUILTIN_STANDBY_WAKE_DONE, + NDS32_BUILTIN_TEQZ, + NDS32_BUILTIN_TNEZ, + NDS32_BUILTIN_TRAP, + NDS32_BUILTIN_SETEND_BIG, + NDS32_BUILTIN_SETEND_LITTLE, + NDS32_BUILTIN_SYSCALL, + NDS32_BUILTIN_BREAK, + NDS32_BUILTIN_NOP, + NDS32_BUILTIN_SCHE_BARRIER, + NDS32_BUILTIN_GET_CURRENT_SP, + NDS32_BUILTIN_SET_CURRENT_SP, + NDS32_BUILTIN_RETURN_ADDRESS, + NDS32_BUILTIN_LLW, + NDS32_BUILTIN_LWUP, + NDS32_BUILTIN_LBUP, + NDS32_BUILTIN_SCW, + NDS32_BUILTIN_SWUP, + NDS32_BUILTIN_SBUP, + NDS32_BUILTIN_CCTL_VA_LCK, + NDS32_BUILTIN_CCTL_IDX_WBINVAL, + NDS32_BUILTIN_CCTL_VA_WBINVAL_L1, + NDS32_BUILTIN_CCTL_VA_WBINVAL_LA, + NDS32_BUILTIN_CCTL_IDX_READ, + NDS32_BUILTIN_CCTL_IDX_WRITE, + NDS32_BUILTIN_CCTL_L1D_INVALALL, + NDS32_BUILTIN_CCTL_L1D_WBALL_ALVL, + NDS32_BUILTIN_CCTL_L1D_WBALL_ONE_LVL, + NDS32_BUILTIN_DPREF_QW, + NDS32_BUILTIN_DPREF_HW, + NDS32_BUILTIN_DPREF_W, + NDS32_BUILTIN_DPREF_DW, + NDS32_BUILTIN_TLBOP_TRD, + NDS32_BUILTIN_TLBOP_TWR, + NDS32_BUILTIN_TLBOP_RWR, + NDS32_BUILTIN_TLBOP_RWLK, + NDS32_BUILTIN_TLBOP_UNLK, + NDS32_BUILTIN_TLBOP_PB, + NDS32_BUILTIN_TLBOP_INV, + NDS32_BUILTIN_TLBOP_FLUA, + NDS32_BUILTIN_UALOAD_HW, + NDS32_BUILTIN_UALOAD_W, + NDS32_BUILTIN_UALOAD_DW, + NDS32_BUILTIN_UASTORE_HW, + NDS32_BUILTIN_UASTORE_W, + 
NDS32_BUILTIN_UASTORE_DW, + NDS32_BUILTIN_GIE_DIS, + NDS32_BUILTIN_GIE_EN, + NDS32_BUILTIN_ENABLE_INT, + NDS32_BUILTIN_DISABLE_INT, + NDS32_BUILTIN_SET_PENDING_SWINT, + NDS32_BUILTIN_CLR_PENDING_SWINT, + NDS32_BUILTIN_CLR_PENDING_HWINT, + NDS32_BUILTIN_GET_ALL_PENDING_INT, + NDS32_BUILTIN_GET_PENDING_INT, + NDS32_BUILTIN_SET_INT_PRIORITY, + NDS32_BUILTIN_GET_INT_PRIORITY, + NDS32_BUILTIN_SET_TRIG_LEVEL, + NDS32_BUILTIN_SET_TRIG_EDGE, + NDS32_BUILTIN_GET_TRIG_TYPE, + NDS32_BUILTIN_SIGNATURE_BEGIN, + NDS32_BUILTIN_SIGNATURE_END, + NDS32_BUILTIN_DSP_BEGIN, + NDS32_BUILTIN_ADD16, + NDS32_BUILTIN_V_UADD16, + NDS32_BUILTIN_V_SADD16, + NDS32_BUILTIN_RADD16, + NDS32_BUILTIN_V_RADD16, + NDS32_BUILTIN_URADD16, + NDS32_BUILTIN_V_URADD16, + NDS32_BUILTIN_KADD16, + NDS32_BUILTIN_V_KADD16, + NDS32_BUILTIN_UKADD16, + NDS32_BUILTIN_V_UKADD16, + NDS32_BUILTIN_SUB16, + NDS32_BUILTIN_V_USUB16, + NDS32_BUILTIN_V_SSUB16, + NDS32_BUILTIN_RSUB16, + NDS32_BUILTIN_V_RSUB16, + NDS32_BUILTIN_URSUB16, + NDS32_BUILTIN_V_URSUB16, + NDS32_BUILTIN_KSUB16, + NDS32_BUILTIN_V_KSUB16, + NDS32_BUILTIN_UKSUB16, + NDS32_BUILTIN_V_UKSUB16, + NDS32_BUILTIN_CRAS16, + NDS32_BUILTIN_V_UCRAS16, + NDS32_BUILTIN_V_SCRAS16, + NDS32_BUILTIN_RCRAS16, + NDS32_BUILTIN_V_RCRAS16, + NDS32_BUILTIN_URCRAS16, + NDS32_BUILTIN_V_URCRAS16, + NDS32_BUILTIN_KCRAS16, + NDS32_BUILTIN_V_KCRAS16, + NDS32_BUILTIN_UKCRAS16, + NDS32_BUILTIN_V_UKCRAS16, + NDS32_BUILTIN_CRSA16, + NDS32_BUILTIN_V_UCRSA16, + NDS32_BUILTIN_V_SCRSA16, + NDS32_BUILTIN_RCRSA16, + NDS32_BUILTIN_V_RCRSA16, + NDS32_BUILTIN_URCRSA16, + NDS32_BUILTIN_V_URCRSA16, + NDS32_BUILTIN_KCRSA16, + NDS32_BUILTIN_V_KCRSA16, + NDS32_BUILTIN_UKCRSA16, + NDS32_BUILTIN_V_UKCRSA16, + NDS32_BUILTIN_ADD8, + NDS32_BUILTIN_V_UADD8, + NDS32_BUILTIN_V_SADD8, + NDS32_BUILTIN_RADD8, + NDS32_BUILTIN_V_RADD8, + NDS32_BUILTIN_URADD8, + NDS32_BUILTIN_V_URADD8, + NDS32_BUILTIN_KADD8, + NDS32_BUILTIN_V_KADD8, + NDS32_BUILTIN_UKADD8, + NDS32_BUILTIN_V_UKADD8, + NDS32_BUILTIN_SUB8, + NDS32_BUILTIN_V_USUB8, + NDS32_BUILTIN_V_SSUB8, + NDS32_BUILTIN_RSUB8, + NDS32_BUILTIN_V_RSUB8, + NDS32_BUILTIN_URSUB8, + NDS32_BUILTIN_V_URSUB8, + NDS32_BUILTIN_KSUB8, + NDS32_BUILTIN_V_KSUB8, + NDS32_BUILTIN_UKSUB8, + NDS32_BUILTIN_V_UKSUB8, + NDS32_BUILTIN_SRA16, + NDS32_BUILTIN_V_SRA16, + NDS32_BUILTIN_SRA16_U, + NDS32_BUILTIN_V_SRA16_U, + NDS32_BUILTIN_SRL16, + NDS32_BUILTIN_V_SRL16, + NDS32_BUILTIN_SRL16_U, + NDS32_BUILTIN_V_SRL16_U, + NDS32_BUILTIN_SLL16, + NDS32_BUILTIN_V_SLL16, + NDS32_BUILTIN_KSLL16, + NDS32_BUILTIN_V_KSLL16, + NDS32_BUILTIN_KSLRA16, + NDS32_BUILTIN_V_KSLRA16, + NDS32_BUILTIN_KSLRA16_U, + NDS32_BUILTIN_V_KSLRA16_U, + NDS32_BUILTIN_CMPEQ16, + NDS32_BUILTIN_V_SCMPEQ16, + NDS32_BUILTIN_V_UCMPEQ16, + NDS32_BUILTIN_SCMPLT16, + NDS32_BUILTIN_V_SCMPLT16, + NDS32_BUILTIN_SCMPLE16, + NDS32_BUILTIN_V_SCMPLE16, + NDS32_BUILTIN_UCMPLT16, + NDS32_BUILTIN_V_UCMPLT16, + NDS32_BUILTIN_UCMPLE16, + NDS32_BUILTIN_V_UCMPLE16, + NDS32_BUILTIN_CMPEQ8, + NDS32_BUILTIN_V_SCMPEQ8, + NDS32_BUILTIN_V_UCMPEQ8, + NDS32_BUILTIN_SCMPLT8, + NDS32_BUILTIN_V_SCMPLT8, + NDS32_BUILTIN_SCMPLE8, + NDS32_BUILTIN_V_SCMPLE8, + NDS32_BUILTIN_UCMPLT8, + NDS32_BUILTIN_V_UCMPLT8, + NDS32_BUILTIN_UCMPLE8, + NDS32_BUILTIN_V_UCMPLE8, + NDS32_BUILTIN_SMIN16, + NDS32_BUILTIN_V_SMIN16, + NDS32_BUILTIN_UMIN16, + NDS32_BUILTIN_V_UMIN16, + NDS32_BUILTIN_SMAX16, + NDS32_BUILTIN_V_SMAX16, + NDS32_BUILTIN_UMAX16, + NDS32_BUILTIN_V_UMAX16, + NDS32_BUILTIN_SCLIP16, + NDS32_BUILTIN_V_SCLIP16, + NDS32_BUILTIN_UCLIP16, + NDS32_BUILTIN_V_UCLIP16, + NDS32_BUILTIN_KHM16, + 
NDS32_BUILTIN_V_KHM16, + NDS32_BUILTIN_KHMX16, + NDS32_BUILTIN_V_KHMX16, + NDS32_BUILTIN_KABS16, + NDS32_BUILTIN_V_KABS16, + NDS32_BUILTIN_SMIN8, + NDS32_BUILTIN_V_SMIN8, + NDS32_BUILTIN_UMIN8, + NDS32_BUILTIN_V_UMIN8, + NDS32_BUILTIN_SMAX8, + NDS32_BUILTIN_V_SMAX8, + NDS32_BUILTIN_UMAX8, + NDS32_BUILTIN_V_UMAX8, + NDS32_BUILTIN_KABS8, + NDS32_BUILTIN_V_KABS8, + NDS32_BUILTIN_SUNPKD810, + NDS32_BUILTIN_V_SUNPKD810, + NDS32_BUILTIN_SUNPKD820, + NDS32_BUILTIN_V_SUNPKD820, + NDS32_BUILTIN_SUNPKD830, + NDS32_BUILTIN_V_SUNPKD830, + NDS32_BUILTIN_SUNPKD831, + NDS32_BUILTIN_V_SUNPKD831, + NDS32_BUILTIN_ZUNPKD810, + NDS32_BUILTIN_V_ZUNPKD810, + NDS32_BUILTIN_ZUNPKD820, + NDS32_BUILTIN_V_ZUNPKD820, + NDS32_BUILTIN_ZUNPKD830, + NDS32_BUILTIN_V_ZUNPKD830, + NDS32_BUILTIN_ZUNPKD831, + NDS32_BUILTIN_V_ZUNPKD831, + NDS32_BUILTIN_RADDW, + NDS32_BUILTIN_URADDW, + NDS32_BUILTIN_RSUBW, + NDS32_BUILTIN_URSUBW, + NDS32_BUILTIN_SRA_U, + NDS32_BUILTIN_KSLL, + NDS32_BUILTIN_PKBB16, + NDS32_BUILTIN_V_PKBB16, + NDS32_BUILTIN_PKBT16, + NDS32_BUILTIN_V_PKBT16, + NDS32_BUILTIN_PKTB16, + NDS32_BUILTIN_V_PKTB16, + NDS32_BUILTIN_PKTT16, + NDS32_BUILTIN_V_PKTT16, + NDS32_BUILTIN_SMMUL, + NDS32_BUILTIN_SMMUL_U, + NDS32_BUILTIN_KMMAC, + NDS32_BUILTIN_KMMAC_U, + NDS32_BUILTIN_KMMSB, + NDS32_BUILTIN_KMMSB_U, + NDS32_BUILTIN_KWMMUL, + NDS32_BUILTIN_KWMMUL_U, + NDS32_BUILTIN_SMMWB, + NDS32_BUILTIN_V_SMMWB, + NDS32_BUILTIN_SMMWB_U, + NDS32_BUILTIN_V_SMMWB_U, + NDS32_BUILTIN_SMMWT, + NDS32_BUILTIN_V_SMMWT, + NDS32_BUILTIN_SMMWT_U, + NDS32_BUILTIN_V_SMMWT_U, + NDS32_BUILTIN_KMMAWB, + NDS32_BUILTIN_V_KMMAWB, + NDS32_BUILTIN_KMMAWB_U, + NDS32_BUILTIN_V_KMMAWB_U, + NDS32_BUILTIN_KMMAWT, + NDS32_BUILTIN_V_KMMAWT, + NDS32_BUILTIN_KMMAWT_U, + NDS32_BUILTIN_V_KMMAWT_U, + NDS32_BUILTIN_SMBB, + NDS32_BUILTIN_V_SMBB, + NDS32_BUILTIN_SMBT, + NDS32_BUILTIN_V_SMBT, + NDS32_BUILTIN_SMTT, + NDS32_BUILTIN_V_SMTT, + NDS32_BUILTIN_KMDA, + NDS32_BUILTIN_V_KMDA, + NDS32_BUILTIN_KMXDA, + NDS32_BUILTIN_V_KMXDA, + NDS32_BUILTIN_SMDS, + NDS32_BUILTIN_V_SMDS, + NDS32_BUILTIN_SMDRS, + NDS32_BUILTIN_V_SMDRS, + NDS32_BUILTIN_SMXDS, + NDS32_BUILTIN_V_SMXDS, + NDS32_BUILTIN_KMABB, + NDS32_BUILTIN_V_KMABB, + NDS32_BUILTIN_KMABT, + NDS32_BUILTIN_V_KMABT, + NDS32_BUILTIN_KMATT, + NDS32_BUILTIN_V_KMATT, + NDS32_BUILTIN_KMADA, + NDS32_BUILTIN_V_KMADA, + NDS32_BUILTIN_KMAXDA, + NDS32_BUILTIN_V_KMAXDA, + NDS32_BUILTIN_KMADS, + NDS32_BUILTIN_V_KMADS, + NDS32_BUILTIN_KMADRS, + NDS32_BUILTIN_V_KMADRS, + NDS32_BUILTIN_KMAXDS, + NDS32_BUILTIN_V_KMAXDS, + NDS32_BUILTIN_KMSDA, + NDS32_BUILTIN_V_KMSDA, + NDS32_BUILTIN_KMSXDA, + NDS32_BUILTIN_V_KMSXDA, + NDS32_BUILTIN_SMAL, + NDS32_BUILTIN_V_SMAL, + NDS32_BUILTIN_BITREV, + NDS32_BUILTIN_WEXT, + NDS32_BUILTIN_BPICK, + NDS32_BUILTIN_INSB, + NDS32_BUILTIN_SADD64, + NDS32_BUILTIN_UADD64, + NDS32_BUILTIN_RADD64, + NDS32_BUILTIN_URADD64, + NDS32_BUILTIN_KADD64, + NDS32_BUILTIN_UKADD64, + NDS32_BUILTIN_SSUB64, + NDS32_BUILTIN_USUB64, + NDS32_BUILTIN_RSUB64, + NDS32_BUILTIN_URSUB64, + NDS32_BUILTIN_KSUB64, + NDS32_BUILTIN_UKSUB64, + NDS32_BUILTIN_SMAR64, + NDS32_BUILTIN_SMSR64, + NDS32_BUILTIN_UMAR64, + NDS32_BUILTIN_UMSR64, + NDS32_BUILTIN_KMAR64, + NDS32_BUILTIN_KMSR64, + NDS32_BUILTIN_UKMAR64, + NDS32_BUILTIN_UKMSR64, + NDS32_BUILTIN_SMALBB, + NDS32_BUILTIN_V_SMALBB, + NDS32_BUILTIN_SMALBT, + NDS32_BUILTIN_V_SMALBT, + NDS32_BUILTIN_SMALTT, + NDS32_BUILTIN_V_SMALTT, + NDS32_BUILTIN_SMALDA, + NDS32_BUILTIN_V_SMALDA, + NDS32_BUILTIN_SMALXDA, + NDS32_BUILTIN_V_SMALXDA, + NDS32_BUILTIN_SMALDS, + NDS32_BUILTIN_V_SMALDS, + 
NDS32_BUILTIN_SMALDRS, + NDS32_BUILTIN_V_SMALDRS, + NDS32_BUILTIN_SMALXDS, + NDS32_BUILTIN_V_SMALXDS, + NDS32_BUILTIN_SMUL16, + NDS32_BUILTIN_V_SMUL16, + NDS32_BUILTIN_SMULX16, + NDS32_BUILTIN_V_SMULX16, + NDS32_BUILTIN_UMUL16, + NDS32_BUILTIN_V_UMUL16, + NDS32_BUILTIN_UMULX16, + NDS32_BUILTIN_V_UMULX16, + NDS32_BUILTIN_SMSLDA, + NDS32_BUILTIN_V_SMSLDA, + NDS32_BUILTIN_SMSLXDA, + NDS32_BUILTIN_V_SMSLXDA, + NDS32_BUILTIN_UCLIP32, + NDS32_BUILTIN_SCLIP32, + NDS32_BUILTIN_KABS, + NDS32_BUILTIN_DSP_END, + NDS32_BUILTIN_NO_HWLOOP, + NDS32_BUILTIN_UNALIGNED_FEATURE, + NDS32_BUILTIN_ENABLE_UNALIGNED, + NDS32_BUILTIN_DISABLE_UNALIGNED, + NDS32_BUILTIN_COUNT }; /* ------------------------------------------------------------------------ */ -#define TARGET_ISA_V2 (nds32_arch_option == ARCH_V2) -#define TARGET_ISA_V3 (nds32_arch_option == ARCH_V3) -#define TARGET_ISA_V3M (nds32_arch_option == ARCH_V3M) +#define TARGET_ISR_VECTOR_SIZE_4_BYTE \ + (nds32_isr_vector_size == 4) + +#define TARGET_ISA_V2 \ + (nds32_arch_option == ARCH_V2 || nds32_arch_option == ARCH_V2J) +#define TARGET_ISA_V3 \ + (nds32_arch_option == ARCH_V3 \ + || nds32_arch_option == ARCH_V3J \ + || nds32_arch_option == ARCH_V3F \ + || nds32_arch_option == ARCH_V3S) +#define TARGET_ISA_V3M \ + (nds32_arch_option == ARCH_V3M) + +#define TARGET_PIPELINE_N8 \ + (nds32_cpu_option == CPU_N7 || nds32_cpu_option == CPU_N8) +#define TARGET_PIPELINE_N10 \ + (nds32_cpu_option == CPU_N9 || nds32_cpu_option == CPU_N10) +#define TARGET_PIPELINE_N12 \ + (nds32_cpu_option == CPU_N12 || nds32_cpu_option == CPU_N13) +#define TARGET_PIPELINE_SIMPLE \ + (nds32_cpu_option == CPU_SIMPLE) + +#define TARGET_CMODEL_SMALL \ + (nds32_cmodel_option == CMODEL_SMALL) +#define TARGET_CMODEL_MEDIUM \ + (nds32_cmodel_option == CMODEL_MEDIUM) +#define TARGET_CMODEL_LARGE \ + (nds32_cmodel_option == CMODEL_LARGE) + +/* When -mcmodel=small or -mcmodel=medium, + compiler may generate gp-base instruction directly. */ +#define TARGET_GP_DIRECT \ + (nds32_cmodel_option == CMODEL_SMALL\ + || nds32_cmodel_option == CMODEL_MEDIUM) + +/* There are three kinds of mul configurations: + 1-cycle fast mul, 2-cycle fast mul, and slow mul operation. */ +#define TARGET_MUL_FAST_1 \ + (nds32_mul_config == MUL_TYPE_FAST_1) +#define TARGET_MUL_FAST_2 \ + (nds32_mul_config == MUL_TYPE_FAST_2) +#define TARGET_MUL_SLOW \ + (nds32_mul_config == MUL_TYPE_SLOW) + +/* Run-time Target Specification. */ +#define TARGET_SOFT_FLOAT (nds32_float_abi == NDS32_FLOAT_ABI_SOFT) +/* Use hardware floating point calling convention. */ +#define TARGET_HARD_FLOAT (nds32_float_abi == NDS32_FLOAT_ABI_HARD) + +/* Record arch version in TARGET_ARCH_DEFAULT. 
0 means soft ABI, + 1 means hard ABI and using full floating-point instruction, + 2 means hard ABI and only using single-precision floating-point + instruction */ +#if TARGET_ARCH_DEFAULT == 1 +# define TARGET_DEFAULT_FLOAT_ABI NDS32_FLOAT_ABI_HARD +# define TARGET_DEFAULT_FPU_ISA MASK_FPU_DOUBLE | MASK_FPU_SINGLE +# define TARGET_DEFAULT_FPU_FMA 0 +#else +# if TARGET_ARCH_DEFAULT == 2 +# define TARGET_DEFAULT_FLOAT_ABI NDS32_FLOAT_ABI_HARD +# define TARGET_DEFAULT_FPU_ISA MASK_FPU_SINGLE +# define TARGET_DEFAULT_FPU_FMA 0 +# else +# define TARGET_DEFAULT_FLOAT_ABI NDS32_FLOAT_ABI_SOFT +# define TARGET_DEFAULT_FPU_ISA 0 +# define TARGET_DEFAULT_FPU_FMA 0 +# endif +#endif + +#define TARGET_CONFIG_FPU_DEFAULT NDS32_CONFIG_FPU_2 + +/* ------------------------------------------------------------------------ */ + +#ifdef TARGET_DEFAULT_RELAX +# define NDS32_RELAX_SPEC " %{!mno-relax:--relax}" +#else +# define NDS32_RELAX_SPEC " %{mrelax:--relax}" +#endif + +#ifdef TARGET_OS_DEFAULT_IFC +# define NDS32_IFC_SPEC " %{Os3|Os|mifc:%{!mno-ifc:--mifc}}" +#else +# define NDS32_IFC_SPEC " %{mifc:--mifc}" +#endif + +#ifdef TARGET_OS_DEFAULT_EX9 +# define NDS32_EX9_SPEC " %{Os3|Os|mex9:%{!mno-ex9:--mex9}}" +#else +# define NDS32_EX9_SPEC " %{mex9:--mex9}" +#endif + +#ifdef TARGET_DEFAULT_EXT_DSP +# define NDS32_EXT_DSP_SPEC "%{!mno-ext-dsp:-mext-dsp}" +#else +# define NDS32_EXT_DSP_SPEC "" +#endif + +#ifdef TARGET_DEFAULT_HWLOOP +# define NDS32_HWLOOP_SPEC "%{!mno-ext-zol:-mext-zol}" +#else +# define NDS32_HWLOOP_SPEC "" +#endif + +#ifdef TARGET_DEFAULT_16BIT +# define NDS32_16BIT_SPEC "%{!mno-16-bit:%{!mno-16bit:-m16bit}}" +#else +# define NDS32_16BIT_SPEC "%{!m16-bit:%{!m16bit:-mno-16bit}}" +#endif /* ------------------------------------------------------------------------ */ /* Controlling the Compilation Driver. */ +#define DRIVER_SELF_SPECS \ + " %{mno-16bit|mno-16-bit:-mno-ifc -mno-ex9}" \ + NDS32_16BIT_SPEC + #define OPTION_DEFAULT_SPECS \ - {"arch", "%{!march=*:-march=%(VALUE)}" } + {"arch", "%{!march=*:-march=%(VALUE)}" \ + "%{march=v3f:%{!mfloat-abi=*:-mfloat-abi=hard}" \ + " %{!mno-ext-fpu-sp:%{!mext-fpu-sp:-mext-fpu-sp}}" \ + " %{!mno-ext-fpu-dp:%{!mext-fpu-dp:-mext-fpu-dp}}}" \ + "%{march=v3s:%{!mfloat-abi=*:-mfloat-abi=hard}" \ + " %{!mno-ext-fpu-sp:%{!mext-fpu-sp:-mext-fpu-sp}}}" }, \ + {"cpu", "%{!mcpu=*:-mcpu=%(VALUE)}" }, \ + {"memory_model", "%{!mmemory-model=*:-mmemory-model=%(VALUE)}"}, \ + {"float", "%{!mfloat-abi=*:-mfloat-abi=%(VALUE)}" } #define CC1_SPEC \ - "" + " %{Os1:-Os -mno-ifc -mno-ex9;" \ + "Os2:-Os -minnermost-loop;" \ + "Os3:-Os}" \ + NDS32_EXT_DSP_SPEC \ + NDS32_HWLOOP_SPEC #define ASM_SPEC \ - " %{mbig-endian:-EB} %{mlittle-endian:-EL}" - -/* If user issues -mrelax, -mforce-fp-as-gp, or -mex9, - we need to pass '--relax' to linker. - Besides, for -mex9, we need to further pass '--mex9'. */ -#define LINK_SPEC \ " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ - " %{mrelax|mforce-fp-as-gp|mex9:--relax}" \ - " %{mex9:--mex9}" - -#define LIB_SPEC \ - " -lc -lgloss" - -/* The option -mno-ctor-dtor can disable constructor/destructor feature - by applying different crt stuff. In the convention, crt0.o is the - startup file without constructor/destructor; - crt1.o, crti.o, crtbegin.o, crtend.o, and crtn.o are the - startup files with constructor/destructor. - Note that crt0.o, crt1.o, crti.o, and crtn.o are provided - by newlib/mculib/glibc/ublic, while crtbegin.o and crtend.o are - currently provided by GCC for nds32 target. 
- - For nds32 target so far: - If -mno-ctor-dtor, we are going to link - "crt0.o [user objects]". - If general cases, we are going to link - "crt1.o crtbegin1.o [user objects] crtend1.o". */ -#define STARTFILE_SPEC \ - " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \ - " %{!mno-ctor-dtor:crtbegin1.o%s}" -#define ENDFILE_SPEC \ - " %{!mno-ctor-dtor:crtend1.o%s}" - -/* The TARGET_BIG_ENDIAN_DEFAULT is defined if we configure gcc - with --target=nds32be-* setting. - Check gcc/config.gcc for more information. - In addition, currently we only have elf toolchain, - where mgp-direct is always the default. */ + " %{march=*:-march=%*}" \ + " %{mno-16-bit|mno-16bit:-mno-16bit-ext}" \ + " %{march=v3m:%{!mfull-regs:%{!mreduced-regs:-mreduced-regs}}}" \ + " %{mfull-regs:-mfull-regs}" \ + " %{mreduced-regs:-mreduced-regs}" \ + " %{mfloat-abi=hard:-mabi=v2fpp}" \ + " %{mfloat-abi=soft:-mabi=v2}" \ + " %{mconfig-fpu=*:-mfpu-freg=%*}" \ + " %{mext-fpu-mac:-mmac}" \ + " %{mno-ext-fpu-mac:-mno-mac}" \ + " %{mext-fpu-sp:-mfpu-sp-ext}" \ + " %{mno-ext-fpu-sp:-mno-fpu-sp-ext}" \ + " %{mext-fpu-dp:-mfpu-dp-ext}" \ + " %{mno-ext-fpu-sp:-mno-fpu-dp-ext}" \ + " %{mext-dsp:-mdsp-ext}" \ + " %{mext-zol:-mzol-ext}" \ + " %{O|O1|O2|O3|Ofast:-O1;:-Os}" + +/* The TARGET_BIG_ENDIAN_DEFAULT is defined if we + configure gcc with --target=nds32be-* setting. + Check gcc/config.gcc for more information. */ #ifdef TARGET_BIG_ENDIAN_DEFAULT -#define MULTILIB_DEFAULTS { "mbig-endian", "mgp-direct" } +# define NDS32_ENDIAN_DEFAULT "mbig-endian" +#else +# define NDS32_ENDIAN_DEFAULT "mlittle-endian" +#endif + +/* Currently we only have elf toolchain, + where -mcmodel=medium is always the default. */ +#if TARGET_ELF +# define NDS32_CMODEL_DEFAULT "mcmodel=medium" #else -#define MULTILIB_DEFAULTS { "mlittle-endian", "mgp-direct" } +# define NDS32_CMODEL_DEFAULT "mcmodel=medium" #endif +#define MULTILIB_DEFAULTS \ + { NDS32_ENDIAN_DEFAULT, NDS32_CMODEL_DEFAULT } + /* Run-time Target Specification. */ -#define TARGET_CPU_CPP_BUILTINS() \ - do \ - { \ - builtin_define ("__nds32__"); \ - \ - if (TARGET_ISA_V2) \ - builtin_define ("__NDS32_ISA_V2__"); \ - if (TARGET_ISA_V3) \ - builtin_define ("__NDS32_ISA_V3__"); \ - if (TARGET_ISA_V3M) \ - builtin_define ("__NDS32_ISA_V3M__"); \ - \ - if (TARGET_BIG_ENDIAN) \ - builtin_define ("__big_endian__"); \ - if (TARGET_REDUCED_REGS) \ - builtin_define ("__NDS32_REDUCED_REGS__"); \ - if (TARGET_CMOV) \ - builtin_define ("__NDS32_CMOV__"); \ - if (TARGET_PERF_EXT) \ - builtin_define ("__NDS32_PERF_EXT__"); \ - if (TARGET_16_BIT) \ - builtin_define ("__NDS32_16_BIT__"); \ - if (TARGET_GP_DIRECT) \ - builtin_define ("__NDS32_GP_DIRECT__"); \ - \ - builtin_assert ("cpu=nds32"); \ - builtin_assert ("machine=nds32"); \ - } while (0) +#define TARGET_CPU_CPP_BUILTINS() \ + nds32_cpu_cpp_builtins (pfile) /* Defining Data Structures for Per-function Information. */ @@ -446,10 +1059,20 @@ #define STACK_BOUNDARY 64 -#define FUNCTION_BOUNDARY 32 +#define FUNCTION_BOUNDARY \ + ((NDS32_ALIGN_P () || TARGET_ALIGN_FUNCTION) ? 
32 : 16) #define BIGGEST_ALIGNMENT 64 +#define DATA_ALIGNMENT(constant, basic_align) \ + nds32_data_alignment (constant, basic_align) + +#define CONSTANT_ALIGNMENT(constant, basic_align) \ + nds32_constant_alignment (constant, basic_align) + +#define LOCAL_ALIGNMENT(type, basic_align) \ + nds32_local_alignment (type, basic_align) + #define EMPTY_FIELD_BOUNDARY 32 #define STRUCTURE_SIZE_BOUNDARY 8 @@ -474,8 +1097,8 @@ #define SIZE_TYPE "long unsigned int" #define PTRDIFF_TYPE "long int" -#define WCHAR_TYPE "short unsigned int" -#define WCHAR_TYPE_SIZE 16 +#define WCHAR_TYPE "unsigned int" +#define WCHAR_TYPE_SIZE 32 /* Register Usage. */ @@ -485,7 +1108,7 @@ from 0 to just below FIRST_PSEUDO_REGISTER. All registers that the compiler knows about must be given numbers, even those that are not normally considered general registers. */ -#define FIRST_PSEUDO_REGISTER 34 +#define FIRST_PSEUDO_REGISTER 101 /* An initializer that says which registers are used for fixed purposes all throughout the compiled code and are therefore @@ -496,24 +1119,38 @@ $r30 : $lp $r31 : $sp - caller-save registers: $r0 ~ $r5, $r16 ~ $r23 - callee-save registers: $r6 ~ $r10, $r11 ~ $r14 + caller-save registers: $r0 ~ $r5, $r16 ~ $r23, $fs0 ~ $fs5, $fs22 ~ $fs47 + callee-save registers: $r6 ~ $r10, $r11 ~ $r14, $fs6 ~ $fs21, $fs48 ~ $fs63 reserved for assembler : $r15 reserved for other use : $r24, $r25, $r26, $r27 */ -#define FIXED_REGISTERS \ -{ /* r0 r1 r2 r3 r4 r5 r6 r7 */ \ - 0, 0, 0, 0, 0, 0, 0, 0, \ - /* r8 r9 r10 r11 r12 r13 r14 r15 */ \ - 0, 0, 0, 0, 0, 0, 0, 1, \ - /* r16 r17 r18 r19 r20 r21 r22 r23 */ \ - 0, 0, 0, 0, 0, 0, 0, 0, \ - /* r24 r25 r26 r27 r28 r29 r30 r31 */ \ - 1, 1, 1, 1, 0, 1, 0, 1, \ - /* ARG_POINTER:32 */ \ - 1, \ - /* FRAME_POINTER:33 */ \ - 1 \ +#define FIXED_REGISTERS \ +{ /* r0 r1 r2 r3 r4 r5 r6 r7 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + /* r8 r9 r10 r11 r12 r13 r14 r15 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + /* r16 r17 r18 r19 r20 r21 r22 r23 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + /* r24 r25 r26 r27 r28 r29 r30 r31 */ \ + 0, 0, 1, 1, 0, 1, 0, 1, \ + /* AP FP fs0 fs1 fs2 fs3 fs4 fs5 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* fs6 fs7 fs8 fs9 fs10 fs11 fs12 fs13 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* fs14 fs15 fs16 fs17 fs18 fs19 fs20 fs21 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* fs22 fs23 fs24 fs25 fs26 fs27 fs28 fs29 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* fs30 fs31 fd16 fd17 fd18 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* fd19 fd20 fd21 fd22 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* fd23 fd24 fd25 fd26 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* fd27 fd28 fd29 fd30 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* fd31 LB LE LC */ \ + 1, 1, 1, 1, 1 \ } /* Identifies the registers that are not available for @@ -522,38 +1159,62 @@ 0 : callee-save registers 1 : caller-save registers */ -#define CALL_USED_REGISTERS \ -{ /* r0 r1 r2 r3 r4 r5 r6 r7 */ \ - 1, 1, 1, 1, 1, 1, 0, 0, \ - /* r8 r9 r10 r11 r12 r13 r14 r15 */ \ - 0, 0, 0, 0, 0, 0, 0, 1, \ - /* r16 r17 r18 r19 r20 r21 r22 r23 */ \ - 1, 1, 1, 1, 1, 1, 1, 1, \ - /* r24 r25 r26 r27 r28 r29 r30 r31 */ \ - 1, 1, 1, 1, 0, 1, 0, 1, \ - /* ARG_POINTER:32 */ \ - 1, \ - /* FRAME_POINTER:33 */ \ - 1 \ +#define CALL_USED_REGISTERS \ +{ /* r0 r1 r2 r3 r4 r5 r6 r7 */ \ + 1, 1, 1, 1, 1, 1, 0, 0, \ + /* r8 r9 r10 r11 r12 r13 r14 r15 */ \ + 0, 0, 0, 0, 0, 0, 0, 1, \ + /* r16 r17 r18 r19 r20 r21 r22 r23 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* r24 r25 r26 r27 r28 r29 r30 r31 */ \ + 1, 1, 1, 1, 0, 1, 0, 1, \ + /* AP FP fs0 fs1 fs2 fs3 fs4 fs5 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* fs6 fs7 fs8 fs9 fs10 fs11 
fs12 fs13 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* fs14 fs15 fs16 fs17 fs18 fs19 fs20 fs21 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* fs22 fs23 fs24 fs25 fs26 fs27 fs28 fs29 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* fs30 fs31 fd16 fd17 fd18 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* fd19 fd20 fd21 fd22 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* fd23 fd24 fd25 fd26 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* fd27 fd28 fd29 fd30 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* fd31 LB LE LC */ \ + 1, 1, 1, 1, 1 \ } /* In nds32 target, we have three levels of registers: LOW_COST_REGS : $r0 ~ $r7 MIDDLE_COST_REGS : $r8 ~ $r11, $r16 ~ $r19 HIGH_COST_REGS : $r12 ~ $r14, $r20 ~ $r31 */ -#define REG_ALLOC_ORDER \ -{ \ - 0, 1, 2, 3, 4, 5, 6, 7, \ - 8, 9, 10, 11, 16, 17, 18, 19, \ - 12, 13, 14, 15, 20, 21, 22, 23, \ - 24, 25, 26, 27, 28, 29, 30, 31, \ - 32, \ - 33 \ +#define REG_ALLOC_ORDER \ +{ 0, 1, 2, 3, 4, 5, 6, 7, \ + 16, 17, 18, 19, 9, 10, 11, 12, \ + 13, 14, 8, 15, 20, 21, 22, 23, \ + 24, 25, 26, 27, 28, 29, 30, 31, \ + 32, 33, 34, 35, 36, 37, 38, 39, \ + 40, 41, 42, 43, 44, 45, 46, 47, \ + 48, 49, 50, 51, 52, 53, 54, 55, \ + 56, 57, 58, 59, 60, 61, 62, 63, \ + 64, 65, 66, 67, 68, 69, 70, 71, \ + 72, 73, 74, 75, 76, 77, 78, 79, \ + 80, 81, 82, 83, 84, 85, 86, 87, \ + 88, 89, 90, 91, 92, 93, 94, 95, \ + 96, 97, 98, 99, 100, \ } +/* ADJUST_REG_ALLOC_ORDER is a macro which permits reg_alloc_order + to be rearranged based on optimizing for speed or size. */ +#define ADJUST_REG_ALLOC_ORDER nds32_adjust_reg_alloc_order () + /* Tell IRA to use the order we define rather than messing it up with its own cost calculations. */ -#define HONOR_REG_ALLOC_ORDER +#define HONOR_REG_ALLOC_ORDER optimize_size /* The number of consecutive hard regs needed starting at reg "regno" for holding a value of mode "mode". 
*/ @@ -587,13 +1248,18 @@ enum reg_class { NO_REGS, + R5_REG, + R8_REG, R15_TA_REG, STACK_REG, + FRAME_POINTER_REG, LOW_REGS, MIDDLE_REGS, HIGH_REGS, GENERAL_REGS, FRAME_REGS, + FP_REGS, + LOOP_REGS, ALL_REGS, LIM_REG_CLASSES }; @@ -603,27 +1269,50 @@ #define REG_CLASS_NAMES \ { \ "NO_REGS", \ + "R5_REG", \ + "R8_REG", \ "R15_TA_REG", \ "STACK_REG", \ + "FRAME_POINTER_REG", \ "LOW_REGS", \ "MIDDLE_REGS", \ "HIGH_REGS", \ "GENERAL_REGS", \ "FRAME_REGS", \ + "FP_REGS", \ + "LOOP_REGS", \ "ALL_REGS" \ } #define REG_CLASS_CONTENTS \ -{ \ - {0x00000000, 0x00000000}, /* NO_REGS : */ \ - {0x00008000, 0x00000000}, /* R15_TA_REG : 15 */ \ - {0x80000000, 0x00000000}, /* STACK_REG : 31 */ \ - {0x000000ff, 0x00000000}, /* LOW_REGS : 0-7 */ \ - {0x000f0fff, 0x00000000}, /* MIDDLE_REGS : 0-11, 16-19 */ \ - {0xfff07000, 0x00000000}, /* HIGH_REGS : 12-14, 20-31 */ \ - {0xffffffff, 0x00000000}, /* GENERAL_REGS: 0-31 */ \ - {0x00000000, 0x00000003}, /* FRAME_REGS : 32, 33 */ \ - {0xffffffff, 0x00000003} /* ALL_REGS : 0-31, 32, 33 */ \ +{ /* NO_REGS */ \ + {0x00000000, 0x00000000, 0x00000000, 0x00000000}, \ + /* R5_REG : 5 */ \ + {0x00000020, 0x00000000, 0x00000000, 0x00000000}, \ + /* R8_REG : 8 */ \ + {0x00000100, 0x00000000, 0x00000000, 0x00000000}, \ + /* R15_TA_REG : 15 */ \ + {0x00008000, 0x00000000, 0x00000000, 0x00000000}, \ + /* STACK_REG : 31 */ \ + {0x80000000, 0x00000000, 0x00000000, 0x00000000}, \ + /* FRAME_POINTER_REG : 28 */ \ + {0x10000000, 0x00000000, 0x00000000, 0x00000000}, \ + /* LOW_REGS : 0-7 */ \ + {0x000000ff, 0x00000000, 0x00000000, 0x00000000}, \ + /* MIDDLE_REGS : 0-11, 16-19 */ \ + {0x000f0fff, 0x00000000, 0x00000000, 0x00000000}, \ + /* HIGH_REGS : 12-14, 20-31 */ \ + {0xfff07000, 0x00000000, 0x00000000, 0x00000000}, \ + /* GENERAL_REGS : 0-31 */ \ + {0xffffffff, 0x00000000, 0x00000000, 0x00000000}, \ + /* FRAME_REGS : 32, 33 */ \ + {0x00000000, 0x00000003, 0x00000000, 0x00000000}, \ + /* FP_REGS : 34-98 */ \ + {0x00000000, 0xfffffffc, 0xffffffff, 0x00000003}, \ + /* LOOP_REGS 99-101 */ \ + {0x00000000, 0x00000000, 0x00000000, 0x0000001c}, \ + /* ALL_REGS : 0-101 */ \ + {0xffffffff, 0xffffffff, 0xffffffff, 0x0000001f} \ } #define REGNO_REG_CLASS(regno) nds32_regno_reg_class (regno) @@ -631,13 +1320,18 @@ #define BASE_REG_CLASS GENERAL_REGS #define INDEX_REG_CLASS GENERAL_REGS +#define TEST_REGNO(R, TEST, VALUE) \ + ((R TEST VALUE) || ((unsigned) reg_renumber[R] TEST VALUE)) + /* Return nonzero if it is suitable for use as a base register in operand addresses. So far, we return nonzero only if "num" is a hard reg of the suitable class or a pseudo register which is allocated to a suitable hard reg. */ #define REGNO_OK_FOR_BASE_P(num) \ - ((num) < 32 || (unsigned) reg_renumber[num] < 32) + (TEST_REGNO (num, <, 32) \ + || TEST_REGNO (num, ==, FRAME_POINTER_REGNUM) \ + || TEST_REGNO (num, ==, ARG_POINTER_REGNUM)) /* Return nonzero if it is suitable for use as a index register in operand addresses. @@ -647,7 +1341,15 @@ The difference between an index register and a base register is that the index register may be scaled. */ #define REGNO_OK_FOR_INDEX_P(num) \ - ((num) < 32 || (unsigned) reg_renumber[num] < 32) + (TEST_REGNO (num, <, 32) \ + || TEST_REGNO (num, ==, FRAME_POINTER_REGNUM) \ + || TEST_REGNO (num, ==, ARG_POINTER_REGNUM)) + +/* Don't spill double-precision register to two singal-precision registers */ +#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ + ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) \ + && GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \ + ? 
reg_classes_intersect_p (CLASS, FP_REGS) : 0) /* Obsolete Macros for Defining Constraints. */ @@ -663,7 +1365,13 @@ #define STACK_POINTER_OFFSET 0 -#define FIRST_PARM_OFFSET(fundecl) 0 +#define FIRST_PARM_OFFSET(fundecl) \ + (NDS32_DOUBLE_WORD_ALIGN_P (crtl->args.pretend_args_size) ? 0 : 4) + +/* A C expression whose value is RTL representing the address in a stack frame + where the pointer to the caller's frame is stored. */ +#define DYNAMIC_CHAIN_ADDRESS(frameaddr) \ + nds32_dynamic_chain_address (frameaddr) #define RETURN_ADDR_RTX(count, frameaddr) \ nds32_return_addr_rtx (count, frameaddr) @@ -676,6 +1384,13 @@ #define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LP_REGNUM) #define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (LP_REGNUM) +/* Use $r0 $r1 to pass exception handling information. */ +#define EH_RETURN_DATA_REGNO(N) (((N) < 2) ? (N) : INVALID_REGNUM) +/* The register $r2 that represents a location in which to store a stack + adjustment to be applied before function return. + This is used to unwind the stack to an exception handler's call frame. */ +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 2) + #define STACK_POINTER_REGNUM SP_REGNUM #define FRAME_POINTER_REGNUM 33 @@ -704,12 +1419,11 @@ #define INIT_CUMULATIVE_ARGS(cum, fntype, libname, fndecl, n_named_args) \ nds32_init_cumulative_args (&cum, fntype, libname, fndecl, n_named_args) -/* The REGNO is an unsigned integer but NDS32_GPR_ARG_FIRST_REGNUM may be 0. - We better cast REGNO into signed integer so that we can avoid - 'comparison of unsigned expression >= 0 is always true' warning. */ -#define FUNCTION_ARG_REGNO_P(regno) \ - (((int) regno - NDS32_GPR_ARG_FIRST_REGNUM >= 0) \ - && ((int) regno - NDS32_GPR_ARG_FIRST_REGNUM < NDS32_MAX_REGS_FOR_ARGS)) +#define FUNCTION_ARG_REGNO_P(regno) \ + (IN_RANGE ((regno), NDS32_FIRST_GPR_REGNUM, NDS32_MAX_GPR_REGS_FOR_ARGS - 1) \ + || ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) \ + && IN_RANGE ((regno), NDS32_FPR_ARG_FIRST_REGNUM, \ + NDS32_FIRST_FPR_REGNUM + NDS32_MAX_FPR_REGS_FOR_ARGS - 1))) #define DEFAULT_PCC_STRUCT_RETURN 0 @@ -738,13 +1452,13 @@ The trampoline code for nds32 target must contains following parts: 1. instructions (4 * 4 = 16 bytes): - get $pc first - load chain_value to static chain register via $pc - load nested function address to $r15 via $pc - jump to desired nested function via $r15 + get $pc first + load chain_value to static chain register via $pc + load nested function address to $r15 via $pc + jump to desired nested function via $r15 2. data (4 * 2 = 8 bytes): - chain_value - nested function address + chain_value + nested function address Please check nds32.c implementation for more information. */ #define TRAMPOLINE_SIZE 24 @@ -769,9 +1483,16 @@ /* We have "LW.bi Rt, [Ra], Rb" instruction form. */ #define HAVE_POST_MODIFY_REG 1 +#define USE_LOAD_POST_INCREMENT(mode) \ + (GET_MODE_SIZE (mode) <= GET_MODE_SIZE(DImode)) +#define USE_LOAD_POST_DECREMENT(mode) \ + (GET_MODE_SIZE (mode) <= GET_MODE_SIZE(DImode)) +#define USE_STORE_POST_DECREMENT(mode) USE_LOAD_POST_DECREMENT(mode) +#define USE_STORE_POST_INCREMENT(mode) USE_LOAD_POST_INCREMENT(mode) + #define CONSTANT_ADDRESS_P(x) (CONSTANT_P (x) && GET_CODE (x) != CONST_DOUBLE) -#define MAX_REGS_PER_ADDRESS 2 +#define MAX_REGS_PER_ADDRESS 3 /* Anchored Addresses. */ @@ -785,7 +1506,11 @@ /* A C expression for the cost of a branch instruction. A value of 1 is the default; other values are interpreted relative to that. */ -#define BRANCH_COST(speed_p, predictable_p) ((speed_p) ? 
2 : 0) +#define BRANCH_COST(speed_p, predictable_p) ((speed_p) ? 2 : 1) + +/* Override BRANCH_COST heuristic which empirically produces worse + performance for removing short circuiting from the logical ops. */ +#define LOGICAL_OP_NON_SHORT_CIRCUIT 0 #define SLOW_BYTE_ACCESS 1 @@ -813,14 +1538,21 @@ /* Position Independent Code. */ +#define PIC_OFFSET_TABLE_REGNUM GP_REGNUM + +#define SYMBOLIC_CONST_P(X) \ +(GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == LABEL_REF \ + || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X))) + /* Defining the Output Assembler Language. */ #define ASM_COMMENT_START "!" -#define ASM_APP_ON "! #APP" +#define ASM_APP_ON "! #APP\n.inline_asm_begin\n" -#define ASM_APP_OFF "! #NO_APP\n" +#define ASM_APP_OFF "! #NO_APP\n.inline_asm_end\n" #define ASM_OUTPUT_LABELREF(stream, name) \ asm_fprintf (stream, "%U%s", (*targetm.strip_name_encoding) (name)) @@ -833,14 +1565,56 @@ #define LOCAL_LABEL_PREFIX "." -#define REGISTER_NAMES \ -{ \ - "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", \ +#define REGISTER_NAMES \ +{ "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", \ "$r8", "$r9", "$r10", "$r11", "$r12", "$r13", "$r14", "$ta", \ "$r16", "$r17", "$r18", "$r19", "$r20", "$r21", "$r22", "$r23", \ "$r24", "$r25", "$r26", "$r27", "$fp", "$gp", "$lp", "$sp", \ - "$AP", \ - "$SFP" \ + "$AP", "$SFP", "$fs0", "$fs1", "$fs2", "$fs3", "$fs4", "$fs5", \ + "$fs6", "$fs7", "$fs8", "$fs9", "$fs10","$fs11","$fs12","$fs13",\ + "$fs14","$fs15","$fs16","$fs17","$fs18","$fs19","$fs20","$fs21",\ + "$fs22","$fs23","$fs24","$fs25","$fs26","$fs27","$fs28","$fs29",\ + "$fs30","$fs31","$fs32","$fs33","$fs34","$fs35","$fs36","$fs37",\ + "$fs38","$fs39","$fs40","$fs41","$fs42","$fs43","$fs44","$fs45",\ + "$fs46","$fs47","$fs48","$fs49","$fs50","$fs51","$fs52","$fs53",\ + "$fs54","$fs55","$fs56","$fs57","$fs58","$fs59","$fs60","$fs61",\ + "$fs62","$fs63", "LB", "LE", "LC" \ +} + +#define OVERLAPPING_REGISTER_NAMES \ +{ \ + {"$fd0", NDS32_FIRST_FPR_REGNUM + 0, 2}, \ + {"$fd1", NDS32_FIRST_FPR_REGNUM + 2, 2}, \ + {"$fd2", NDS32_FIRST_FPR_REGNUM + 4, 2}, \ + {"$fd3", NDS32_FIRST_FPR_REGNUM + 6, 2}, \ + {"$fd4", NDS32_FIRST_FPR_REGNUM + 8, 2}, \ + {"$fd5", NDS32_FIRST_FPR_REGNUM + 10, 2}, \ + {"$fd6", NDS32_FIRST_FPR_REGNUM + 12, 2}, \ + {"$fd7", NDS32_FIRST_FPR_REGNUM + 14, 2}, \ + {"$fd8", NDS32_FIRST_FPR_REGNUM + 16, 2}, \ + {"$fd9", NDS32_FIRST_FPR_REGNUM + 18, 2}, \ + {"$fd10", NDS32_FIRST_FPR_REGNUM + 20, 2}, \ + {"$fd11", NDS32_FIRST_FPR_REGNUM + 22, 2}, \ + {"$fd12", NDS32_FIRST_FPR_REGNUM + 24, 2}, \ + {"$fd13", NDS32_FIRST_FPR_REGNUM + 26, 2}, \ + {"$fd14", NDS32_FIRST_FPR_REGNUM + 28, 2}, \ + {"$fd15", NDS32_FIRST_FPR_REGNUM + 30, 2}, \ + {"$fd16", NDS32_FIRST_FPR_REGNUM + 32, 2}, \ + {"$fd17", NDS32_FIRST_FPR_REGNUM + 34, 2}, \ + {"$fd18", NDS32_FIRST_FPR_REGNUM + 36, 2}, \ + {"$fd19", NDS32_FIRST_FPR_REGNUM + 38, 2}, \ + {"$fd20", NDS32_FIRST_FPR_REGNUM + 40, 2}, \ + {"$fd21", NDS32_FIRST_FPR_REGNUM + 42, 2}, \ + {"$fd22", NDS32_FIRST_FPR_REGNUM + 44, 2}, \ + {"$fd23", NDS32_FIRST_FPR_REGNUM + 46, 2}, \ + {"$fd24", NDS32_FIRST_FPR_REGNUM + 48, 2}, \ + {"$fd25", NDS32_FIRST_FPR_REGNUM + 50, 2}, \ + {"$fd26", NDS32_FIRST_FPR_REGNUM + 52, 2}, \ + {"$fd27", NDS32_FIRST_FPR_REGNUM + 54, 2}, \ + {"$fd28", NDS32_FIRST_FPR_REGNUM + 56, 2}, \ + {"$fd29", NDS32_FIRST_FPR_REGNUM + 58, 2}, \ + {"$fd30", NDS32_FIRST_FPR_REGNUM + 60, 2}, \ + {"$fd31", NDS32_FIRST_FPR_REGNUM + 62, 2}, \ } /* Output normal jump table entry. 
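A standalone illustration, not lines of this patch: the OVERLAPPING_REGISTER_NAMES table above encodes that every double-precision name $fdN aliases the pair of single-precision registers starting at NDS32_FIRST_FPR_REGNUM + 2*N. The sketch below regenerates that mapping; the value 34 for the first FPR number is an assumption read off the REGISTER_NAMES order above ($AP = 32, $SFP = 33, $fs0 = 34).

#include <stdio.h>

#define FIRST_FPR 34  /* assumed NDS32_FIRST_FPR_REGNUM, per the REGISTER_NAMES order */

int
main (void)
{
  /* Each $fdN spans two consecutive $fs registers, exactly as the
     OVERLAPPING_REGISTER_NAMES initializer lists them.  */
  for (int n = 0; n < 32; n++)
    printf ("$fd%-2d -> hard regs %2d..%2d ($fs%d/$fs%d)\n",
            n, FIRST_FPR + 2 * n, FIRST_FPR + 2 * n + 1, 2 * n, 2 * n + 1);
  return 0;
}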
*/ @@ -852,19 +1626,19 @@ do \ { \ switch (GET_MODE (body)) \ - { \ - case QImode: \ - asm_fprintf (stream, "\t.byte\t.L%d-.L%d\n", value, rel); \ - break; \ - case HImode: \ - asm_fprintf (stream, "\t.short\t.L%d-.L%d\n", value, rel); \ - break; \ - case SImode: \ - asm_fprintf (stream, "\t.word\t.L%d-.L%d\n", value, rel); \ - break; \ - default: \ - gcc_unreachable(); \ - } \ + { \ + case QImode: \ + asm_fprintf (stream, "\t.byte\t.L%d-.L%d\n", value, rel); \ + break; \ + case HImode: \ + asm_fprintf (stream, "\t.short\t.L%d-.L%d\n", value, rel); \ + break; \ + case SImode: \ + asm_fprintf (stream, "\t.word\t.L%d-.L%d\n", value, rel); \ + break; \ + default: \ + gcc_unreachable(); \ + } \ } while (0) /* We have to undef it first because elfos.h formerly define it @@ -881,10 +1655,10 @@ do \ { \ /* Because our jump table is in text section, \ - we need to make sure 2-byte alignment after \ - the jump table for instructions fetch. */ \ + we need to make sure 2-byte alignment after \ + the jump table for instructions fetch. */ \ if (GET_MODE (PATTERN (table)) == QImode) \ - ASM_OUTPUT_ALIGN (stream, 1); \ + ASM_OUTPUT_ALIGN (stream, 1); \ asm_fprintf (stream, "\t! Jump Table End\n"); \ } while (0) @@ -948,9 +1722,7 @@ /* Return the preferred mode for and addr_diff_vec when the mininum and maximum offset are known. */ #define CASE_VECTOR_SHORTEN_MODE(min_offset, max_offset, body) \ - ((min_offset < 0 || max_offset >= 0x2000 ) ? SImode \ - : (max_offset >= 100) ? HImode \ - : QImode) + nds32_case_vector_shorten_mode (min_offset, max_offset, body) /* Generate pc relative jump table when -fpic or -Os. */ #define CASE_VECTOR_PC_RELATIVE (flag_pic || optimize_size) @@ -983,6 +1755,11 @@ when the condition is true. */ #define STORE_FLAG_VALUE 1 +/* A C expression that indicates whether the architecture defines a value for + clz or ctz with a zero operand. In nds32 clz for 0 result 32 is defined + in ISA spec */ +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) + /* An alias for the machine mode for pointers. */ #define Pmode SImode diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-hwloop.c gcc-4.9.4/gcc/config/nds32/nds32-hwloop.c --- gcc-4.9.4.orig/gcc/config/nds32/nds32-hwloop.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-hwloop.c 2016-08-08 20:37:45.502269936 +0200 @@ -0,0 +1,934 @@ +/* hwloop pass of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2015 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* ------------------------------------------------------------------------ */ +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "stor-layout.h" +#include "varasm.h" +#include "calls.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" /* Required by recog.h. 
*/ +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" /* For DFA state_t. */ +#include "insn-codes.h" /* For CODE_FOR_xxx. */ +#include "reload.h" /* For push_reload(). */ +#include "flags.h" +#include "function.h" +#include "expr.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "df.h" +#include "tm_p.h" +#include "tm-constrs.h" +#include "optabs.h" /* For GEN_FCN. */ +#include "target.h" +#include "target-def.h" +#include "langhooks.h" /* For add_builtin_function(). */ +#include "ggc.h" +#include "tree-pass.h" +#include "basic-block.h" +#include "cfgloop.h" +#include "context.h" +#include "params.h" +#include "cpplib.h" +#include "hw-doloop.h" + +static int hwloop_group_id = 0; + +/* A callback for the hw-doloop pass. This function examines INSN; if + it is a doloop_end pattern we recognize, return the reg rtx for the + loop counter. Otherwise, return NULL_RTX. */ +static rtx +hwloop_pattern_reg (rtx insn) +{ + rtx reg; + + if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_loop_end) + return NULL_RTX; + + reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1)); + if (!REG_P (reg)) + return NULL_RTX; + return reg; +} + +/* Optimize Loop */ +static bool +hwloop1_optimize (hwloop_info loop) +{ + basic_block bb, new_bb, outer_new_bb; + edge e, outer_e; + edge_iterator ei, outer_ei; + rtx insn, last_insn, cfg_insn, recog_insn; + rtx start_label; + rtx iter_reg; + rtx seq, seq_end; + hwloop_info inner; + unsigned ix; + bool same_depth_p = false; + + if (loop->jumps_outof) + { + if (dump_file) + fprintf (dump_file, ";; loop %d jumps out of loop body.\n", + loop->loop_no); + return false; + } + + if (!loop->incoming_dest) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has more than one entry\n", + loop->loop_no); + return true; + } + + if (loop->incoming_dest != loop->head) + { + if (dump_file) + fprintf (dump_file, ";; loop %d is not entered from head\n", + loop->loop_no); + return true; + } + + if (loop->has_call || loop->has_asm) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has invalid insn\n", + loop->loop_no); + return false; + } + + /* Get the loop iteration register. */ + iter_reg = loop->iter_reg; + + gcc_assert (REG_P (iter_reg)); + + if (loop->incoming_src) + { + /* Make sure the predecessor is before the loop start label, + as required by the loop setup instructions. */ + insn = BB_END (loop->incoming_src); + + if (vec_safe_length (loop->incoming) > 1 + || !(loop->incoming->last ()->flags & EDGE_FALLTHRU)) + { + gcc_assert (JUMP_P (insn)); + + if (dump_file) + fprintf (dump_file, ";; loop %d loop setup space has jump insn," + " before loop_start\n", loop->loop_no); + return true; + } + + while (insn && insn != loop->start_label) + insn = NEXT_INSN (insn); + + if (!insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d loop setup not before loop_start\n", + loop->loop_no); + return true; + } + } + + /* Check if start_label appears before loop_end and. */ + insn = loop->start_label; + while (insn && insn != loop->loop_end) + insn = NEXT_INSN (insn); + + if (!insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d start_label not before loop_end\n", + loop->loop_no); + return true; + } + + /* There should be an instruction before the loop_end instruction + in the same basic block. 
And the instruction must not be + - JUMP + - CONDITIONAL BRANCH + - CALL + - Returns */ + + bb = loop->tail; + last_insn = PREV_INSN (loop->loop_end); + + while (1) + { + for (; last_insn != BB_HEAD (bb); + last_insn = PREV_INSN (last_insn)) + if (NONDEBUG_INSN_P (last_insn)) + break; + + if (last_insn != BB_HEAD (bb)) + break; + + if (single_pred_p (bb) + && single_pred_edge (bb)->flags & EDGE_FALLTHRU + && single_pred (bb) != ENTRY_BLOCK_PTR_FOR_FN (cfun)) + { + bb = single_pred (bb); + last_insn = BB_END (bb); + continue; + } + else + { + last_insn = NULL; + break; + } + } + + if (!last_insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has no last instruction\n", + loop->loop_no); + return true; + } + + if (JUMP_P (last_insn) && !any_condjump_p (last_insn)) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has bad last instruction\n", + loop->loop_no); + return true; + } + + /* Check unspec_hwloop pattern on first basic block. */ + for (insn = BB_HEAD (loop->tail) ; insn != BB_END (loop->tail); + insn = NEXT_INSN (insn)) + { + if (recog_memoized (insn) == CODE_FOR_unspec_no_hwloop) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has bad instruction on first BB\n", + loop->loop_no); + return false; + } + } + + /* Check unspec_hwloop pattern on last basic block. */ + for (insn = BB_HEAD (loop->head); insn != BB_END (loop->head); + insn = NEXT_INSN (insn)) + { + if (recog_memoized (insn) == CODE_FOR_unspec_no_hwloop) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has bad instruction on last BB\n", + loop->loop_no); + return false; + } + } + + /* Check inner loop have Hardware loop. */ + for (ix = 0; loop->loops.iterate (ix, &inner); ix++) + { + if ((loop->loop_no != inner->loop_no) + && !inner->bad) + { + if (dump_file) + fprintf (dump_file, ";; Inner loop %d have HW-loop in loop: %d\n", + inner->loop_no, loop->loop_no); + return false; + } + } + + /* Check same loop depth in nesting loop. */ + for (ix = 0; loop->outermost->loops.iterate (ix, &inner); ix++) + { + /* Check real_depth same other loop real_depth. */ + if ((loop->loop_no != inner->loop_no) + && (loop->real_depth == inner->real_depth)) + same_depth_p = true; + + if (dump_file) + { + fprintf (dump_file, ";;loop %d depth: %d", + loop->loop_no, loop->depth); + fprintf (dump_file, " inner %d depth %d\n", + inner->loop_no, inner->real_depth); + } + } + + /* In all other cases, try to replace a bad last insn with a nop. */ + if (JUMP_P (last_insn) + || CALL_P (last_insn) + || recog_memoized (last_insn) == CODE_FOR_return_internal + || GET_CODE (PATTERN (last_insn)) == ASM_INPUT + || asm_noperands (PATTERN (last_insn)) >= 0) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has bad last insn; replace with nop\n", + loop->loop_no); + + bb = BLOCK_FOR_INSN (last_insn); + last_insn = emit_insn_after (gen_unspec_nop (), BB_HEAD (bb->next_bb)); + } + + loop->last_insn = last_insn; + + /* The loop is good for replacement. */ + start_label = loop->start_label; + iter_reg = loop->iter_reg; + + SET_REGNO_REG_SET (loop->regs_set_in_loop, LC_REGNUM); + + /* Create a sequence containing the loop setup. */ + start_sequence (); + + if (loop->loop_no == loop->outermost->loop_no + || same_depth_p) + { + /* Insert start place for LB. */ + recog_insn = emit_insn (gen_mtlbi_hint (gen_rtx_LABEL_REF (Pmode, + start_label), + GEN_INT (hwloop_group_id))); + recog_memoized (recog_insn); + } + + /* Insert counter for LC. 
*/ + seq_end = emit_insn (gen_init_lc (iter_reg, GEN_INT (hwloop_group_id))); + recog_memoized (seq_end); + + if (dump_file) + { + fprintf (dump_file, ";; replacing loop %d initializer with\n", + loop->loop_no); + print_rtl_single (dump_file, seq_end); + fprintf (dump_file, ";; replacing loop %d terminator with\n", + loop->loop_no); + print_rtl_single (dump_file, loop->loop_end); + } + + seq = get_insns (); + end_sequence (); + + /* Create new basic block, before loop->head. */ + emit_insn_before (seq, BB_HEAD (loop->head)); + seq = emit_label_before (gen_label_rtx (), seq); + + new_bb = create_basic_block (seq, seq_end, loop->head->prev_bb); + + /* Copy prev BB flags and frequency. */ + BB_COPY_PARTITION (new_bb, new_bb->prev_bb); + new_bb->frequency = new_bb->prev_bb->frequency; + + FOR_EACH_EDGE (e, ei, loop->incoming) + { + if (!(e->flags & EDGE_FALLTHRU) + || e->dest != loop->head) + redirect_edge_and_branch_force (e, new_bb); + else + redirect_edge_succ (e, new_bb); + } + + /* The new edge from outer_new_bb to loop->head + is FALLTHRU. */ + make_single_succ_edge (new_bb, loop->head, EDGE_FALLTHRU); + + /* Get loop_insn note and delete loop_end insn. */ + rtx note = find_reg_note (loop->loop_end, REG_BR_PROB, 0); + delete_insn (loop->loop_end); + + /* Insert the CFG information after the last instruction of the loop. */ + cfg_insn = emit_jump_insn_after (gen_hwloop_cfg (GEN_INT (hwloop_group_id), + gen_rtx_LABEL_REF (Pmode, start_label)), + BB_END (loop->tail)); + if (note) + add_int_reg_note (cfg_insn, REG_BR_PROB, INTVAL (note)); + + recog_memoized (cfg_insn); + JUMP_LABEL (cfg_insn) = loop->start_label; + LABEL_NUSES (loop->start_label)++; + + if (loop->loop_no != loop->outermost->loop_no + && !same_depth_p) + { + start_sequence (); + /* Insert start place for LB. */ + seq_end = emit_insn (gen_mtlbi_hint (gen_rtx_LABEL_REF (Pmode, + start_label), + GEN_INT (hwloop_group_id))); + recog_memoized (seq_end); + seq = get_insns (); + end_sequence (); + + emit_insn_before (seq, BB_HEAD (loop->outermost->head)); + seq = emit_label_before (gen_label_rtx (), seq); + + /* Create new basic block, before loop->outermost->head. */ + outer_new_bb = create_basic_block (seq, seq_end, + loop->outermost->head->prev_bb); + + /* Copy prev BB flags and frequency. */ + BB_COPY_PARTITION (outer_new_bb, outer_new_bb->prev_bb); + outer_new_bb->frequency = outer_new_bb->prev_bb->frequency; + + FOR_EACH_EDGE (outer_e, outer_ei, loop->outermost->incoming) + { + if (!(outer_e->flags & EDGE_FALLTHRU) + || outer_e->dest != loop->outermost->head) + redirect_edge_and_branch_force (outer_e, outer_new_bb); + else + redirect_edge_succ (outer_e, outer_new_bb); + } + + /* The new edge from outer_new_bb to loop->outermost->head + is FALLTHRU. 
*/ + make_single_succ_edge (outer_new_bb, loop->outermost->head, + EDGE_FALLTHRU); + } + hwloop_group_id++; + return true; +} + +/* Optimize Loop */ +static bool +hwloop2_optimize (hwloop_info loop) +{ + basic_block bb, loop_bb; + rtx insn, last_insn, iter_reg; + rtx start_label, end_label; + rtx lc_reg, lb_reg, le_reg; + rtx seq, seq_end; + hwloop_info inner; + unsigned ix; + bool same_depth_p = false; + + if (loop->jumps_outof) + { + if (dump_file) + fprintf (dump_file, ";; loop %d jumps out of loop body.\n", + loop->loop_no); + return false; + } + + if (!loop->incoming_dest) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has more than one entry\n", + loop->loop_no); + return false; + } + + if (loop->incoming_dest != loop->head) + { + if (dump_file) + fprintf (dump_file, ";; loop %d is not entered from head\n", + loop->loop_no); + return false; + } + + if (loop->has_call || loop->has_asm) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has invalid insn\n", + loop->loop_no); + return false; + } + + /* Scan all the blocks to make sure they don't use iter_reg. */ + if (loop->iter_reg_used || loop->iter_reg_used_outside) + { + if (dump_file) + fprintf (dump_file, ";; loop %d uses iterator\n", + loop->loop_no); + return false; + } + + /* Get the loop iteration register. */ + iter_reg = loop->iter_reg; + + gcc_assert (REG_P (iter_reg)); + + if (loop->incoming_src) + { + /* Make sure the predecessor is before the loop start label, + as required by the loop setup instructions. */ + insn = BB_END (loop->incoming_src); + + if (vec_safe_length (loop->incoming) > 1 + || !(loop->incoming->last ()->flags & EDGE_FALLTHRU)) + { + gcc_assert (JUMP_P (insn)); + + if (dump_file) + fprintf (dump_file, ";; loop %d loop setup space has jump insn," + " before loop_start\n", loop->loop_no); + return false; + } + + while (insn && insn != loop->start_label) + insn = NEXT_INSN (insn); + + if (!insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d loop setup not before loop_start\n", + loop->loop_no); + return false; + } + } + + /* Check if start_label appears before loop_end and. */ + insn = loop->start_label; + while (insn && insn != loop->loop_end) + insn = NEXT_INSN (insn); + + if (!insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d start_label not before loop_end\n", + loop->loop_no); + return false; + } + + /* There should be an instruction before the loop_end instruction + in the same basic block. And the instruction must not be + - JUMP + - CONDITIONAL BRANCH + - CALL + - Returns */ + + bb = loop->tail; + last_insn = PREV_INSN (loop->loop_end); + + while (1) + { + for (; last_insn != BB_HEAD (bb); + last_insn = PREV_INSN (last_insn)) + if (NONDEBUG_INSN_P (last_insn)) + break; + + if (last_insn != BB_HEAD (bb)) + break; + + if (single_pred_p (bb) + && single_pred_edge (bb)->flags & EDGE_FALLTHRU + && single_pred (bb) != ENTRY_BLOCK_PTR_FOR_FN (cfun)) + { + bb = single_pred (bb); + last_insn = BB_END (bb); + continue; + } + else + { + last_insn = NULL; + break; + } + } + + if (!last_insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has no last instruction\n", + loop->loop_no); + return false; + } + + if (JUMP_P (last_insn) && !any_condjump_p (last_insn)) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has bad last instruction\n", + loop->loop_no); + return false; + } + + /* Check unspec_hwloop pattern on first basic block. 
*/ + for (insn = BB_HEAD (loop->tail) ; insn != BB_END (loop->tail); + insn = NEXT_INSN (insn)) + { + if (recog_memoized (insn) == CODE_FOR_unspec_no_hwloop) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has bad instruction on first BB\n", + loop->loop_no); + return false; + } + } + + /* Check unspec_hwloop pattern on last basic block. */ + for (insn = BB_HEAD (loop->head); insn != BB_END (loop->head); + insn = NEXT_INSN (insn)) + { + if (recog_memoized (insn) == CODE_FOR_unspec_no_hwloop) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has bad instruction on last BB\n", + loop->loop_no); + return false; + } + } + + /* Check inner loop have Hardware loop by hwloop pass. */ + for (ix = 0; loop->blocks.iterate (ix, &loop_bb); ix++) + { + for (insn = BB_HEAD (loop_bb); + insn != NEXT_INSN (BB_END (loop_bb)); + insn = NEXT_INSN (insn)) + { + if (recog_memoized (insn) == CODE_FOR_init_lc) + { + if (dump_file) + fprintf (dump_file, ";; The inner loop %d has HW-loop\n", + loop->loop_no); + return false; + } + } + } + + /* Check inner loop have Hardware loop. */ + for (ix = 0; loop->loops.iterate (ix, &inner); ix++) + { + if ((loop->loop_no != inner->loop_no) + && !inner->bad) + { + if (dump_file) + fprintf (dump_file, ";; Inner loop %d have HW-loop in loop: %d\n", + inner->loop_no, loop->loop_no); + return false; + } + } + + /* Check outer loop have Hardware loop. */ + for (ix = 0; loop->outermost->loops.iterate (ix, &inner); ix++) + { + if ((loop->loop_no != inner->loop_no) + && !inner->bad) + { + if (dump_file) + { + fprintf (dump_file, ";;loop %d depth: %d", + loop->loop_no, loop->depth); + fprintf (dump_file, " inner %d depth %d\n", + inner->loop_no, inner->real_depth); + } + return false; + } + } + + /* Check same loop depth in nesting loop. */ + for (ix = 0; loop->outermost->loops.iterate (ix, &inner); ix++) + { + /* Check real_depth same other loop real_depth. */ + if ((loop->loop_no != inner->loop_no) + && (loop->real_depth == inner->real_depth)) + same_depth_p = true; + + if (dump_file) + { + fprintf (dump_file, ";;loop %d depth: %d", + loop->loop_no, loop->depth); + fprintf (dump_file, " inner %d depth %d\n", + inner->loop_no, inner->real_depth); + } + } + + /* In all other cases, try to replace a bad last insn with a nop. */ + if (JUMP_P (last_insn) + || CALL_P (last_insn) + || recog_memoized (last_insn) == CODE_FOR_return_internal + || GET_CODE (PATTERN (last_insn)) == ASM_INPUT + || asm_noperands (PATTERN (last_insn)) >= 0) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has bad last insn; replace with nop\n", + loop->loop_no); + last_insn = emit_insn_after (gen_unspec_nop (), last_insn); + } + + loop->last_insn = last_insn; + + /* The loop is good for replacement. */ + start_label = loop->start_label; + end_label = gen_label_rtx (); + iter_reg = loop->iter_reg; + + lb_reg = gen_rtx_REG (SImode, LB_REGNUM); + le_reg = gen_rtx_REG (SImode, LE_REGNUM); + lc_reg = gen_rtx_REG (SImode, LC_REGNUM); + SET_REGNO_REG_SET (loop->regs_set_in_loop, LC_REGNUM); + + loop->end_label = end_label; + + /* Create a sequence containing the loop setup. */ + start_sequence (); + + if (loop->loop_no == loop->outermost->loop_no + || same_depth_p) + { + /* Insert start place for LB. */ + emit_insn (gen_mtlbi (gen_rtx_LABEL_REF (Pmode, start_label))); + /* Insert end place for LE. */ + emit_insn (gen_mtlei (gen_rtx_LABEL_REF (Pmode, end_label))); + + emit_insn (gen_rtx_USE (SImode, lb_reg)); + emit_insn (gen_rtx_USE (SImode, le_reg)); + } + + /* Insert counter for LC. 
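A conceptual sketch, not lines of this patch: the LB/LE/LC setup emitted below marks the loop's first instruction (LB), its last instruction (LE), and the remaining trip count (LC); the hardware then branches from LE back to LB and counts LC down without an explicit compare-and-branch in the body. A plain-C picture of that control flow, under the simplifying assumption that the trip count is known to be non-zero:

#include <stdio.h>

int
main (void)
{
  unsigned lc = 4;   /* LC: trip count, loaded from the doloop iteration register */

  /* LB would point at the first statement of this body ...  */
  do
    {
      printf ("body, %u iterations left\n", lc);
      /* ... and LE at the last one; the hardware decrements LC here and
         jumps back to LB while iterations remain.  */
    }
  while (--lc != 0);

  return 0;
}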
*/ + emit_move_insn (lc_reg, iter_reg); + emit_insn (gen_rtx_USE (SImode, lc_reg)); + + /* Insert ISB instruction. */ + seq_end = emit_insn (gen_unspec_volatile_isb ()); + + if (dump_file) + { + fprintf (dump_file, ";; replacing loop %d initializer with\n", + loop->loop_no); + print_rtl_single (dump_file, seq_end); + fprintf (dump_file, ";; replacing loop %d terminator with\n", + loop->loop_no); + print_rtl_single (dump_file, loop->loop_end); + } + + seq = get_insns (); + end_sequence (); + + if (loop->incoming_src) + { + rtx prev = BB_END (loop->incoming_src); + emit_insn_after (seq, prev); + } + else + { + basic_block new_bb; + edge e; + edge_iterator ei; + + emit_insn_before (seq, BB_HEAD (loop->head)); + seq = emit_label_before (gen_label_rtx (), seq); + + new_bb = create_basic_block (seq, seq_end, loop->head->prev_bb); + FOR_EACH_EDGE (e, ei, loop->incoming) + { + if (!(e->flags & EDGE_FALLTHRU) + || e->dest != loop->head) + redirect_edge_and_branch_force (e, new_bb); + else + redirect_edge_succ (e, new_bb); + } + e = make_edge (new_bb, loop->head, 0); + } + + delete_insn (loop->loop_end); + /* Insert the loop end label before the last instruction of the loop. */ + emit_label_before (loop->end_label, loop->last_insn); + + /* The last_insn don't do ifcall. */ + if (TARGET_IFC) + { + emit_insn_before (gen_no_ifc_begin (), loop->last_insn); + emit_insn_after (gen_no_ifc_end (), loop->last_insn); + } + + /* The last_insn don't do ex9. */ + if (TARGET_EX9) + { + emit_insn_before (gen_no_ex9_begin (), loop->last_insn); + emit_insn_after (gen_no_ex9_end (), loop->last_insn); + } + + if (loop->loop_no != loop->outermost->loop_no + && !same_depth_p) + { + start_sequence (); + /* Insert start place for LB. */ + emit_insn (gen_mtlbi (gen_rtx_LABEL_REF (Pmode, start_label))); + /* Insert end place for LE. */ + emit_insn (gen_mtlei (gen_rtx_LABEL_REF (Pmode, end_label))); + + emit_insn (gen_rtx_USE (SImode, lb_reg)); + seq_end = emit_insn (gen_rtx_USE (SImode, le_reg)); + + seq = get_insns (); + end_sequence (); + + if (loop->outermost->incoming_src) + { + rtx prev = BB_END (loop->outermost->incoming_src); + if (vec_safe_length (loop->outermost->incoming) > 1 + || !(loop->outermost->incoming->last ()->flags & EDGE_FALLTHRU)) + { + gcc_assert (JUMP_P (prev)); + prev = PREV_INSN (prev); + } + emit_insn_after (seq, prev); + } + else + { + basic_block outer_new_bb; + edge outer_e; + edge_iterator outer_ei; + + emit_insn_before (seq, BB_HEAD (loop->outermost->head)); + seq = emit_label_before (gen_label_rtx (), seq); + + outer_new_bb = create_basic_block (seq, seq_end, + loop->outermost->head->prev_bb); + FOR_EACH_EDGE (outer_e, outer_ei, loop->outermost->incoming) + { + if (!(outer_e->flags & EDGE_FALLTHRU) + || outer_e->dest != loop->outermost->head) + redirect_edge_and_branch_force (outer_e, outer_new_bb); + else + redirect_edge_succ (outer_e, outer_new_bb); + } + outer_e = make_edge (outer_new_bb, loop->outermost->head, 0); + } + } + return true; +} + +/* A callback for the hw-doloop pass. Called when a loop we have discovered + turns out not to be optimizable; we have to split the doloop_end pattern + into a subtract and a test. 
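A standalone sketch, not lines of this patch: when a loop cannot become a hardware loop, the fallback defined right after this comment rewrites the doloop_end jump as an explicit decrement plus a compare-and-branch back to the start label. The shape of that fallback at source level is shown below; the helper name and trip count are illustrative only.

#include <stdio.h>

/* Source-level shape of the fallback: decrement the iteration register
   (the emitted addsi3 with constant -1), then branch back while it is
   non-zero (the emitted cbranchsi4 with an NE test against zero).  */
static void
fallback_loop (unsigned iter)
{
 start_label:
  printf ("body, iter = %u\n", iter);
  iter = iter - 1;      /* addsi3 (iter_reg, iter_reg, -1) */
  if (iter != 0)        /* cbranchsi4 (NE, iter_reg, 0, start_label) */
    goto start_label;
}

int
main (void)
{
  fallback_loop (3);
  return 0;
}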
*/ +static void +hwloop_fail (hwloop_info loop) +{ + rtx test; + rtx insn = loop->loop_end; + + emit_insn_before (gen_addsi3 (loop->iter_reg, + loop->iter_reg, + constm1_rtx), + loop->loop_end); + + test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx); + insn = emit_jump_insn_before (gen_cbranchsi4 (test, + loop->iter_reg, const0_rtx, + loop->start_label), + loop->loop_end); + + JUMP_LABEL (insn) = loop->start_label; + LABEL_NUSES (loop->start_label)++; + delete_insn (loop->loop_end); +} + +static struct hw_doloop_hooks nds32_doloop1_hooks = +{ + hwloop_pattern_reg, + hwloop1_optimize, + hwloop_fail +}; + +static struct hw_doloop_hooks nds32_doloop2_hooks = +{ + hwloop_pattern_reg, + hwloop2_optimize, + hwloop_fail +}; + +/* This pass looks for doloop_end insns and tries to rewrite the RTL + of these loops so that proper NDS32 hardware loops are generated. */ +static unsigned int +nds32_hwloop1 (void) +{ + compute_bb_for_insn (); + reorg_loops (false, &nds32_doloop1_hooks); + return 1; +} + +const pass_data pass_data_nds32_hwloop1_opt = +{ + RTL_PASS, /* type */ + "hwloop1_opt", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + ( TODO_df_finish | TODO_verify_rtl_sharing), /* todo_flags_finish */ +}; + +class pass_nds32_hwloop1_opt : public rtl_opt_pass +{ +public: + pass_nds32_hwloop1_opt (gcc::context *ctxt) + : rtl_opt_pass (pass_data_nds32_hwloop1_opt, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return NDS32_HW_LOOP_P(); } + unsigned int execute () { return nds32_hwloop1 (); } +}; + +rtl_opt_pass * +make_pass_nds32_hwloop1_opt (gcc::context *ctxt) +{ + return new pass_nds32_hwloop1_opt (ctxt); +} + +/* This pass looks for doloop_end insns and tries to rewrite the RTL + of these loops so that proper NDS32 hardware loops are generated. */ +static unsigned int +nds32_hwloop2 (void) +{ + compute_bb_for_insn (); + reorg_loops (false, &nds32_doloop2_hooks); + return 1; +} + +const pass_data pass_data_nds32_hwloop2_opt = +{ + RTL_PASS, /* type */ + "hwloop2_opt", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + ( TODO_df_finish | TODO_verify_rtl_sharing), /* todo_flags_finish */ +}; + +class pass_nds32_hwloop2_opt : public rtl_opt_pass +{ +public: + pass_nds32_hwloop2_opt (gcc::context *ctxt) + : rtl_opt_pass (pass_data_nds32_hwloop2_opt, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return NDS32_HW_LOOP_P(); } + unsigned int execute () { return nds32_hwloop2 (); } +}; + +rtl_opt_pass * +make_pass_nds32_hwloop2_opt (gcc::context *ctxt) +{ + return new pass_nds32_hwloop2_opt (ctxt); +} diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32_init.inc gcc-4.9.4/gcc/config/nds32/nds32_init.inc --- gcc-4.9.4.orig/gcc/config/nds32/nds32_init.inc 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32_init.inc 2016-08-08 20:37:45.590273343 +0200 @@ -0,0 +1,43 @@ +/* + * nds32_init.inc + * + * NDS32 architecture startup assembler header file + * + */ + +.macro nds32_init + + ! Initialize GP for data access + la $gp, _SDA_BASE_ + +#if defined(__NDS32_EXT_EX9__) + ! Check HW for EX9 + mfsr $r0, $MSC_CFG + li $r1, (1 << 24) + and $r2, $r0, $r1 + beqz $r2, 1f + + ! 
Initialize the table base of EX9 instruction + la $r0, _ITB_BASE_ + mtusr $r0, $ITB +1: +#endif + +#if defined(__NDS32_EXT_FPU_DP__) || defined(__NDS32_EXT_FPU_SP__) + ! Enable FPU + mfsr $r0, $FUCOP_CTL + ori $r0, $r0, #0x1 + mtsr $r0, $FUCOP_CTL + dsb + + ! Enable denormalized flush-to-Zero mode + fmfcsr $r0 + ori $r0,$r0,#0x1000 + fmtcsr $r0 + dsb +#endif + + ! Initialize default stack pointer + la $sp, _stack + +.endm diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-intrinsic.c gcc-4.9.4/gcc/config/nds32/nds32-intrinsic.c --- gcc-4.9.4.orig/gcc/config/nds32/nds32-intrinsic.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-intrinsic.c 2016-08-08 20:37:45.502269936 +0200 @@ -0,0 +1,1858 @@ +/* Intrinsic functions of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2015 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* ------------------------------------------------------------------------ */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" /* Required by recog.h. */ +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" /* For DFA state_t. */ +#include "insn-codes.h" /* For CODE_FOR_xxx. */ +#include "reload.h" /* For push_reload(). */ +#include "flags.h" +#include "function.h" +#include "expr.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "df.h" +#include "tm_p.h" +#include "tm-constrs.h" +#include "optabs.h" /* For GEN_FCN. */ +#include "target.h" +#include "target-def.h" +#include "langhooks.h" /* For def_builtin(). */ +#include "ggc.h" +#include "stor-layout.h" + +/* ------------------------------------------------------------------------ */ + +/* Read the requested argument from the EXP given by INDEX. + Return the value as an rtx. */ +static rtx +nds32_read_argument (tree exp, unsigned int index) +{ + return expand_normal (CALL_EXPR_ARG (exp, index)); +} + +/* Return a legitimate rtx for instruction ICODE's return value. Use TARGET + if it's not null, has the right mode, and satisfies operand 0's + predicate. */ +static rtx +nds32_legitimize_target (enum insn_code icode, rtx target) +{ + enum machine_mode mode = insn_data[icode].operand[0].mode; + + if (! target + || GET_MODE (target) != mode + || ! (*insn_data[icode].operand[0].predicate) (target, mode)) + return gen_reg_rtx (mode); + else + return target; +} + +/* Given that ARG is being passed as operand OPNUM to instruction ICODE, + check whether ARG satisfies the operand's constraints. If it doesn't, + copy ARG to a temporary register and return that. Otherwise return ARG + itself. 
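A standalone sketch, not lines of this patch: the argument-legitimization helper defined just below also handles the case where a plain CONST_INT is passed to a builtin expecting a V4QI or V2HI vector operand; the 32-bit value is split into byte or halfword lanes, filled in reverse order on big-endian targets. The function name and test value here are illustrative only.

#include <stdio.h>

/* Split a 32-bit immediate into NUNITS equal lanes (4 x 8-bit or 2 x 16-bit),
   mirroring the shift/mask loop in nds32_legitimize_argument.  On big-endian
   targets the lanes are stored in reverse order.  */
static void
split_lanes (unsigned val, int nunits, int big_endian, unsigned *lanes)
{
  int lane_bits = 32 / nunits;
  unsigned mask = (1u << lane_bits) - 1;

  for (int i = 0; i < nunits; i++)
    {
      unsigned piece = (val >> (i * lane_bits)) & mask;
      lanes[big_endian ? nunits - i - 1 : i] = piece;
    }
}

int
main (void)
{
  unsigned lanes[4];

  split_lanes (0x11223344u, 4, 0, lanes);   /* little-endian V4QImode view */
  for (int i = 0; i < 4; i++)
    printf ("lane %d = 0x%02x\n", i, lanes[i]);
  return 0;
}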
 */ +static rtx +nds32_legitimize_argument (enum insn_code icode, int opnum, rtx arg) +{ + enum machine_mode mode = insn_data[icode].operand[opnum].mode; + + if ((*insn_data[icode].operand[opnum].predicate) (arg, mode)) + return arg; + else if (VECTOR_MODE_P (mode) && CONST_INT_P (arg)) + { + /* Handle CONST_INT conversion to CONST_VECTOR. */ + int nunits = GET_MODE_NUNITS (mode); + int i, shift = 0; + rtvec v = rtvec_alloc (nunits); + int val = INTVAL (arg); + enum machine_mode val_mode = (mode == V4QImode) ? QImode : HImode; + int shift_acc = (val_mode == QImode) ? 8 : 16; + int mask = (val_mode == QImode) ? 0xff : 0xffff; + int tmp_val = val; + + if (TARGET_BIG_ENDIAN) + for (i = 0; i < nunits; i++) + { + tmp_val = (val >> shift) & mask; + RTVEC_ELT (v, nunits - i - 1) = gen_int_mode (tmp_val, val_mode); + shift += shift_acc; + } + else + for (i = 0; i < nunits; i++) + { + tmp_val = (val >> shift) & mask; + RTVEC_ELT (v, i) = gen_int_mode (tmp_val, val_mode); + shift += shift_acc; + } + + return copy_to_mode_reg (mode, gen_rtx_CONST_VECTOR (mode, v)); + } + else if (VECTOR_MODE_P (mode) + && !VECTOR_MODE_P (GET_MODE (arg))) + { + /* Handle non-Vector mode copy to Vector. */ + rtx tmp_rtx = gen_reg_rtx (mode); + convert_move (tmp_rtx, arg, false); + return tmp_rtx; + } + else + return copy_to_mode_reg (mode, arg); +} + +/* Return true if OPVAL can be used for operand OPNUM of instruction ICODE. + The instruction should require a constant operand of some sort. The + function prints an error if OPVAL is not valid. */ +static int +nds32_check_constant_argument (enum insn_code icode, int opnum, rtx opval, + const char *name) +{ + if (GET_CODE (opval) != CONST_INT) + { + error ("invalid argument to built-in function %s", name); + return false; + } + if (! (*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode)) + { + error ("constant argument out of range for %s", name); + + return false; + } + return true; +} + +/* Expand builtins that take no operands and return the target. */ +static rtx +nds32_expand_noarg_builtin (enum insn_code icode, rtx target) +{ + rtx pat; + + target = nds32_legitimize_target (icode, target); + + /* Emit and return the new instruction. */ + pat = GEN_FCN (icode) (target); + if (! pat) + return NULL_RTX; + + emit_insn (pat); + return target; +} + +/* Expand builtins that take one operand. */ +static rtx +nds32_expand_unop_builtin (enum insn_code icode, tree exp, rtx target, + bool return_p) +{ + rtx pat; + rtx op0 = nds32_read_argument (exp, 0); + int op0_num = return_p ? 1 : 0; + + if (return_p) + target = nds32_legitimize_target (icode, target); + + op0 = nds32_legitimize_argument (icode, op0_num, op0); + + /* Emit and return the new instruction. */ + if (return_p) + pat = GEN_FCN (icode) (target, op0); + else + pat = GEN_FCN (icode) (op0); + + if (! pat) + return NULL_RTX; + + emit_insn (pat); + return target; +} + +/* Expand builtins that take one operand and the operand is an immediate. */ +static rtx +nds32_expand_unopimm_builtin (enum insn_code icode, tree exp, rtx target, + bool return_p, const char *name) +{ + rtx pat; + rtx op0 = nds32_read_argument (exp, 0); + int op0_num = return_p ? 1 : 0; + + if (return_p) + target = nds32_legitimize_target (icode, target); + + if (!nds32_check_constant_argument (icode, op0_num, op0, name)) + return NULL_RTX; + + op0 = nds32_legitimize_argument (icode, op0_num, op0); + + /* Emit and return the new instruction. */ + if (return_p) + pat = GEN_FCN (icode) (target, op0); + else + pat = GEN_FCN (icode) (op0); + + if (!
pat) + return NULL_RTX; + + emit_insn (pat); + return target; +} + +/* Expand builtins that take two operands. */ +static rtx +nds32_expand_binop_builtin (enum insn_code icode, tree exp, rtx target, + bool return_p) +{ + rtx pat; + rtx op0 = nds32_read_argument (exp, 0); + rtx op1 = nds32_read_argument (exp, 1); + int op0_num = return_p ? 1 : 0; + int op1_num = return_p ? 2 : 1; + + if (return_p) + target = nds32_legitimize_target (icode, target); + + op0 = nds32_legitimize_argument (icode, op0_num, op0); + op1 = nds32_legitimize_argument (icode, op1_num, op1); + + /* Emit and return the new instruction. */ + if (return_p) + pat = GEN_FCN (icode) (target, op0, op1); + else + pat = GEN_FCN (icode) (op0, op1); + + if (! pat) + return NULL_RTX; + + emit_insn (pat); + return target; +} + +/* Expand builtins that take two operands and the second is immediate. */ +static rtx +nds32_expand_binopimm_builtin (enum insn_code icode, tree exp, rtx target, + bool return_p, const char *name) +{ + rtx pat; + rtx op0 = nds32_read_argument (exp, 0); + rtx op1 = nds32_read_argument (exp, 1); + int op0_num = return_p ? 1 : 0; + int op1_num = return_p ? 2 : 1; + + if (return_p) + target = nds32_legitimize_target (icode, target); + + if (!nds32_check_constant_argument (icode, op1_num, op1, name)) + return NULL_RTX; + + op0 = nds32_legitimize_argument (icode, op0_num, op0); + op1 = nds32_legitimize_argument (icode, op1_num, op1); + + /* Emit and return the new instruction. */ + if (return_p) + pat = GEN_FCN (icode) (target, op0, op1); + else + pat = GEN_FCN (icode) (op0, op1); + + if (! pat) + return NULL_RTX; + + emit_insn (pat); + return target; +} + +/* Expand builtins that take three operands. */ +static rtx +nds32_expand_triop_builtin (enum insn_code icode, tree exp, rtx target, + bool return_p) +{ + rtx pat; + rtx op0 = nds32_read_argument (exp, 0); + rtx op1 = nds32_read_argument (exp, 1); + rtx op2 = nds32_read_argument (exp, 2); + int op0_num = return_p ? 1 : 0; + int op1_num = return_p ? 2 : 1; + int op2_num = return_p ? 3 : 2; + + if (return_p) + target = nds32_legitimize_target (icode, target); + + op0 = nds32_legitimize_argument (icode, op0_num, op0); + op1 = nds32_legitimize_argument (icode, op1_num, op1); + op2 = nds32_legitimize_argument (icode, op2_num, op2); + + /* Emit and return the new instruction. */ + if (return_p) + pat = GEN_FCN (icode) (target, op0, op1, op2); + else + pat = GEN_FCN (icode) (op0, op1, op2); + + if (! pat) + return NULL_RTX; + + emit_insn (pat); + return target; +} + +/* Expand builtins that take three operands and the third is immediate. */ +static rtx +nds32_expand_triopimm_builtin (enum insn_code icode, tree exp, rtx target, + bool return_p, const char *name) +{ + rtx pat; + rtx op0 = nds32_read_argument (exp, 0); + rtx op1 = nds32_read_argument (exp, 1); + rtx op2 = nds32_read_argument (exp, 2); + int op0_num = return_p ? 1 : 0; + int op1_num = return_p ? 2 : 1; + int op2_num = return_p ? 3 : 2; + + if (return_p) + target = nds32_legitimize_target (icode, target); + + if (!nds32_check_constant_argument (icode, op2_num, op2, name)) + return NULL_RTX; + + op0 = nds32_legitimize_argument (icode, op0_num, op0); + op1 = nds32_legitimize_argument (icode, op1_num, op1); + op2 = nds32_legitimize_argument (icode, op2_num, op2); + + /* Emit and return the new instruction. */ + if (return_p) + pat = GEN_FCN (icode) (target, op0, op1, op2); + else + pat = GEN_FCN (icode) (op0, op1, op2); + + if (! 
pat) + return NULL_RTX; + + emit_insn (pat); + return target; +} + +/* Expand builtins for load. */ +static rtx +nds32_expand_builtin_load (enum insn_code icode, tree exp, rtx target) +{ + /* Load address format is [$ra + $rb], + but input arguments not enough, + so we need another temp register as $rb. + Generating assembly code: + movi $temp, 0 + llw $rt, [$ra + $temp] */ + rtx pat; + rtx op0 = nds32_read_argument (exp, 0); + rtx addr_helper = gen_reg_rtx (insn_data[icode].operand[1].mode); + + target = nds32_legitimize_target (icode, target); + op0 = nds32_legitimize_argument (icode, 1, op0); + + /* Emit and return the new instruction. */ + pat = GEN_FCN (icode) (target, op0, addr_helper); + if (!pat) + return NULL_RTX; + + emit_move_insn (addr_helper, GEN_INT (0)); + emit_insn (pat); + return target; +} + +/* Expand builtins for store. */ +static rtx +nds32_expand_builtin_store (enum insn_code icode, tree exp, rtx target) +{ + /* Store address format is [$ra + $rb], + but input arguments not enough, + so we need another temp register as $rb. + Generating assembly code: + movi $temp, 0 + store $rt, [$ra + $temp] */ + rtx pat; + rtx op0 = nds32_read_argument (exp, 0); + rtx op1 = nds32_read_argument (exp, 1); + rtx addr_helper = gen_reg_rtx (insn_data[icode].operand[1].mode); + + op0 = nds32_legitimize_argument (icode, 0, op0); + op1 = nds32_legitimize_argument (icode, 2, op1); + + /* Emit and return the new instruction. */ + pat = GEN_FCN (icode) (op0, addr_helper, op1); + if (! pat) + return NULL_RTX; + + emit_move_insn (addr_helper, GEN_INT (0)); + emit_insn (pat); + return target; +} + +/* Expand cctl builtins. */ +static rtx +nds32_expand_cctl_builtin (enum insn_code icode, tree exp, rtx target, + bool return_p, const char *name) +{ + rtx pat; + rtx op0 = nds32_read_argument (exp, 0); + rtx op1 = nds32_read_argument (exp, 1); + int op0_num = return_p ? 1 : 0; + int op1_num = return_p ? 2 : 1; + + if (return_p) + target = nds32_legitimize_target (icode, target); + + if (!nds32_check_constant_argument (icode, op0_num, op0, name)) + return NULL_RTX; + + op0 = nds32_legitimize_argument (icode, op0_num, op0); + op1 = nds32_legitimize_argument (icode, op1_num, op1); + + /* Emit and return the new instruction. */ + if (icode == CODE_FOR_cctl_idx_write) + { + /* cctl_idx_write is three argument, + so create operand2 for cctl_idx_write pattern. */ + rtx op2 = nds32_read_argument (exp, 2); + op2 = nds32_legitimize_argument (icode, 2, op2); + pat = GEN_FCN (icode) (op0, op1, op2); + } + else if (return_p) + pat = GEN_FCN (icode) (target, op0, op1); + else + pat = GEN_FCN (icode) (op0, op1); + + if (! pat) + return NULL_RTX; + + emit_insn (pat); + return target; +} + +/* Expand scw builtins. */ +static rtx +nds32_expand_scw_builtin (enum insn_code icode, tree exp, rtx target) +{ + /* SCW address format is [$ra + $rb], but input arguments not enough, + so we need another temp register as $rb. + Generating assembly code: + movi $temp, 0 + scw $rt, [$ra + $temp] */ + rtx pat; + rtx op0 = nds32_read_argument (exp, 0); + rtx op1 = nds32_read_argument (exp, 1); + rtx addr_helper = gen_reg_rtx (insn_data[icode].operand[1].mode); + + target = nds32_legitimize_target (icode, target); + op0 = nds32_legitimize_argument (icode, 1, op0); + op1 = nds32_legitimize_argument (icode, 2, op1); + + /* Emit and return the new instruction. 
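+ (Note that TARGET is first loaded with the value operand OP1 below; the unspec_volatile_scw pattern then uses TARGET both as an input operand and as the register that receives the result.)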
*/ + pat = GEN_FCN (icode) (target, op0, addr_helper, target); + + if (!pat) + return NULL_RTX; + + emit_move_insn (addr_helper, GEN_INT (0)); + emit_move_insn (target, op1); + emit_insn (pat); + return target; +} + +/* Expand set int priority builtins. */ +static rtx +nds32_expand_priority_builtin (enum insn_code icode, tree exp, rtx target, + const char *name) +{ + rtx pat; + rtx op0 = nds32_read_argument (exp, 0); + rtx op1 = nds32_read_argument (exp, 1); + + /* The set_int_priority intrinsic function takes two immediate arguments, + so check whether both arguments are immediate. */ + + if (!nds32_check_constant_argument (icode, 0, op0, name)) + return NULL_RTX; + + if (!nds32_check_constant_argument (icode, 1, op1, name)) + return NULL_RTX; + + op0 = nds32_legitimize_argument (icode, 0, op0); + op1 = nds32_legitimize_argument (icode, 1, op1); + + /* Emit and return the new instruction. */ + pat = GEN_FCN (icode) (op0, op1); + + if (! pat) + return NULL_RTX; + + emit_insn (pat); + return target; +} + +struct builtin_description +{ + const enum insn_code icode; + const char *name; + enum nds32_builtins code; + bool return_p; +}; + +#define NDS32_BUILTIN(code, string, builtin) \ + { CODE_FOR_##code, "__nds32__" string, \ + NDS32_BUILTIN_##builtin, true }, + +#define NDS32_NO_TARGET_BUILTIN(code, string, builtin) \ + { CODE_FOR_##code, "__nds32__" string, \ + NDS32_BUILTIN_##builtin, false }, + +/* Intrinsics that take no argument and return a value. */ +static struct builtin_description bdesc_noarg[] = +{ + NDS32_BUILTIN(unspec_fmfcfg, "fmfcfg", FMFCFG) + NDS32_BUILTIN(unspec_fmfcsr, "fmfcsr", FMFCSR) + NDS32_BUILTIN(unspec_rdov, "rdov", RDOV) + NDS32_BUILTIN(unspec_get_current_sp, "get_current_sp", GET_CURRENT_SP) + NDS32_BUILTIN(unspec_return_address, "return_address", RETURN_ADDRESS) + NDS32_BUILTIN(unspec_get_all_pending_int, "get_all_pending_int", + GET_ALL_PENDING_INT) + NDS32_BUILTIN(unspec_unaligned_feature, "unaligned_feature", + UNALIGNED_FEATURE) + NDS32_NO_TARGET_BUILTIN(unspec_enable_unaligned, "enable_unaligned", + ENABLE_UNALIGNED) + NDS32_NO_TARGET_BUILTIN(unspec_disable_unaligned, "disable_unaligned", + DISABLE_UNALIGNED) +}; + +/* Intrinsics that take just one argument. 
*/ +static struct builtin_description bdesc_1arg[] = +{ + NDS32_BUILTIN(unspec_ssabssi2, "abs", ABS) + NDS32_BUILTIN(clzsi2, "clz", CLZ) + NDS32_BUILTIN(unspec_clo, "clo", CLO) + NDS32_BUILTIN(unspec_wsbh, "wsbh", WSBH) + NDS32_BUILTIN(unspec_tlbop_pb, "tlbop_pb",TLBOP_PB) + NDS32_BUILTIN(unaligned_load_hw, "unaligned_load_hw", UALOAD_HW) + NDS32_BUILTIN(unaligned_loadsi, "unaligned_load_w", UALOAD_W) + NDS32_BUILTIN(unaligned_loaddi, "unaligned_load_dw", UALOAD_DW) + NDS32_NO_TARGET_BUILTIN(unspec_volatile_isync, "isync", ISYNC) + NDS32_NO_TARGET_BUILTIN(unspec_fmtcsr, "fmtcsr", FMTCSR) + NDS32_NO_TARGET_BUILTIN(unspec_jr_itoff, "jr_itoff", JR_ITOFF) + NDS32_NO_TARGET_BUILTIN(unspec_jr_toff, "jr_toff", JR_TOFF) + NDS32_NO_TARGET_BUILTIN(unspec_jral_ton, "jral_ton", JRAL_TON) + NDS32_NO_TARGET_BUILTIN(unspec_ret_toff, "ret_toff", RET_TOFF) + NDS32_NO_TARGET_BUILTIN(unspec_jral_iton, "jral_iton",JRAL_ITON) + NDS32_NO_TARGET_BUILTIN(unspec_tlbop_trd, "tlbop_trd", TLBOP_TRD) + NDS32_NO_TARGET_BUILTIN(unspec_tlbop_twr, "tlbop_twr", TLBOP_TWR) + NDS32_NO_TARGET_BUILTIN(unspec_tlbop_rwr, "tlbop_rwr", TLBOP_RWR) + NDS32_NO_TARGET_BUILTIN(unspec_tlbop_rwlk, "tlbop_rwlk", TLBOP_RWLK) + NDS32_NO_TARGET_BUILTIN(unspec_tlbop_unlk, "tlbop_unlk", TLBOP_UNLK) + NDS32_NO_TARGET_BUILTIN(unspec_tlbop_inv, "tlbop_inv", TLBOP_INV) + NDS32_NO_TARGET_BUILTIN(unspec_ret_itoff, "ret_itoff", RET_ITOFF) + NDS32_NO_TARGET_BUILTIN(unspec_set_current_sp, + "set_current_sp", SET_CURRENT_SP) + NDS32_BUILTIN(kabsv2hi2, "kabs16", KABS16) + NDS32_BUILTIN(kabsv2hi2, "v_kabs16", V_KABS16) + NDS32_BUILTIN(kabsv4qi2, "kabs8", KABS8) + NDS32_BUILTIN(kabsv4qi2, "v_kabs8", V_KABS8) + NDS32_BUILTIN(sunpkd810, "sunpkd810", SUNPKD810) + NDS32_BUILTIN(sunpkd810, "v_sunpkd810", V_SUNPKD810) + NDS32_BUILTIN(sunpkd820, "sunpkd820", SUNPKD820) + NDS32_BUILTIN(sunpkd820, "v_sunpkd820", V_SUNPKD820) + NDS32_BUILTIN(sunpkd830, "sunpkd830", SUNPKD830) + NDS32_BUILTIN(sunpkd830, "v_sunpkd830", V_SUNPKD830) + NDS32_BUILTIN(sunpkd831, "sunpkd831", SUNPKD831) + NDS32_BUILTIN(sunpkd831, "v_sunpkd831", V_SUNPKD831) + NDS32_BUILTIN(zunpkd810, "zunpkd810", ZUNPKD810) + NDS32_BUILTIN(zunpkd810, "v_zunpkd810", V_ZUNPKD810) + NDS32_BUILTIN(zunpkd820, "zunpkd820", ZUNPKD820) + NDS32_BUILTIN(zunpkd820, "v_zunpkd820", V_ZUNPKD820) + NDS32_BUILTIN(zunpkd830, "zunpkd830", ZUNPKD830) + NDS32_BUILTIN(zunpkd830, "v_zunpkd830", V_ZUNPKD830) + NDS32_BUILTIN(zunpkd831, "zunpkd831", ZUNPKD831) + NDS32_BUILTIN(zunpkd831, "v_zunpkd831", V_ZUNPKD831) + NDS32_BUILTIN(unspec_kabs, "kabs", KABS) +}; + +/* Intrinsics that take just one argument. and the argument is immediate. 
*/ +static struct builtin_description bdesc_1argimm[] = +{ + NDS32_BUILTIN(unspec_volatile_mfsr, "mfsr", MFSR) + NDS32_BUILTIN(unspec_volatile_mfusr, "mfsr", MFUSR) + NDS32_BUILTIN(unspec_get_pending_int, "get_pending_int", GET_PENDING_INT) + NDS32_BUILTIN(unspec_get_int_priority, "get_int_priority", GET_INT_PRIORITY) + NDS32_NO_TARGET_BUILTIN(unspec_trap, "trap", TRAP) + NDS32_NO_TARGET_BUILTIN(unspec_break, "break", BREAK) + NDS32_NO_TARGET_BUILTIN(unspec_syscall, "syscall", SYSCALL) + NDS32_NO_TARGET_BUILTIN(unspec_enable_int, "enable_int", ENABLE_INT) + NDS32_NO_TARGET_BUILTIN(unspec_disable_int, "disable_int", DISABLE_INT) + NDS32_NO_TARGET_BUILTIN(unspec_clr_pending_hwint, "clr_pending_hwint", + CLR_PENDING_HWINT) + NDS32_NO_TARGET_BUILTIN(unspec_set_trig_level, "set_trig_level", + SET_TRIG_LEVEL) + NDS32_NO_TARGET_BUILTIN(unspec_set_trig_edge, "set_trig_edge", + SET_TRIG_EDGE) + NDS32_BUILTIN(unspec_get_trig_type, "get_trig_type", GET_TRIG_TYPE) +}; + +/* Intrinsics that take two arguments. */ +static struct builtin_description bdesc_2arg[] = +{ + NDS32_BUILTIN(unspec_fcpynss, "fcpynss", FCPYNSS) + NDS32_BUILTIN(unspec_fcpyss, "fcpyss", FCPYSS) + NDS32_BUILTIN(unspec_fcpynsd, "fcpynsd", FCPYNSD) + NDS32_BUILTIN(unspec_fcpysd, "fcpysd", FCPYSD) + NDS32_BUILTIN(unspec_ave, "ave", AVE) + NDS32_BUILTIN(unspec_pbsad, "pbsad", PBSAD) + NDS32_BUILTIN(unspec_ffb, "ffb", FFB) + NDS32_BUILTIN(unspec_ffmism, "ffmsim", FFMISM) + NDS32_BUILTIN(unspec_flmism, "flmism", FLMISM) + NDS32_BUILTIN(unspec_kaddw, "kaddw", KADDW) + NDS32_BUILTIN(unspec_kaddh, "kaddh", KADDH) + NDS32_BUILTIN(unspec_ksubw, "ksubw", KSUBW) + NDS32_BUILTIN(unspec_ksubh, "ksubh", KSUBH) + NDS32_BUILTIN(unspec_kdmbb, "kdmbb", KDMBB) + NDS32_BUILTIN(unspec_kdmbb, "v_kdmbb", V_KDMBB) + NDS32_BUILTIN(unspec_kdmbt, "kdmbt", KDMBT) + NDS32_BUILTIN(unspec_kdmbt, "v_kdmbt", V_KDMBT) + NDS32_BUILTIN(unspec_kdmtb, "kdmtb", KDMTB) + NDS32_BUILTIN(unspec_kdmtb, "v_kdmtb", V_KDMTB) + NDS32_BUILTIN(unspec_kdmtt, "kdmtt", KDMTT) + NDS32_BUILTIN(unspec_kdmtt, "v_kdmtt", V_KDMTT) + NDS32_BUILTIN(unspec_khmbb, "khmbb", KHMBB) + NDS32_BUILTIN(unspec_khmbb, "v_khmbb", V_KHMBB) + NDS32_BUILTIN(unspec_khmbt, "khmbt", KHMBT) + NDS32_BUILTIN(unspec_khmbt, "v_khmbt", V_KHMBT) + NDS32_BUILTIN(unspec_khmtb, "khmtb", KHMTB) + NDS32_BUILTIN(unspec_khmtb, "v_khmtb", V_KHMTB) + NDS32_BUILTIN(unspec_khmtt, "khmtt", KHMTT) + NDS32_BUILTIN(unspec_khmtt, "v_khmtt", V_KHMTT) + NDS32_BUILTIN(unspec_kslraw, "kslraw", KSLRAW) + NDS32_BUILTIN(unspec_kslrawu, "kslraw_u", KSLRAW_U) + NDS32_BUILTIN(rotrsi3, "rotr", ROTR) + NDS32_BUILTIN(unspec_sva, "sva", SVA) + NDS32_BUILTIN(unspec_svs, "svs", SVS) + NDS32_NO_TARGET_BUILTIN(mtsr_isb, "mtsr_isb", MTSR_ISB) + NDS32_NO_TARGET_BUILTIN(mtsr_dsb, "mtsr_dsb", MTSR_DSB) + NDS32_NO_TARGET_BUILTIN(unspec_volatile_mtsr, "mtsr", MTSR) + NDS32_NO_TARGET_BUILTIN(unspec_volatile_mtusr, "mtusr", MTUSR) + NDS32_NO_TARGET_BUILTIN(unaligned_store_hw, "unaligned_store_hw", UASTORE_HW) + NDS32_NO_TARGET_BUILTIN(unaligned_storesi, "unaligned_store_hw", UASTORE_W) + NDS32_NO_TARGET_BUILTIN(unaligned_storedi, "unaligned_store_hw", UASTORE_DW) + NDS32_BUILTIN(addv2hi3, "add16", ADD16) + NDS32_BUILTIN(addv2hi3, "v_uadd16", V_UADD16) + NDS32_BUILTIN(addv2hi3, "v_sadd16", V_SADD16) + NDS32_BUILTIN(raddv2hi3, "radd16", RADD16) + NDS32_BUILTIN(raddv2hi3, "v_radd16", V_RADD16) + NDS32_BUILTIN(uraddv2hi3, "uradd16", URADD16) + NDS32_BUILTIN(uraddv2hi3, "v_uradd16", V_URADD16) + NDS32_BUILTIN(kaddv2hi3, "kadd16", KADD16) + NDS32_BUILTIN(kaddv2hi3, 
"v_kadd16", V_KADD16) + NDS32_BUILTIN(ukaddv2hi3, "ukadd16", UKADD16) + NDS32_BUILTIN(ukaddv2hi3, "v_ukadd16", V_UKADD16) + NDS32_BUILTIN(subv2hi3, "sub16", SUB16) + NDS32_BUILTIN(subv2hi3, "v_usub16", V_USUB16) + NDS32_BUILTIN(subv2hi3, "v_ssub16", V_SSUB16) + NDS32_BUILTIN(rsubv2hi3, "rsub16", RSUB16) + NDS32_BUILTIN(rsubv2hi3, "v_rsub16", V_RSUB16) + NDS32_BUILTIN(ursubv2hi3, "ursub16", URSUB16) + NDS32_BUILTIN(ursubv2hi3, "v_ursub16", V_URSUB16) + NDS32_BUILTIN(ksubv2hi3, "ksub16", KSUB16) + NDS32_BUILTIN(ksubv2hi3, "v_ksub16", V_KSUB16) + NDS32_BUILTIN(uksubv2hi3, "uksub16", UKSUB16) + NDS32_BUILTIN(uksubv2hi3, "v_uksub16", V_UKSUB16) + NDS32_BUILTIN(cras16_1, "cras16", CRAS16) + NDS32_BUILTIN(cras16_1, "v_ucras16", V_UCRAS16) + NDS32_BUILTIN(cras16_1, "v_scras16", V_SCRAS16) + NDS32_BUILTIN(rcras16_1, "rcras16", RCRAS16) + NDS32_BUILTIN(rcras16_1, "v_rcras16", V_RCRAS16) + NDS32_BUILTIN(urcras16_1, "urcras16", URCRAS16) + NDS32_BUILTIN(urcras16_1, "v_urcras16", V_URCRAS16) + NDS32_BUILTIN(kcras16_1, "kcras16", KCRAS16) + NDS32_BUILTIN(kcras16_1, "v_kcras16", V_KCRAS16) + NDS32_BUILTIN(ukcras16_1, "ukcras16", UKCRAS16) + NDS32_BUILTIN(ukcras16_1, "v_ukcras16", V_UKCRAS16) + NDS32_BUILTIN(crsa16_1, "crsa16", CRSA16) + NDS32_BUILTIN(crsa16_1, "v_ucrsa16", V_UCRSA16) + NDS32_BUILTIN(crsa16_1, "v_scrsa16", V_SCRSA16) + NDS32_BUILTIN(rcrsa16_1, "rcrsa16", RCRSA16) + NDS32_BUILTIN(rcrsa16_1, "v_rcrsa16", V_RCRSA16) + NDS32_BUILTIN(urcrsa16_1, "urcrsa16", URCRSA16) + NDS32_BUILTIN(urcrsa16_1, "v_urcrsa16", V_URCRSA16) + NDS32_BUILTIN(kcrsa16_1, "kcrsa16", KCRSA16) + NDS32_BUILTIN(kcrsa16_1, "v_kcrsa16", V_KCRSA16) + NDS32_BUILTIN(ukcrsa16_1, "ukcrsa16", UKCRSA16) + NDS32_BUILTIN(ukcrsa16_1, "v_ukcrsa16", V_UKCRSA16) + NDS32_BUILTIN(addv4qi3, "add8", ADD8) + NDS32_BUILTIN(addv4qi3, "v_uadd8", V_UADD8) + NDS32_BUILTIN(addv4qi3, "v_sadd8", V_SADD8) + NDS32_BUILTIN(raddv4qi3, "radd8", RADD8) + NDS32_BUILTIN(raddv4qi3, "v_radd8", V_RADD8) + NDS32_BUILTIN(uraddv4qi3, "uradd8", URADD8) + NDS32_BUILTIN(uraddv4qi3, "v_uradd8", V_URADD8) + NDS32_BUILTIN(kaddv4qi3, "kadd8", KADD8) + NDS32_BUILTIN(kaddv4qi3, "v_kadd8", V_KADD8) + NDS32_BUILTIN(ukaddv4qi3, "ukadd8", UKADD8) + NDS32_BUILTIN(ukaddv4qi3, "v_ukadd8", V_UKADD8) + NDS32_BUILTIN(subv4qi3, "sub8", SUB8) + NDS32_BUILTIN(subv4qi3, "v_usub8", V_USUB8) + NDS32_BUILTIN(subv4qi3, "v_ssub8", V_SSUB8) + NDS32_BUILTIN(rsubv4qi3, "rsub8", RSUB8) + NDS32_BUILTIN(rsubv4qi3, "v_rsub8", V_RSUB8) + NDS32_BUILTIN(ursubv4qi3, "ursub8", URSUB8) + NDS32_BUILTIN(ursubv4qi3, "v_ursub8", V_URSUB8) + NDS32_BUILTIN(ksubv4qi3, "ksub8", KSUB8) + NDS32_BUILTIN(ksubv4qi3, "v_ksub8", V_KSUB8) + NDS32_BUILTIN(uksubv4qi3, "uksub8", UKSUB8) + NDS32_BUILTIN(uksubv4qi3, "v_uksub8", V_UKSUB8) + NDS32_BUILTIN(ashrv2hi3, "sra16", SRA16) + NDS32_BUILTIN(ashrv2hi3, "v_sra16", V_SRA16) + NDS32_BUILTIN(sra16_round, "sra16_u", SRA16_U) + NDS32_BUILTIN(sra16_round, "v_sra16_u", V_SRA16_U) + NDS32_BUILTIN(lshrv2hi3, "srl16", SRL16) + NDS32_BUILTIN(lshrv2hi3, "v_srl16", V_SRL16) + NDS32_BUILTIN(srl16_round, "srl16_u", SRL16_U) + NDS32_BUILTIN(srl16_round, "v_srl16_u", V_SRL16_U) + NDS32_BUILTIN(ashlv2hi3, "sll16", SLL16) + NDS32_BUILTIN(ashlv2hi3, "v_sll16", V_SLL16) + NDS32_BUILTIN(kslli16, "ksll16", KSLL16) + NDS32_BUILTIN(kslli16, "v_ksll16", V_KSLL16) + NDS32_BUILTIN(kslra16, "kslra16", KSLRA16) + NDS32_BUILTIN(kslra16, "v_kslra16", V_KSLRA16) + NDS32_BUILTIN(kslra16_round, "kslra16_u", KSLRA16_U) + NDS32_BUILTIN(kslra16_round, "v_kslra16_u", V_KSLRA16_U) + NDS32_BUILTIN(cmpeq16, 
"cmpeq16", CMPEQ16) + NDS32_BUILTIN(cmpeq16, "v_scmpeq16", V_SCMPEQ16) + NDS32_BUILTIN(cmpeq16, "v_ucmpeq16", V_UCMPEQ16) + NDS32_BUILTIN(scmplt16, "scmplt16", SCMPLT16) + NDS32_BUILTIN(scmplt16, "v_scmplt16", V_SCMPLT16) + NDS32_BUILTIN(scmple16, "scmple16", SCMPLE16) + NDS32_BUILTIN(scmple16, "v_scmple16", V_SCMPLE16) + NDS32_BUILTIN(ucmplt16, "ucmplt16", UCMPLT16) + NDS32_BUILTIN(ucmplt16, "v_ucmplt16", V_UCMPLT16) + NDS32_BUILTIN(ucmplt16, "ucmple16", UCMPLE16) + NDS32_BUILTIN(ucmplt16, "v_ucmple16", V_UCMPLE16) + NDS32_BUILTIN(cmpeq8, "cmpeq8", CMPEQ8) + NDS32_BUILTIN(cmpeq8, "v_scmpeq8", V_SCMPEQ8) + NDS32_BUILTIN(cmpeq8, "v_ucmpeq8", V_UCMPEQ8) + NDS32_BUILTIN(scmplt8, "scmplt8", SCMPLT8) + NDS32_BUILTIN(scmplt8, "v_scmplt8", V_SCMPLT8) + NDS32_BUILTIN(scmple8, "scmple8", SCMPLE8) + NDS32_BUILTIN(scmple8, "v_scmple8", V_SCMPLE8) + NDS32_BUILTIN(ucmplt8, "ucmplt8", UCMPLT8) + NDS32_BUILTIN(ucmplt8, "v_ucmplt8", V_UCMPLT8) + NDS32_BUILTIN(ucmplt8, "ucmple8", UCMPLE8) + NDS32_BUILTIN(ucmplt8, "v_ucmple8", V_UCMPLE8) + NDS32_BUILTIN(sminv2hi3, "smin16", SMIN16) + NDS32_BUILTIN(sminv2hi3, "v_smin16", V_SMIN16) + NDS32_BUILTIN(uminv2hi3, "umin16", UMIN16) + NDS32_BUILTIN(uminv2hi3, "v_umin16", V_UMIN16) + NDS32_BUILTIN(smaxv2hi3, "smax16", SMAX16) + NDS32_BUILTIN(smaxv2hi3, "v_smax16", V_SMAX16) + NDS32_BUILTIN(umaxv2hi3, "umax16", UMAX16) + NDS32_BUILTIN(umaxv2hi3, "v_umax16", V_UMAX16) + NDS32_BUILTIN(khm16, "khm16", KHM16) + NDS32_BUILTIN(khm16, "v_khm16", V_KHM16) + NDS32_BUILTIN(khmx16, "khmx16", KHMX16) + NDS32_BUILTIN(khmx16, "v_khmx16", V_KHMX16) + NDS32_BUILTIN(sminv4qi3, "smin8", SMIN8) + NDS32_BUILTIN(sminv4qi3, "v_smin8", V_SMIN8) + NDS32_BUILTIN(uminv4qi3, "umin8", UMIN8) + NDS32_BUILTIN(uminv4qi3, "v_umin8", V_UMIN8) + NDS32_BUILTIN(smaxv4qi3, "smax8", SMAX8) + NDS32_BUILTIN(smaxv4qi3, "v_smax8", V_SMAX8) + NDS32_BUILTIN(umaxv4qi3, "umax8", UMAX8) + NDS32_BUILTIN(umaxv4qi3, "v_umax8", V_UMAX8) + NDS32_BUILTIN(raddsi3, "raddw", RADDW) + NDS32_BUILTIN(uraddsi3, "uraddw", URADDW) + NDS32_BUILTIN(rsubsi3, "rsubw", RSUBW) + NDS32_BUILTIN(ursubsi3, "ursubw", URSUBW) + NDS32_BUILTIN(sraiu, "sra_u", SRA_U) + NDS32_BUILTIN(kssl, "ksll", KSLL) + NDS32_BUILTIN(pkbb, "pkbb16", PKBB16) + NDS32_BUILTIN(pkbb, "v_pkbb16", V_PKBB16) + NDS32_BUILTIN(pkbt, "pkbt16", PKBT16) + NDS32_BUILTIN(pkbt, "v_pkbt16", V_PKBT16) + NDS32_BUILTIN(pktb, "pktb16", PKTB16) + NDS32_BUILTIN(pktb, "v_pktb16", V_PKTB16) + NDS32_BUILTIN(pktt, "pktt16", PKTT16) + NDS32_BUILTIN(pktt, "v_pktt16", V_PKTT16) + NDS32_BUILTIN(smulsi3_highpart, "smmul", SMMUL) + NDS32_BUILTIN(smmul_round, "smmul_u", SMMUL_U) + NDS32_BUILTIN(smmwb, "smmwb", SMMWB) + NDS32_BUILTIN(smmwb, "v_smmwb", V_SMMWB) + NDS32_BUILTIN(smmwb_round, "smmwb_u", SMMWB_U) + NDS32_BUILTIN(smmwb_round, "v_smmwb_u", V_SMMWB_U) + NDS32_BUILTIN(smmwt, "smmwt", SMMWT) + NDS32_BUILTIN(smmwt, "v_smmwt", V_SMMWT) + NDS32_BUILTIN(smmwt_round, "smmwt_u", SMMWT_U) + NDS32_BUILTIN(smmwt_round, "v_smmwt_u", V_SMMWT_U) + NDS32_BUILTIN(smbb, "smbb", SMBB) + NDS32_BUILTIN(smbb, "v_smbb", V_SMBB) + NDS32_BUILTIN(smbt, "smbt", SMBT) + NDS32_BUILTIN(smbt, "v_smbt", V_SMBT) + NDS32_BUILTIN(smtt, "smtt", SMTT) + NDS32_BUILTIN(smtt, "v_smtt", V_SMTT) + NDS32_BUILTIN(kmda, "kmda", KMDA) + NDS32_BUILTIN(kmda, "v_kmda", V_KMDA) + NDS32_BUILTIN(kmxda, "kmxda", KMXDA) + NDS32_BUILTIN(kmxda, "v_kmxda", V_KMXDA) + NDS32_BUILTIN(smds, "smds", SMDS) + NDS32_BUILTIN(smds, "v_smds", V_SMDS) + NDS32_BUILTIN(smdrs, "smdrs", SMDRS) + NDS32_BUILTIN(smdrs, "v_smdrs", V_SMDRS) + 
NDS32_BUILTIN(smxdsv, "smxds", SMXDS) + NDS32_BUILTIN(smxdsv, "v_smxds", V_SMXDS) + NDS32_BUILTIN(smal1, "smal", SMAL) + NDS32_BUILTIN(smal1, "v_smal", V_SMAL) + NDS32_BUILTIN(bitrev, "bitrev", BITREV) + NDS32_BUILTIN(wext, "wext", WEXT) + NDS32_BUILTIN(adddi3, "sadd64", SADD64) + NDS32_BUILTIN(adddi3, "uadd64", UADD64) + NDS32_BUILTIN(radddi3, "radd64", RADD64) + NDS32_BUILTIN(uradddi3, "uradd64", URADD64) + NDS32_BUILTIN(kadddi3, "kadd64", KADD64) + NDS32_BUILTIN(ukadddi3, "ukadd64", UKADD64) + NDS32_BUILTIN(subdi3, "ssub64", SSUB64) + NDS32_BUILTIN(subdi3, "usub64", USUB64) + NDS32_BUILTIN(rsubdi3, "rsub64", RSUB64) + NDS32_BUILTIN(ursubdi3, "ursub64", URSUB64) + NDS32_BUILTIN(ksubdi3, "ksub64", KSUB64) + NDS32_BUILTIN(uksubdi3, "uksub64", UKSUB64) + NDS32_BUILTIN(smul16, "smul16", SMUL16) + NDS32_BUILTIN(smul16, "v_smul16", V_SMUL16) + NDS32_BUILTIN(smulx16, "smulx16", SMULX16) + NDS32_BUILTIN(smulx16, "v_smulx16", V_SMULX16) + NDS32_BUILTIN(umul16, "umul16", UMUL16) + NDS32_BUILTIN(umul16, "v_umul16", V_UMUL16) + NDS32_BUILTIN(umulx16, "umulx16", UMULX16) + NDS32_BUILTIN(umulx16, "v_umulx16", V_UMULX16) + NDS32_BUILTIN(kwmmul, "kwmmul", KWMMUL) + NDS32_BUILTIN(kwmmul_round, "kwmmul_u", KWMMUL_U) +}; + +/* Two-argument intrinsics with an immediate second argument. */ +static struct builtin_description bdesc_2argimm[] = +{ + NDS32_BUILTIN(unspec_bclr, "bclr", BCLR) + NDS32_BUILTIN(unspec_bset, "bset", BSET) + NDS32_BUILTIN(unspec_btgl, "btgl", BTGL) + NDS32_BUILTIN(unspec_btst, "btst", BTST) + NDS32_BUILTIN(unspec_clip, "clip", CLIP) + NDS32_BUILTIN(unspec_clips, "clips", CLIPS) + NDS32_NO_TARGET_BUILTIN(unspec_teqz, "teqz", TEQZ) + NDS32_NO_TARGET_BUILTIN(unspec_tnez, "tnez", TNEZ) + NDS32_BUILTIN(ashrv2hi3, "srl16", SRL16) + NDS32_BUILTIN(ashrv2hi3, "v_srl16", V_SRL16) + NDS32_BUILTIN(srl16_round, "srl16_u", SRL16_U) + NDS32_BUILTIN(srl16_round, "v_srl16_u", V_SRL16_U) + NDS32_BUILTIN(kslli16, "ksll16", KSLL16) + NDS32_BUILTIN(kslli16, "v_ksll16", V_KSLL16) + NDS32_BUILTIN(sclip16, "sclip16", SCLIP16) + NDS32_BUILTIN(sclip16, "v_sclip16", V_SCLIP16) + NDS32_BUILTIN(uclip16, "uclip16", UCLIP16) + NDS32_BUILTIN(uclip16, "v_uclip16", V_UCLIP16) + NDS32_BUILTIN(sraiu, "sra_u", SRA_U) + NDS32_BUILTIN(kssl, "ksll", KSLL) + NDS32_BUILTIN(bitrev, "bitrev", BITREV) + NDS32_BUILTIN(wext, "wext", WEXT) + NDS32_BUILTIN(uclip32, "uclip32", UCLIP32) + NDS32_BUILTIN(sclip32, "sclip32", SCLIP32) +}; + +/* Intrinsics that take three arguments. 
*/ +static struct builtin_description bdesc_3arg[] = +{ + NDS32_BUILTIN(unspec_pbsada, "pbsada", PBSADA) + NDS32_NO_TARGET_BUILTIN(bse, "bse", BSE) + NDS32_NO_TARGET_BUILTIN(bsp, "bsp", BSP) + NDS32_BUILTIN(kmabb, "kmabb", KMABB) + NDS32_BUILTIN(kmabb, "v_kmabb", V_KMABB) + NDS32_BUILTIN(kmabt, "kmabt", KMABT) + NDS32_BUILTIN(kmabt, "v_kmabt", V_KMABT) + NDS32_BUILTIN(kmatt, "kmatt", KMATT) + NDS32_BUILTIN(kmatt, "v_kmatt", V_KMATT) + NDS32_BUILTIN(kmada, "kmada", KMADA) + NDS32_BUILTIN(kmada, "v_kmada", V_KMADA) + NDS32_BUILTIN(kmaxda, "kmaxda", KMAXDA) + NDS32_BUILTIN(kmaxda, "v_kmaxda", V_KMAXDA) + NDS32_BUILTIN(kmads, "kmads", KMADS) + NDS32_BUILTIN(kmads, "v_kmads", V_KMADS) + NDS32_BUILTIN(kmadrs, "kmadrs", KMADRS) + NDS32_BUILTIN(kmadrs, "v_kmadrs", V_KMADRS) + NDS32_BUILTIN(kmaxds, "kmaxds", KMAXDS) + NDS32_BUILTIN(kmaxds, "v_kmaxds", V_KMAXDS) + NDS32_BUILTIN(kmsda, "kmsda", KMSDA) + NDS32_BUILTIN(kmsda, "v_kmsda", V_KMSDA) + NDS32_BUILTIN(kmsxda, "kmsxda", KMSXDA) + NDS32_BUILTIN(kmsxda, "v_kmsxda", V_KMSXDA) + NDS32_BUILTIN(bpick1, "bpick", BPICK) + NDS32_BUILTIN(smar64_1, "smar64", SMAR64) + NDS32_BUILTIN(smsr64, "smsr64", SMSR64) + NDS32_BUILTIN(umar64_1, "umar64", UMAR64) + NDS32_BUILTIN(umsr64, "umsr64", UMSR64) + NDS32_BUILTIN(kmar64_1, "kmar64", KMAR64) + NDS32_BUILTIN(kmsr64, "kmsr64", KMSR64) + NDS32_BUILTIN(ukmar64_1, "ukmar64", UKMAR64) + NDS32_BUILTIN(ukmsr64, "ukmsr64", UKMSR64) + NDS32_BUILTIN(smalbb, "smalbb", SMALBB) + NDS32_BUILTIN(smalbb, "v_smalbb", V_SMALBB) + NDS32_BUILTIN(smalbt, "smalbt", SMALBT) + NDS32_BUILTIN(smalbt, "v_smalbt", V_SMALBT) + NDS32_BUILTIN(smaltt, "smaltt", SMALTT) + NDS32_BUILTIN(smaltt, "v_smaltt", V_SMALTT) + NDS32_BUILTIN(smalda1, "smalda", SMALDA) + NDS32_BUILTIN(smalda1, "v_smalda", V_SMALDA) + NDS32_BUILTIN(smalxda1, "smalxda", SMALXDA) + NDS32_BUILTIN(smalxda1, "v_smalxda", V_SMALXDA) + NDS32_BUILTIN(smalds1, "smalds", SMALDS) + NDS32_BUILTIN(smalds1, "v_smalds", V_SMALDS) + NDS32_BUILTIN(smaldrs3, "smaldrs", SMALDRS) + NDS32_BUILTIN(smaldrs3, "v_smaldrs", V_SMALDRS) + NDS32_BUILTIN(smalxds1, "smalxds", SMALXDS) + NDS32_BUILTIN(smalxds1, "v_smalxds", V_SMALXDS) + NDS32_BUILTIN(smslda1, "smslda", SMSLDA) + NDS32_BUILTIN(smslda1, "v_smslda", V_SMSLDA) + NDS32_BUILTIN(smslxda1, "smslxda", SMSLXDA) + NDS32_BUILTIN(smslxda1, "v_smslxda", V_SMSLXDA) + NDS32_BUILTIN(kmmawb, "kmmawb", KMMAWB) + NDS32_BUILTIN(kmmawb, "v_kmmawb", V_KMMAWB) + NDS32_BUILTIN(kmmawb_round, "kmmawb_u", KMMAWB_U) + NDS32_BUILTIN(kmmawb_round, "v_kmmawb_u", V_KMMAWB_U) + NDS32_BUILTIN(kmmawt, "kmmawt", KMMAWT) + NDS32_BUILTIN(kmmawt, "v_kmmawt", V_KMMAWT) + NDS32_BUILTIN(kmmawt_round, "kmmawt_u", KMMAWT_U) + NDS32_BUILTIN(kmmawt_round, "v_kmmawt_u", V_KMMAWT_U) + NDS32_BUILTIN(kmmac, "kmmac", KMMAC) + NDS32_BUILTIN(kmmac_round, "kmmac_u", KMMAC_U) + NDS32_BUILTIN(kmmsb, "kmmsb", KMMSB) + NDS32_BUILTIN(kmmsb_round, "kmmsb_u", KMMSB_U) +}; + +/* Three-argument intrinsics with an immediate third argument. */ +static struct builtin_description bdesc_3argimm[] = +{ + NDS32_NO_TARGET_BUILTIN(prefetch_qw, "prefetch_qw", DPREF_QW) + NDS32_NO_TARGET_BUILTIN(prefetch_hw, "prefetch_hw", DPREF_HW) + NDS32_NO_TARGET_BUILTIN(prefetch_w, "prefetch_w", DPREF_W) + NDS32_NO_TARGET_BUILTIN(prefetch_dw, "prefetch_dw", DPREF_DW) + NDS32_BUILTIN(insb, "insb", INSB) +}; + +/* Intrinsics that load a value. 
*/ +static struct builtin_description bdesc_load[] = +{ + NDS32_BUILTIN(unspec_volatile_llw, "llw", LLW) + NDS32_BUILTIN(unspec_lwup, "lwup", LWUP) + NDS32_BUILTIN(unspec_lbup, "lbup", LBUP) +}; + +/* Intrinsics that store a value. */ +static struct builtin_description bdesc_store[] = +{ + NDS32_BUILTIN(unspec_swup, "swup", SWUP) + NDS32_BUILTIN(unspec_sbup, "sbup", SBUP) +}; + +static struct builtin_description bdesc_cctl[] = +{ + NDS32_BUILTIN(cctl_idx_read, "cctl_idx_read", CCTL_IDX_READ) + NDS32_NO_TARGET_BUILTIN(cctl_idx_write, "cctl_idx_write", CCTL_IDX_WRITE) + NDS32_NO_TARGET_BUILTIN(cctl_va_lck, "cctl_va_lck", CCTL_VA_LCK) + NDS32_NO_TARGET_BUILTIN(cctl_idx_wbinval, + "cctl_idx_wbinval", CCTL_IDX_WBINVAL) + NDS32_NO_TARGET_BUILTIN(cctl_va_wbinval_l1, + "cctl_va_wbinval_l1", CCTL_VA_WBINVAL_L1) + NDS32_NO_TARGET_BUILTIN(cctl_va_wbinval_la, + "cctl_va_wbinval_la", CCTL_VA_WBINVAL_LA) +}; + +rtx +nds32_expand_builtin_impl (tree exp, + rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + unsigned i; + struct builtin_description *d; + + if (!NDS32_EXT_DSP_P () + && fcode > NDS32_BUILTIN_DSP_BEGIN + && fcode < NDS32_BUILTIN_DSP_END) + error ("don't support DSP extension instructions"); + + switch (fcode) + { + /* FPU Register Transfer. */ + case NDS32_BUILTIN_FMFCFG: + case NDS32_BUILTIN_FMFCSR: + case NDS32_BUILTIN_FMTCSR: + case NDS32_BUILTIN_FCPYNSS: + case NDS32_BUILTIN_FCPYSS: + /* Both v3s and v3f toolchains define TARGET_FPU_SINGLE. */ + if (!TARGET_FPU_SINGLE) + { + error ("this builtin function is only available " + "on the v3s or v3f toolchain"); + return NULL_RTX; + } + break; + + /* FPU Register Transfer. */ + case NDS32_BUILTIN_FCPYNSD: + case NDS32_BUILTIN_FCPYSD: + /* Only v3f toolchain defines TARGET_FPU_DOUBLE. */ + if (!TARGET_FPU_DOUBLE) + { + error ("this builtin function is only available " + "on the v3f toolchain"); + return NULL_RTX; + } + break; + + /* Load and Store */ + case NDS32_BUILTIN_LLW: + case NDS32_BUILTIN_LWUP: + case NDS32_BUILTIN_LBUP: + case NDS32_BUILTIN_SCW: + case NDS32_BUILTIN_SWUP: + case NDS32_BUILTIN_SBUP: + if (TARGET_ISA_V3M) + { + error ("this builtin function not support " + "on the v3m toolchain"); + return NULL_RTX; + } + break; + + /* Performance Extension */ + case NDS32_BUILTIN_ABS: + case NDS32_BUILTIN_AVE: + case NDS32_BUILTIN_BCLR: + case NDS32_BUILTIN_BSET: + case NDS32_BUILTIN_BTGL: + case NDS32_BUILTIN_BTST: + case NDS32_BUILTIN_CLIP: + case NDS32_BUILTIN_CLIPS: + case NDS32_BUILTIN_CLZ: + case NDS32_BUILTIN_CLO: + if (!TARGET_EXT_PERF) + { + error ("don't support performance extension instructions"); + return NULL_RTX; + } + break; + + /* Performance Extension 2 */ + case NDS32_BUILTIN_PBSAD: + case NDS32_BUILTIN_PBSADA: + case NDS32_BUILTIN_BSE: + case NDS32_BUILTIN_BSP: + if (!TARGET_EXT_PERF2) + { + error ("don't support performance extension " + "version 2 instructions"); + return NULL_RTX; + } + break; + + /* String Extension */ + case NDS32_BUILTIN_FFB: + case NDS32_BUILTIN_FFMISM: + case NDS32_BUILTIN_FLMISM: + if (!TARGET_EXT_STRING) + { + error ("don't support string extension instructions"); + return NULL_RTX; + } + break; + + default: + break; + } + + /* Since there are no result and operands, we can simply emit this rtx. 
*/ + switch (fcode) + { + case NDS32_BUILTIN_ISB: + emit_insn (gen_unspec_volatile_isb ()); + return target; + case NDS32_BUILTIN_DSB: + emit_insn (gen_unspec_dsb ()); + return target; + case NDS32_BUILTIN_MSYNC_ALL: + emit_insn (gen_unspec_msync_all ()); + return target; + case NDS32_BUILTIN_MSYNC_STORE: + emit_insn (gen_unspec_msync_store ()); + return target; + case NDS32_BUILTIN_SETGIE_EN: + emit_insn (gen_unspec_volatile_setgie_en ()); + emit_insn (gen_unspec_dsb ()); + return target; + case NDS32_BUILTIN_SETGIE_DIS: + emit_insn (gen_unspec_volatile_setgie_dis ()); + emit_insn (gen_unspec_dsb ()); + return target; + case NDS32_BUILTIN_GIE_DIS: + emit_insn (gen_unspec_volatile_setgie_dis ()); + emit_insn (gen_unspec_dsb ()); + return target; + case NDS32_BUILTIN_GIE_EN: + emit_insn (gen_unspec_volatile_setgie_en ()); + emit_insn (gen_unspec_dsb ()); + return target; + case NDS32_BUILTIN_SET_PENDING_SWINT: + emit_insn (gen_unspec_set_pending_swint ()); + return target; + case NDS32_BUILTIN_CLR_PENDING_SWINT: + emit_insn (gen_unspec_clr_pending_swint ()); + return target; + case NDS32_BUILTIN_CCTL_L1D_INVALALL: + emit_insn (gen_cctl_l1d_invalall()); + return target; + case NDS32_BUILTIN_CCTL_L1D_WBALL_ALVL: + emit_insn (gen_cctl_l1d_wball_alvl()); + return target; + case NDS32_BUILTIN_CCTL_L1D_WBALL_ONE_LVL: + emit_insn (gen_cctl_l1d_wball_one_lvl()); + return target; + case NDS32_BUILTIN_CLROV: + emit_insn (gen_unspec_clrov ()); + return target; + case NDS32_BUILTIN_STANDBY_NO_WAKE_GRANT: + emit_insn (gen_unspec_standby_no_wake_grant ()); + return target; + case NDS32_BUILTIN_STANDBY_WAKE_GRANT: + emit_insn (gen_unspec_standby_wake_grant ()); + return target; + case NDS32_BUILTIN_STANDBY_WAKE_DONE: + emit_insn (gen_unspec_standby_wait_done ()); + return target; + case NDS32_BUILTIN_SETEND_BIG: + emit_insn (gen_unspec_setend_big ()); + return target; + case NDS32_BUILTIN_SETEND_LITTLE: + emit_insn (gen_unspec_setend_little ()); + return target; + case NDS32_BUILTIN_NOP: + emit_insn (gen_unspec_nop ()); + return target; + case NDS32_BUILTIN_SCHE_BARRIER: + emit_insn (gen_blockage ()); + return target; + case NDS32_BUILTIN_TLBOP_FLUA: + emit_insn (gen_unspec_tlbop_flua ()); + return target; + /* Instruction sequence protection */ + case NDS32_BUILTIN_SIGNATURE_BEGIN: + emit_insn (gen_unspec_signature_begin ()); + return target; + case NDS32_BUILTIN_SIGNATURE_END: + emit_insn (gen_unspec_signature_end ()); + return target; + case NDS32_BUILTIN_SCW: + return nds32_expand_scw_builtin (CODE_FOR_unspec_volatile_scw, + exp, target); + case NDS32_BUILTIN_SET_INT_PRIORITY: + return nds32_expand_priority_builtin (CODE_FOR_unspec_set_int_priority, + exp, target, + "__nds32__set_int_priority"); + case NDS32_BUILTIN_NO_HWLOOP: + emit_insn (gen_no_hwloop ()); + return target; + default: + break; + } + + /* Expand groups of builtins. 
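+ Each table below is scanned for an entry whose function code matches + FCODE; the first match is handed to the expander that fits that table's + operand shape (no argument, one argument, immediate forms, loads, stores, + cache control, and so on).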
*/ + for (i = 0, d = bdesc_noarg; i < ARRAY_SIZE (bdesc_noarg); i++, d++) + if (d->code == fcode) + return nds32_expand_noarg_builtin (d->icode, target); + + for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) + if (d->code == fcode) + return nds32_expand_unop_builtin (d->icode, exp, target, d->return_p); + + for (i = 0, d = bdesc_1argimm; i < ARRAY_SIZE (bdesc_1argimm); i++, d++) + if (d->code == fcode) + return nds32_expand_unopimm_builtin (d->icode, exp, target, + d->return_p, d->name); + + for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) + if (d->code == fcode) + return nds32_expand_binop_builtin (d->icode, exp, target, d->return_p); + + for (i = 0, d = bdesc_2argimm; i < ARRAY_SIZE (bdesc_2argimm); i++, d++) + if (d->code == fcode) + return nds32_expand_binopimm_builtin (d->icode, exp, target, + d->return_p, d->name); + + for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++) + if (d->code == fcode) + return nds32_expand_triop_builtin (d->icode, exp, target, d->return_p); + + for (i = 0, d = bdesc_3argimm; i < ARRAY_SIZE (bdesc_3argimm); i++, d++) + if (d->code == fcode) + return nds32_expand_triopimm_builtin (d->icode, exp, target, + d->return_p, d->name); + + for (i = 0, d = bdesc_load; i < ARRAY_SIZE (bdesc_load); i++, d++) + if (d->code == fcode) + return nds32_expand_builtin_load (d->icode, exp, target); + + for (i = 0, d = bdesc_store; i < ARRAY_SIZE (bdesc_store); i++, d++) + if (d->code == fcode) + return nds32_expand_builtin_store (d->icode, exp, target); + + for (i = 0, d = bdesc_cctl; i < ARRAY_SIZE (bdesc_cctl); i++, d++) + if (d->code == fcode) + return nds32_expand_cctl_builtin (d->icode, exp, target, + d->return_p, d->name); + + return NULL_RTX; +} + +static GTY(()) tree nds32_builtin_decls[NDS32_BUILTIN_COUNT]; + +/* Return the NDS32 builtin for CODE. */ +tree +nds32_builtin_decl_impl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) +{ + if (code >= NDS32_BUILTIN_COUNT) + return error_mark_node; + + return nds32_builtin_decls[code]; +} + +void +nds32_init_builtins_impl (void) +{ +#define ADD_NDS32_BUILTIN0(NAME, RET_TYPE, CODE) \ + nds32_builtin_decls[NDS32_BUILTIN_ ## CODE] = \ + add_builtin_function ("__builtin_nds32_" NAME, \ + build_function_type_list (RET_TYPE##_type_node, \ + NULL_TREE), \ + NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE) + +#define ADD_NDS32_BUILTIN1(NAME, RET_TYPE, ARG_TYPE, CODE) \ + nds32_builtin_decls[NDS32_BUILTIN_ ## CODE] = \ + add_builtin_function ("__builtin_nds32_" NAME, \ + build_function_type_list (RET_TYPE##_type_node, \ + ARG_TYPE##_type_node, \ + NULL_TREE), \ + NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE) + +#define ADD_NDS32_BUILTIN2(NAME, RET_TYPE, ARG_TYPE1, ARG_TYPE2, CODE) \ + nds32_builtin_decls[NDS32_BUILTIN_ ## CODE] = \ + add_builtin_function ("__builtin_nds32_" NAME, \ + build_function_type_list (RET_TYPE##_type_node, \ + ARG_TYPE1##_type_node,\ + ARG_TYPE2##_type_node,\ + NULL_TREE), \ + NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE) + +#define ADD_NDS32_BUILTIN3(NAME, RET_TYPE, \ + ARG_TYPE1, ARG_TYPE2, ARG_TYPE3, CODE) \ + nds32_builtin_decls[NDS32_BUILTIN_ ## CODE] = \ + add_builtin_function ("__builtin_nds32_" NAME, \ + build_function_type_list (RET_TYPE##_type_node, \ + ARG_TYPE1##_type_node,\ + ARG_TYPE2##_type_node,\ + ARG_TYPE3##_type_node,\ + NULL_TREE), \ + NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE) + + /* Looking for return type and argument can be found in tree.h file. 
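+ Each ADD_NDS32_BUILTINn invocation below registers a "__builtin_nds32_" + NAME function with the listed return and argument type nodes; for example, + ADD_NDS32_BUILTIN2 ("rotr", unsigned, unsigned, unsigned, ROTR) creates a + builtin that can be called as unsigned __builtin_nds32_rotr (unsigned, + unsigned).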
*/ + tree ptr_uchar_type_node = build_pointer_type (unsigned_char_type_node); + tree ptr_ushort_type_node = build_pointer_type (short_unsigned_type_node); + tree ptr_uint_type_node = build_pointer_type (unsigned_type_node); + tree ptr_ulong_type_node = build_pointer_type (long_long_unsigned_type_node); + tree v4qi_type_node = build_vector_type (intQI_type_node, 4); + tree u_v4qi_type_node = build_vector_type (unsigned_intQI_type_node, 4); + tree v2hi_type_node = build_vector_type (intHI_type_node, 2); + tree u_v2hi_type_node = build_vector_type (unsigned_intHI_type_node, 2); + tree v2si_type_node = build_vector_type (intSI_type_node, 2); + tree u_v2si_type_node = build_vector_type (unsigned_intSI_type_node, 2); + + /* Cache. */ + ADD_NDS32_BUILTIN1 ("isync", void, ptr_uint, ISYNC); + ADD_NDS32_BUILTIN0 ("isb", void, ISB); + ADD_NDS32_BUILTIN0 ("dsb", void, DSB); + ADD_NDS32_BUILTIN0 ("msync_all", void, MSYNC_ALL); + ADD_NDS32_BUILTIN0 ("msync_store", void, MSYNC_STORE); + + /* Register Transfer. */ + ADD_NDS32_BUILTIN1 ("mfsr", unsigned, integer, MFSR); + ADD_NDS32_BUILTIN1 ("mfusr", unsigned, integer, MFUSR); + ADD_NDS32_BUILTIN2 ("mtsr", void, unsigned, integer, MTSR); + ADD_NDS32_BUILTIN2 ("mtsr_isb", void, unsigned, integer, MTSR_ISB); + ADD_NDS32_BUILTIN2 ("mtsr_dsb", void, unsigned, integer, MTSR_DSB); + ADD_NDS32_BUILTIN2 ("mtusr", void, unsigned, integer, MTUSR); + + /* FPU Register Transfer. */ + ADD_NDS32_BUILTIN0 ("fmfcsr", unsigned, FMFCSR); + ADD_NDS32_BUILTIN1 ("fmtcsr", void, unsigned, FMTCSR); + ADD_NDS32_BUILTIN0 ("fmfcfg", unsigned, FMFCFG); + ADD_NDS32_BUILTIN2 ("fcpyss", float, float, float, FCPYSS); + ADD_NDS32_BUILTIN2 ("fcpynss", float, float, float, FCPYNSS); + ADD_NDS32_BUILTIN2 ("fcpysd", double, double, double, FCPYSD); + ADD_NDS32_BUILTIN2 ("fcpynsd", double, double, double, FCPYNSD); + + /* Interrupt. 
*/ + ADD_NDS32_BUILTIN0 ("setgie_en", void, SETGIE_EN); + ADD_NDS32_BUILTIN0 ("setgie_dis", void, SETGIE_DIS); + ADD_NDS32_BUILTIN0 ("gie_en", void, GIE_EN); + ADD_NDS32_BUILTIN0 ("gie_dis", void, GIE_DIS); + ADD_NDS32_BUILTIN1 ("enable_int", void, integer, ENABLE_INT); + ADD_NDS32_BUILTIN1 ("disable_int", void, integer, DISABLE_INT); + ADD_NDS32_BUILTIN0 ("set_pending_swint", void, SET_PENDING_SWINT); + ADD_NDS32_BUILTIN0 ("clr_pending_swint", void, CLR_PENDING_SWINT); + ADD_NDS32_BUILTIN0 ("get_all_pending_int", unsigned, GET_ALL_PENDING_INT); + ADD_NDS32_BUILTIN1 ("get_pending_int", unsigned, integer, GET_PENDING_INT); + ADD_NDS32_BUILTIN1 ("get_int_priority", unsigned, integer, GET_INT_PRIORITY); + ADD_NDS32_BUILTIN2 ("set_int_priority", void, integer, integer, + SET_INT_PRIORITY); + ADD_NDS32_BUILTIN1 ("clr_pending_hwint", void, integer, CLR_PENDING_HWINT); + ADD_NDS32_BUILTIN1 ("set_trig_level", void, integer, SET_TRIG_LEVEL); + ADD_NDS32_BUILTIN1 ("set_trig_edge", void, integer, SET_TRIG_EDGE); + ADD_NDS32_BUILTIN1 ("get_trig_type", unsigned, integer, GET_TRIG_TYPE); + + /* Load and Store */ + ADD_NDS32_BUILTIN1 ("llw", unsigned, ptr_uint, LLW); + ADD_NDS32_BUILTIN1 ("lwup", unsigned, ptr_uint, LWUP); + ADD_NDS32_BUILTIN1 ("lbup", char, ptr_uchar, LBUP); + ADD_NDS32_BUILTIN2 ("scw", unsigned, ptr_uint, unsigned, SCW); + ADD_NDS32_BUILTIN2 ("swup", void, ptr_uint, unsigned, SWUP); + ADD_NDS32_BUILTIN2 ("sbup", void, ptr_uchar, char, SBUP); + + /* CCTL */ + ADD_NDS32_BUILTIN0 ("cctl_l1d_invalall", void, CCTL_L1D_INVALALL); + ADD_NDS32_BUILTIN0 ("cctl_l1d_wball_alvl", void, CCTL_L1D_WBALL_ALVL); + ADD_NDS32_BUILTIN0 ("cctl_l1d_wball_one_lvl", void, CCTL_L1D_WBALL_ONE_LVL); + ADD_NDS32_BUILTIN2 ("cctl_va_lck", void, integer, ptr_uint, CCTL_VA_LCK); + ADD_NDS32_BUILTIN2 ("cctl_idx_wbinval", void, integer, unsigned, + CCTL_IDX_WBINVAL); + ADD_NDS32_BUILTIN2 ("cctl_va_wbinval_l1", void, integer, ptr_uint, + CCTL_VA_WBINVAL_L1); + ADD_NDS32_BUILTIN2 ("cctl_va_wbinval_la", void, integer, ptr_uint, + CCTL_VA_WBINVAL_LA); + ADD_NDS32_BUILTIN2 ("cctl_idx_read", unsigned, integer, unsigned, + CCTL_IDX_READ); + ADD_NDS32_BUILTIN3 ("cctl_idx_write", void, integer, unsigned, unsigned, + CCTL_IDX_WRITE); + + /* PREFETCH */ + ADD_NDS32_BUILTIN3 ("dpref_qw", void, ptr_uchar, unsigned, integer, DPREF_QW); + ADD_NDS32_BUILTIN3 ("dpref_hw", void, ptr_ushort, unsigned, integer, + DPREF_HW); + ADD_NDS32_BUILTIN3 ("dpref_w", void, ptr_uint, unsigned, integer, DPREF_W); + ADD_NDS32_BUILTIN3 ("dpref_dw", void, ptr_ulong, unsigned, integer, DPREF_DW); + + /* Performance Extension */ + ADD_NDS32_BUILTIN1 ("pe_abs", integer, integer, ABS); + ADD_NDS32_BUILTIN2 ("pe_ave", integer, integer, integer, AVE); + ADD_NDS32_BUILTIN2 ("pe_bclr", unsigned, unsigned, unsigned, BCLR); + ADD_NDS32_BUILTIN2 ("pe_bset", unsigned, unsigned, unsigned, BSET); + ADD_NDS32_BUILTIN2 ("pe_btgl", unsigned, unsigned, unsigned, BTGL); + ADD_NDS32_BUILTIN2 ("pe_btst", unsigned, unsigned, unsigned, BTST); + ADD_NDS32_BUILTIN2 ("pe_clip", unsigned, integer, unsigned, CLIP); + ADD_NDS32_BUILTIN2 ("pe_clips", integer, integer, unsigned, CLIPS); + ADD_NDS32_BUILTIN1 ("pe_clz", unsigned, unsigned, CLZ); + ADD_NDS32_BUILTIN1 ("pe_clo", unsigned, unsigned, CLO); + + /* Performance Extension 2 */ + ADD_NDS32_BUILTIN3 ("pe2_bse", void, ptr_uint, unsigned, ptr_uint, BSE); + ADD_NDS32_BUILTIN3 ("pe2_bsp", void, ptr_uint, unsigned, ptr_uint, BSP); + ADD_NDS32_BUILTIN2 ("pe2_pbsad", unsigned, unsigned, unsigned, PBSAD); + ADD_NDS32_BUILTIN3 
("pe2_pbsada", unsigned, unsigned, unsigned, unsigned, + PBSADA); + + /* String Extension */ + ADD_NDS32_BUILTIN2 ("se_ffb", integer, unsigned, unsigned, FFB); + ADD_NDS32_BUILTIN2 ("se_ffmism", integer, unsigned, unsigned, FFMISM); + ADD_NDS32_BUILTIN2 ("se_flmism", integer, unsigned, unsigned, FLMISM); + + /* SATURATION */ + ADD_NDS32_BUILTIN2 ("kaddw", integer, integer, integer, KADDW); + ADD_NDS32_BUILTIN2 ("ksubw", integer, integer, integer, KSUBW); + ADD_NDS32_BUILTIN2 ("kaddh", integer, integer, integer, KADDH); + ADD_NDS32_BUILTIN2 ("ksubh", integer, integer, integer, KSUBH); + ADD_NDS32_BUILTIN2 ("kdmbb", integer, unsigned, unsigned, KDMBB); + ADD_NDS32_BUILTIN2 ("v_kdmbb", integer, v2hi, v2hi, V_KDMBB); + ADD_NDS32_BUILTIN2 ("kdmbt", integer, unsigned, unsigned, KDMBT); + ADD_NDS32_BUILTIN2 ("v_kdmbt", integer, v2hi, v2hi, V_KDMBT); + ADD_NDS32_BUILTIN2 ("kdmtb", integer, unsigned, unsigned, KDMTB); + ADD_NDS32_BUILTIN2 ("v_kdmtb", integer, v2hi, v2hi, V_KDMTB); + ADD_NDS32_BUILTIN2 ("kdmtt", integer, unsigned, unsigned, KDMTT); + ADD_NDS32_BUILTIN2 ("v_kdmtt", integer, v2hi, v2hi, V_KDMTT); + ADD_NDS32_BUILTIN2 ("khmbb", integer, unsigned, unsigned, KHMBB); + ADD_NDS32_BUILTIN2 ("v_khmbb", integer, v2hi, v2hi, V_KHMBB); + ADD_NDS32_BUILTIN2 ("khmbt", integer, unsigned, unsigned, KHMBT); + ADD_NDS32_BUILTIN2 ("v_khmbt", integer, v2hi, v2hi, V_KHMBT); + ADD_NDS32_BUILTIN2 ("khmtb", integer, unsigned, unsigned, KHMTB); + ADD_NDS32_BUILTIN2 ("v_khmtb", integer, v2hi, v2hi, V_KHMTB); + ADD_NDS32_BUILTIN2 ("khmtt", integer, unsigned, unsigned, KHMTT); + ADD_NDS32_BUILTIN2 ("v_khmtt", integer, v2hi, v2hi, V_KHMTT); + ADD_NDS32_BUILTIN2 ("kslraw", integer, integer, integer, KSLRAW); + ADD_NDS32_BUILTIN2 ("kslraw_u", integer, integer, integer, KSLRAW_U); + ADD_NDS32_BUILTIN0 ("rdov", unsigned, RDOV); + ADD_NDS32_BUILTIN0 ("clrov", void, CLROV); + + /* ROTR */ + ADD_NDS32_BUILTIN2 ("rotr", unsigned, unsigned, unsigned, ROTR); + + /* Swap */ + ADD_NDS32_BUILTIN1 ("wsbh", unsigned, unsigned, WSBH); + + /* System */ + ADD_NDS32_BUILTIN2 ("svs", unsigned, integer, integer, SVS); + ADD_NDS32_BUILTIN2 ("sva", unsigned, integer, integer, SVA); + ADD_NDS32_BUILTIN1 ("jr_itoff", void, unsigned, JR_ITOFF); + ADD_NDS32_BUILTIN1 ("jr_toff", void, unsigned, JR_TOFF); + ADD_NDS32_BUILTIN1 ("jral_iton", void, unsigned, JRAL_ITON); + ADD_NDS32_BUILTIN1 ("jral_ton", void, unsigned, JRAL_TON); + ADD_NDS32_BUILTIN1 ("ret_itoff", void, unsigned, RET_ITOFF); + ADD_NDS32_BUILTIN1 ("ret_toff", void, unsigned, RET_TOFF); + ADD_NDS32_BUILTIN0 ("standby_no_wake_grant", void, STANDBY_NO_WAKE_GRANT); + ADD_NDS32_BUILTIN0 ("standby_wake_grant", void, STANDBY_WAKE_GRANT); + ADD_NDS32_BUILTIN0 ("standby_wait_done", void, STANDBY_WAKE_DONE); + ADD_NDS32_BUILTIN1 ("break", void, unsigned, BREAK); + ADD_NDS32_BUILTIN1 ("syscall", void, unsigned, SYSCALL); + ADD_NDS32_BUILTIN0 ("nop", void, NOP); + ADD_NDS32_BUILTIN0 ("get_current_sp", unsigned, GET_CURRENT_SP); + ADD_NDS32_BUILTIN1 ("set_current_sp", void, unsigned, SET_CURRENT_SP); + ADD_NDS32_BUILTIN2 ("teqz", void, unsigned, unsigned, TEQZ); + ADD_NDS32_BUILTIN2 ("tnez", void, unsigned, unsigned, TNEZ); + ADD_NDS32_BUILTIN1 ("trap", void, unsigned, TRAP); + ADD_NDS32_BUILTIN0 ("return_address", unsigned, RETURN_ADDRESS); + ADD_NDS32_BUILTIN0 ("setend_big", void, SETEND_BIG); + ADD_NDS32_BUILTIN0 ("setend_little", void, SETEND_LITTLE); + + /* Schedule Barrier */ + ADD_NDS32_BUILTIN0 ("schedule_barrier", void, SCHE_BARRIER); + + /* TLBOP */ + ADD_NDS32_BUILTIN1 
("tlbop_trd", void, unsigned, TLBOP_TRD); + ADD_NDS32_BUILTIN1 ("tlbop_twr", void, unsigned, TLBOP_TWR); + ADD_NDS32_BUILTIN1 ("tlbop_rwr", void, unsigned, TLBOP_RWR); + ADD_NDS32_BUILTIN1 ("tlbop_rwlk", void, unsigned, TLBOP_RWLK); + ADD_NDS32_BUILTIN1 ("tlbop_unlk", void, unsigned, TLBOP_UNLK); + ADD_NDS32_BUILTIN1 ("tlbop_pb", unsigned, unsigned, TLBOP_PB); + ADD_NDS32_BUILTIN1 ("tlbop_inv", void, unsigned, TLBOP_INV); + ADD_NDS32_BUILTIN0 ("tlbop_flua", void, TLBOP_FLUA); + + /* Unaligned Load/Store */ + ADD_NDS32_BUILTIN1 ("unaligned_load_hw", short_unsigned, ptr_ushort, + UALOAD_HW); + ADD_NDS32_BUILTIN1 ("unaligned_load_w", unsigned, ptr_uint, UALOAD_W); + ADD_NDS32_BUILTIN1 ("unaligned_load_dw", long_long_unsigned, ptr_ulong, + UALOAD_DW); + ADD_NDS32_BUILTIN2 ("unaligned_store_hw", void, ptr_ushort, short_unsigned, + UASTORE_HW); + ADD_NDS32_BUILTIN2 ("unaligned_store_w", void, ptr_uint, unsigned, UASTORE_W); + ADD_NDS32_BUILTIN2 ("unaligned_store_dw", void, ptr_ulong, long_long_unsigned, + UASTORE_DW); + ADD_NDS32_BUILTIN0 ("unaligned_feature", unsigned, UNALIGNED_FEATURE); + ADD_NDS32_BUILTIN0 ("enable_unaligned", void, ENABLE_UNALIGNED); + ADD_NDS32_BUILTIN0 ("disable_unaligned", void, DISABLE_UNALIGNED); + + /* Instruction sequence protection */ + ADD_NDS32_BUILTIN0 ("signature_begin", void, SIGNATURE_BEGIN); + ADD_NDS32_BUILTIN0 ("signature_end", void, SIGNATURE_END); + + /* DSP Extension: SIMD 16bit Add and Subtract. */ + ADD_NDS32_BUILTIN2 ("add16", unsigned, unsigned, unsigned, ADD16); + ADD_NDS32_BUILTIN2 ("v_uadd16", u_v2hi, u_v2hi, u_v2hi, V_UADD16); + ADD_NDS32_BUILTIN2 ("v_sadd16", v2hi, v2hi, v2hi, V_SADD16); + ADD_NDS32_BUILTIN2 ("radd16", unsigned, unsigned, unsigned, RADD16); + ADD_NDS32_BUILTIN2 ("v_radd16", v2hi, v2hi, v2hi, V_RADD16); + ADD_NDS32_BUILTIN2 ("uradd16", unsigned, unsigned, unsigned, URADD16); + ADD_NDS32_BUILTIN2 ("v_uradd16", u_v2hi, u_v2hi, u_v2hi, V_URADD16); + ADD_NDS32_BUILTIN2 ("kadd16", unsigned, unsigned, unsigned, KADD16); + ADD_NDS32_BUILTIN2 ("v_kadd16", v2hi, v2hi, v2hi, V_KADD16); + ADD_NDS32_BUILTIN2 ("ukadd16", unsigned, unsigned, unsigned, UKADD16); + ADD_NDS32_BUILTIN2 ("v_ukadd16", u_v2hi, u_v2hi, u_v2hi, V_UKADD16); + ADD_NDS32_BUILTIN2 ("sub16", unsigned, unsigned, unsigned, SUB16); + ADD_NDS32_BUILTIN2 ("v_usub16", u_v2hi, u_v2hi, u_v2hi, V_USUB16); + ADD_NDS32_BUILTIN2 ("v_ssub16", v2hi, v2hi, v2hi, V_SSUB16); + ADD_NDS32_BUILTIN2 ("rsub16", unsigned, unsigned, unsigned, RSUB16); + ADD_NDS32_BUILTIN2 ("v_rsub16", v2hi, v2hi, v2hi, V_RSUB16); + ADD_NDS32_BUILTIN2 ("ursub16", unsigned, unsigned, unsigned, URSUB16); + ADD_NDS32_BUILTIN2 ("v_ursub16", u_v2hi, u_v2hi, u_v2hi, V_URSUB16); + ADD_NDS32_BUILTIN2 ("ksub16", unsigned, unsigned, unsigned, KSUB16); + ADD_NDS32_BUILTIN2 ("v_ksub16", v2hi, v2hi, v2hi, V_KSUB16); + ADD_NDS32_BUILTIN2 ("uksub16", unsigned, unsigned, unsigned, UKSUB16); + ADD_NDS32_BUILTIN2 ("v_uksub16", u_v2hi, u_v2hi, u_v2hi, V_UKSUB16); + ADD_NDS32_BUILTIN2 ("cras16", unsigned, unsigned, unsigned, CRAS16); + ADD_NDS32_BUILTIN2 ("v_ucras16", u_v2hi, u_v2hi, u_v2hi, V_UCRAS16); + ADD_NDS32_BUILTIN2 ("v_scras16", v2hi, v2hi, v2hi, V_SCRAS16); + ADD_NDS32_BUILTIN2 ("rcras16", unsigned, unsigned, unsigned, RCRAS16); + ADD_NDS32_BUILTIN2 ("v_rcras16", v2hi, v2hi, v2hi, V_RCRAS16); + ADD_NDS32_BUILTIN2 ("urcras16", unsigned, unsigned, unsigned, URCRAS16); + ADD_NDS32_BUILTIN2 ("v_urcras16", u_v2hi, u_v2hi, u_v2hi, V_URCRAS16); + ADD_NDS32_BUILTIN2 ("kcras16", unsigned, unsigned, unsigned, KCRAS16); + 
ADD_NDS32_BUILTIN2 ("v_kcras16", v2hi, v2hi, v2hi, V_KCRAS16); + ADD_NDS32_BUILTIN2 ("ukcras16", unsigned, unsigned, unsigned, UKCRAS16); + ADD_NDS32_BUILTIN2 ("v_ukcras16", u_v2hi, u_v2hi, u_v2hi, V_UKCRAS16); + ADD_NDS32_BUILTIN2 ("crsa16", unsigned, unsigned, unsigned, CRSA16); + ADD_NDS32_BUILTIN2 ("v_ucrsa16", u_v2hi, u_v2hi, u_v2hi, V_UCRSA16); + ADD_NDS32_BUILTIN2 ("v_scrsa16", v2hi, v2hi, v2hi, V_SCRSA16); + ADD_NDS32_BUILTIN2 ("rcrsa16", unsigned, unsigned, unsigned, RCRSA16); + ADD_NDS32_BUILTIN2 ("v_rcrsa16", v2hi, v2hi, v2hi, V_RCRSA16); + ADD_NDS32_BUILTIN2 ("urcrsa16", unsigned, unsigned, unsigned, URCRSA16); + ADD_NDS32_BUILTIN2 ("v_urcrsa16", u_v2hi, u_v2hi, u_v2hi, V_URCRSA16); + ADD_NDS32_BUILTIN2 ("kcrsa16", unsigned, unsigned, unsigned, KCRSA16); + ADD_NDS32_BUILTIN2 ("v_kcrsa16", v2hi, v2hi, v2hi, V_KCRSA16); + ADD_NDS32_BUILTIN2 ("ukcrsa16", unsigned, unsigned, unsigned, UKCRSA16); + ADD_NDS32_BUILTIN2 ("v_ukcrsa16", u_v2hi, u_v2hi, u_v2hi, V_UKCRSA16); + + /* DSP Extension: SIMD 8bit Add and Subtract. */ + ADD_NDS32_BUILTIN2 ("add8", integer, integer, integer, ADD8); + ADD_NDS32_BUILTIN2 ("v_uadd8", u_v4qi, u_v4qi, u_v4qi, V_UADD8); + ADD_NDS32_BUILTIN2 ("v_sadd8", v4qi, v4qi, v4qi, V_SADD8); + ADD_NDS32_BUILTIN2 ("radd8", unsigned, unsigned, unsigned, RADD8); + ADD_NDS32_BUILTIN2 ("v_radd8", v4qi, v4qi, v4qi, V_RADD8); + ADD_NDS32_BUILTIN2 ("uradd8", unsigned, unsigned, unsigned, URADD8); + ADD_NDS32_BUILTIN2 ("v_uradd8", u_v4qi, u_v4qi, u_v4qi, V_URADD8); + ADD_NDS32_BUILTIN2 ("kadd8", unsigned, unsigned, unsigned, KADD8); + ADD_NDS32_BUILTIN2 ("v_kadd8", v4qi, v4qi, v4qi, V_KADD8); + ADD_NDS32_BUILTIN2 ("ukadd8", unsigned, unsigned, unsigned, UKADD8); + ADD_NDS32_BUILTIN2 ("v_ukadd8", u_v4qi, u_v4qi, u_v4qi, V_UKADD8); + ADD_NDS32_BUILTIN2 ("sub8", integer, integer, integer, SUB8); + ADD_NDS32_BUILTIN2 ("v_usub8", u_v4qi, u_v4qi, u_v4qi, V_USUB8); + ADD_NDS32_BUILTIN2 ("v_ssub8", v4qi, v4qi, v4qi, V_SSUB8); + ADD_NDS32_BUILTIN2 ("rsub8", unsigned, unsigned, unsigned, RSUB8); + ADD_NDS32_BUILTIN2 ("v_rsub8", v4qi, v4qi, v4qi, V_RSUB8); + ADD_NDS32_BUILTIN2 ("ursub8", unsigned, unsigned, unsigned, URSUB8); + ADD_NDS32_BUILTIN2 ("v_ursub8", u_v4qi, u_v4qi, u_v4qi, V_URSUB8); + ADD_NDS32_BUILTIN2 ("ksub8", unsigned, unsigned, unsigned, KSUB8); + ADD_NDS32_BUILTIN2 ("v_ksub8", v4qi, v4qi, v4qi, V_KSUB8); + ADD_NDS32_BUILTIN2 ("uksub8", unsigned, unsigned, unsigned, UKSUB8); + ADD_NDS32_BUILTIN2 ("v_uksub8", u_v4qi, u_v4qi, u_v4qi, V_UKSUB8); + + /* DSP Extension: SIMD 16bit Shift. 
*/ + ADD_NDS32_BUILTIN2 ("sra16", unsigned, unsigned, unsigned, SRA16); + ADD_NDS32_BUILTIN2 ("v_sra16", v2hi, v2hi, unsigned, V_SRA16); + ADD_NDS32_BUILTIN2 ("sra16_u", unsigned, unsigned, unsigned, SRA16_U); + ADD_NDS32_BUILTIN2 ("v_sra16_u", v2hi, v2hi, unsigned, V_SRA16_U); + ADD_NDS32_BUILTIN2 ("srl16", unsigned, unsigned, unsigned, SRL16); + ADD_NDS32_BUILTIN2 ("v_srl16", u_v2hi, u_v2hi, unsigned, V_SRL16); + ADD_NDS32_BUILTIN2 ("srl16_u", unsigned, unsigned, unsigned, SRL16_U); + ADD_NDS32_BUILTIN2 ("v_srl16_u", u_v2hi, u_v2hi, unsigned, V_SRL16_U); + ADD_NDS32_BUILTIN2 ("sll16", unsigned, unsigned, unsigned, SLL16); + ADD_NDS32_BUILTIN2 ("v_sll16", u_v2hi, u_v2hi, unsigned, V_SLL16); + ADD_NDS32_BUILTIN2 ("ksll16", unsigned, unsigned, unsigned, KSLL16); + ADD_NDS32_BUILTIN2 ("v_ksll16", v2hi, v2hi, unsigned, V_KSLL16); + ADD_NDS32_BUILTIN2 ("kslra16", unsigned, unsigned, unsigned, KSLRA16); + ADD_NDS32_BUILTIN2 ("v_kslra16", v2hi, v2hi, unsigned, V_KSLRA16); + ADD_NDS32_BUILTIN2 ("kslra16_u", unsigned, unsigned, unsigned, KSLRA16_U); + ADD_NDS32_BUILTIN2 ("v_kslra16_u", v2hi, v2hi, unsigned, V_KSLRA16_U); + + /* DSP Extension: 16bit Compare. */ + ADD_NDS32_BUILTIN2 ("cmpeq16", unsigned, unsigned, unsigned, CMPEQ16); + ADD_NDS32_BUILTIN2 ("v_scmpeq16", u_v2hi, v2hi, v2hi, V_SCMPEQ16); + ADD_NDS32_BUILTIN2 ("v_ucmpeq16", u_v2hi, u_v2hi, u_v2hi, V_UCMPEQ16); + ADD_NDS32_BUILTIN2 ("scmplt16", unsigned, unsigned, unsigned, SCMPLT16); + ADD_NDS32_BUILTIN2 ("v_scmplt16", u_v2hi, v2hi, v2hi, V_SCMPLT16); + ADD_NDS32_BUILTIN2 ("scmple16", unsigned, unsigned, unsigned, SCMPLE16); + ADD_NDS32_BUILTIN2 ("v_scmple16", u_v2hi, v2hi, v2hi, V_SCMPLE16); + ADD_NDS32_BUILTIN2 ("ucmplt16", unsigned, unsigned, unsigned, UCMPLT16); + ADD_NDS32_BUILTIN2 ("v_ucmplt16", u_v2hi, u_v2hi, u_v2hi, V_UCMPLT16); + ADD_NDS32_BUILTIN2 ("ucmple16", unsigned, unsigned, unsigned, UCMPLE16); + ADD_NDS32_BUILTIN2 ("v_ucmple16", u_v2hi, u_v2hi, u_v2hi, V_UCMPLE16); + + /* DSP Extension: 8bit Compare. */ + ADD_NDS32_BUILTIN2 ("cmpeq8", unsigned, unsigned, unsigned, CMPEQ8); + ADD_NDS32_BUILTIN2 ("v_scmpeq8", u_v4qi, v4qi, v4qi, V_SCMPEQ8); + ADD_NDS32_BUILTIN2 ("v_ucmpeq8", u_v4qi, u_v4qi, u_v4qi, V_UCMPEQ8); + ADD_NDS32_BUILTIN2 ("scmplt8", unsigned, unsigned, unsigned, SCMPLT8); + ADD_NDS32_BUILTIN2 ("v_scmplt8", u_v4qi, v4qi, v4qi, V_SCMPLT8); + ADD_NDS32_BUILTIN2 ("scmple8", unsigned, unsigned, unsigned, SCMPLE8); + ADD_NDS32_BUILTIN2 ("v_scmple8", u_v4qi, v4qi, v4qi, V_SCMPLE8); + ADD_NDS32_BUILTIN2 ("ucmplt8", unsigned, unsigned, unsigned, UCMPLT8); + ADD_NDS32_BUILTIN2 ("v_ucmplt8", u_v4qi, u_v4qi, u_v4qi, V_UCMPLT8); + ADD_NDS32_BUILTIN2 ("ucmple8", unsigned, unsigned, unsigned, UCMPLE8); + ADD_NDS32_BUILTIN2 ("v_ucmple8", u_v4qi, u_v4qi, u_v4qi, V_UCMPLE8); + + /* DSP Extension: SIMD 16bit MISC. 
*/ + ADD_NDS32_BUILTIN2 ("smin16", unsigned, unsigned, unsigned, SMIN16); + ADD_NDS32_BUILTIN2 ("v_smin16", v2hi, v2hi, v2hi, V_SMIN16); + ADD_NDS32_BUILTIN2 ("umin16", unsigned, unsigned, unsigned, UMIN16); + ADD_NDS32_BUILTIN2 ("v_umin16", u_v2hi, u_v2hi, u_v2hi, V_UMIN16); + ADD_NDS32_BUILTIN2 ("smax16", unsigned, unsigned, unsigned, SMAX16); + ADD_NDS32_BUILTIN2 ("v_smax16", v2hi, v2hi, v2hi, V_SMAX16); + ADD_NDS32_BUILTIN2 ("umax16", unsigned, unsigned, unsigned, UMAX16); + ADD_NDS32_BUILTIN2 ("v_umax16", u_v2hi, u_v2hi, u_v2hi, V_UMAX16); + ADD_NDS32_BUILTIN2 ("sclip16", unsigned, unsigned, unsigned, SCLIP16); + ADD_NDS32_BUILTIN2 ("v_sclip16", v2hi, v2hi, unsigned, V_SCLIP16); + ADD_NDS32_BUILTIN2 ("uclip16", unsigned, unsigned, unsigned, UCLIP16); + ADD_NDS32_BUILTIN2 ("v_uclip16", v2hi, v2hi, unsigned, V_UCLIP16); + ADD_NDS32_BUILTIN2 ("khm16", unsigned, unsigned, unsigned, KHM16); + ADD_NDS32_BUILTIN2 ("v_khm16", v2hi, v2hi, v2hi, V_KHM16); + ADD_NDS32_BUILTIN2 ("khmx16", unsigned, unsigned, unsigned, KHMX16); + ADD_NDS32_BUILTIN2 ("v_khmx16", v2hi, v2hi, v2hi, V_KHMX16); + ADD_NDS32_BUILTIN1 ("kabs16", unsigned, unsigned, KABS16); + ADD_NDS32_BUILTIN1 ("v_kabs16", v2hi, v2hi, V_KABS16); + ADD_NDS32_BUILTIN2 ("smul16", long_long_unsigned, unsigned, unsigned, SMUL16); + ADD_NDS32_BUILTIN2 ("v_smul16", v2si, v2hi, v2hi, V_SMUL16); + ADD_NDS32_BUILTIN2 ("smulx16", + long_long_unsigned, unsigned, unsigned, SMULX16); + ADD_NDS32_BUILTIN2 ("v_smulx16", v2si, v2hi, v2hi, V_SMULX16); + ADD_NDS32_BUILTIN2 ("umul16", long_long_unsigned, unsigned, unsigned, UMUL16); + ADD_NDS32_BUILTIN2 ("v_umul16", u_v2si, u_v2hi, u_v2hi, V_UMUL16); + ADD_NDS32_BUILTIN2 ("umulx16", + long_long_unsigned, unsigned, unsigned, UMULX16); + ADD_NDS32_BUILTIN2 ("v_umulx16", u_v2si, u_v2hi, u_v2hi, V_UMULX16); + + /* DSP Extension: SIMD 8bit MISC. */ + ADD_NDS32_BUILTIN2 ("smin8", unsigned, unsigned, unsigned, SMIN8); + ADD_NDS32_BUILTIN2 ("v_smin8", v4qi, v4qi, v4qi, V_SMIN8); + ADD_NDS32_BUILTIN2 ("umin8", unsigned, unsigned, unsigned, UMIN8); + ADD_NDS32_BUILTIN2 ("v_umin8", u_v4qi, u_v4qi, u_v4qi, V_UMIN8); + ADD_NDS32_BUILTIN2 ("smax8", unsigned, unsigned, unsigned, SMAX8); + ADD_NDS32_BUILTIN2 ("v_smax8", v4qi, v4qi, v4qi, V_SMAX8); + ADD_NDS32_BUILTIN2 ("umax8", unsigned, unsigned, unsigned, UMAX8); + ADD_NDS32_BUILTIN2 ("v_umax8", u_v4qi, u_v4qi, u_v4qi, V_UMAX8); + ADD_NDS32_BUILTIN1 ("kabs8", unsigned, unsigned, KABS8); + ADD_NDS32_BUILTIN1 ("v_kabs8", v4qi, v4qi, V_KABS8); + + /* DSP Extension: 8bit Unpacking. 
*/ + ADD_NDS32_BUILTIN1 ("sunpkd810", unsigned, unsigned, SUNPKD810); + ADD_NDS32_BUILTIN1 ("v_sunpkd810", v2hi, v4qi, V_SUNPKD810); + ADD_NDS32_BUILTIN1 ("sunpkd820", unsigned, unsigned, SUNPKD820); + ADD_NDS32_BUILTIN1 ("v_sunpkd820", v2hi, v4qi, V_SUNPKD820); + ADD_NDS32_BUILTIN1 ("sunpkd830", unsigned, unsigned, SUNPKD830); + ADD_NDS32_BUILTIN1 ("v_sunpkd830", v2hi, v4qi, V_SUNPKD830); + ADD_NDS32_BUILTIN1 ("sunpkd831", unsigned, unsigned, SUNPKD831); + ADD_NDS32_BUILTIN1 ("v_sunpkd831", v2hi, v4qi, V_SUNPKD831); + ADD_NDS32_BUILTIN1 ("zunpkd810", unsigned, unsigned, ZUNPKD810); + ADD_NDS32_BUILTIN1 ("v_zunpkd810", u_v2hi, u_v4qi, V_ZUNPKD810); + ADD_NDS32_BUILTIN1 ("zunpkd820", unsigned, unsigned, ZUNPKD820); + ADD_NDS32_BUILTIN1 ("v_zunpkd820", u_v2hi, u_v4qi, V_ZUNPKD820); + ADD_NDS32_BUILTIN1 ("zunpkd830", unsigned, unsigned, ZUNPKD830); + ADD_NDS32_BUILTIN1 ("v_zunpkd830", u_v2hi, u_v4qi, V_ZUNPKD830); + ADD_NDS32_BUILTIN1 ("zunpkd831", unsigned, unsigned, ZUNPKD831); + ADD_NDS32_BUILTIN1 ("v_zunpkd831", u_v2hi, u_v4qi, V_ZUNPKD831); + + /* DSP Extension: 32bit Add and Subtract. */ + ADD_NDS32_BUILTIN2 ("raddw", integer, integer, integer, RADDW); + ADD_NDS32_BUILTIN2 ("uraddw", unsigned, unsigned, unsigned, URADDW); + ADD_NDS32_BUILTIN2 ("rsubw", integer, integer, integer, RSUBW); + ADD_NDS32_BUILTIN2 ("ursubw", unsigned, unsigned, unsigned, URSUBW); + + /* DSP Extension: 32bit Shift. */ + ADD_NDS32_BUILTIN2 ("sra_u", integer, integer, unsigned, SRA_U); + ADD_NDS32_BUILTIN2 ("ksll", integer, integer, unsigned, KSLL); + + /* DSP Extension: 16bit Packing. */ + ADD_NDS32_BUILTIN2 ("pkbb16", unsigned, unsigned, unsigned, PKBB16); + ADD_NDS32_BUILTIN2 ("v_pkbb16", u_v2hi, u_v2hi, u_v2hi, V_PKBB16); + ADD_NDS32_BUILTIN2 ("pkbt16", unsigned, unsigned, unsigned, PKBT16); + ADD_NDS32_BUILTIN2 ("v_pkbt16", u_v2hi, u_v2hi, u_v2hi, V_PKBT16); + ADD_NDS32_BUILTIN2 ("pktb16", unsigned, unsigned, unsigned, PKTB16); + ADD_NDS32_BUILTIN2 ("v_pktb16", u_v2hi, u_v2hi, u_v2hi, V_PKTB16); + ADD_NDS32_BUILTIN2 ("pktt16", unsigned, unsigned, unsigned, PKTT16); + ADD_NDS32_BUILTIN2 ("v_pktt16", u_v2hi, u_v2hi, u_v2hi, V_PKTT16); + + /* DSP Extension: Signed MSW 32x32 Multiply and ADD. */ + ADD_NDS32_BUILTIN2 ("smmul", integer, integer, integer, SMMUL); + ADD_NDS32_BUILTIN2 ("smmul_u", integer, integer, integer, SMMUL_U); + ADD_NDS32_BUILTIN3 ("kmmac", integer, integer, integer, integer, KMMAC); + ADD_NDS32_BUILTIN3 ("kmmac_u", integer, integer, integer, integer, KMMAC_U); + ADD_NDS32_BUILTIN3 ("kmmsb", integer, integer, integer, integer, KMMSB); + ADD_NDS32_BUILTIN3 ("kmmsb_u", integer, integer, integer, integer, KMMSB_U); + ADD_NDS32_BUILTIN2 ("kwmmul", integer, integer, integer, KWMMUL); + ADD_NDS32_BUILTIN2 ("kwmmul_u", integer, integer, integer, KWMMUL_U); + + /* DSP Extension: Most Significant Word 32x16 Multiply and ADD. 
*/ + ADD_NDS32_BUILTIN2 ("smmwb", integer, integer, unsigned, SMMWB); + ADD_NDS32_BUILTIN2 ("v_smmwb", integer, integer, v2hi, V_SMMWB); + ADD_NDS32_BUILTIN2 ("smmwb_u", integer, integer, unsigned, SMMWB_U); + ADD_NDS32_BUILTIN2 ("v_smmwb_u", integer, integer, v2hi, V_SMMWB_U); + ADD_NDS32_BUILTIN2 ("smmwt", integer, integer, unsigned, SMMWT); + ADD_NDS32_BUILTIN2 ("v_smmwt", integer, integer, v2hi, V_SMMWT); + ADD_NDS32_BUILTIN2 ("smmwt_u", integer, integer, unsigned, SMMWT_U); + ADD_NDS32_BUILTIN2 ("v_smmwt_u", integer, integer, v2hi, V_SMMWT_U); + ADD_NDS32_BUILTIN3 ("kmmawb", integer, integer, integer, unsigned, KMMAWB); + ADD_NDS32_BUILTIN3 ("v_kmmawb", integer, integer, integer, v2hi, V_KMMAWB); + ADD_NDS32_BUILTIN3 ("kmmawb_u", + integer, integer, integer, unsigned, KMMAWB_U); + ADD_NDS32_BUILTIN3 ("v_kmmawb_u", + integer, integer, integer, v2hi, V_KMMAWB_U); + ADD_NDS32_BUILTIN3 ("kmmawt", integer, integer, integer, unsigned, KMMAWT); + ADD_NDS32_BUILTIN3 ("v_kmmawt", integer, integer, integer, v2hi, V_KMMAWT); + ADD_NDS32_BUILTIN3 ("kmmawt_u", + integer, integer, integer, unsigned, KMMAWT_U); + ADD_NDS32_BUILTIN3 ("v_kmmawt_u", + integer, integer, integer, v2hi, V_KMMAWT_U); + + /* DSP Extension: Signed 16bit Multiply with ADD/Subtract. */ + ADD_NDS32_BUILTIN2 ("smbb", integer, unsigned, unsigned, SMBB); + ADD_NDS32_BUILTIN2 ("v_smbb", integer, v2hi, v2hi, V_SMBB); + ADD_NDS32_BUILTIN2 ("smbt", integer, unsigned, unsigned, SMBT); + ADD_NDS32_BUILTIN2 ("v_smbt", integer, v2hi, v2hi, V_SMBT); + ADD_NDS32_BUILTIN2 ("smtt", integer, unsigned, unsigned, SMTT); + ADD_NDS32_BUILTIN2 ("v_smtt", integer, v2hi, v2hi, V_SMTT); + ADD_NDS32_BUILTIN2 ("kmda", integer, unsigned, unsigned, KMDA); + ADD_NDS32_BUILTIN2 ("v_kmda", integer, v2hi, v2hi, V_KMDA); + ADD_NDS32_BUILTIN2 ("kmxda", integer, unsigned, unsigned, KMXDA); + ADD_NDS32_BUILTIN2 ("v_kmxda", integer, v2hi, v2hi, V_KMXDA); + ADD_NDS32_BUILTIN2 ("smds", integer, unsigned, unsigned, SMDS); + ADD_NDS32_BUILTIN2 ("v_smds", integer, v2hi, v2hi, V_SMDS); + ADD_NDS32_BUILTIN2 ("smdrs", integer, unsigned, unsigned, SMDRS); + ADD_NDS32_BUILTIN2 ("v_smdrs", integer, v2hi, v2hi, V_SMDRS); + ADD_NDS32_BUILTIN2 ("smxds", integer, unsigned, unsigned, SMXDS); + ADD_NDS32_BUILTIN2 ("v_smxds", integer, v2hi, v2hi, V_SMXDS); + ADD_NDS32_BUILTIN3 ("kmabb", integer, integer, unsigned, unsigned, KMABB); + ADD_NDS32_BUILTIN3 ("v_kmabb", integer, integer, v2hi, v2hi, V_KMABB); + ADD_NDS32_BUILTIN3 ("kmabt", integer, integer, unsigned, unsigned, KMABT); + ADD_NDS32_BUILTIN3 ("v_kmabt", integer, integer, v2hi, v2hi, V_KMABT); + ADD_NDS32_BUILTIN3 ("kmatt", integer, integer, unsigned, unsigned, KMATT); + ADD_NDS32_BUILTIN3 ("v_kmatt", integer, integer, v2hi, v2hi, V_KMATT); + ADD_NDS32_BUILTIN3 ("kmada", integer, integer, unsigned, unsigned, KMADA); + ADD_NDS32_BUILTIN3 ("v_kmada", integer, integer, v2hi, v2hi, V_KMADA); + ADD_NDS32_BUILTIN3 ("kmaxda", integer, integer, unsigned, unsigned, KMAXDA); + ADD_NDS32_BUILTIN3 ("v_kmaxda", integer, integer, v2hi, v2hi, V_KMAXDA); + ADD_NDS32_BUILTIN3 ("kmads", integer, integer, unsigned, unsigned, KMADS); + ADD_NDS32_BUILTIN3 ("v_kmads", integer, integer, v2hi, v2hi, V_KMADS); + ADD_NDS32_BUILTIN3 ("kmadrs", integer, integer, unsigned, unsigned, KMADRS); + ADD_NDS32_BUILTIN3 ("v_kmadrs", integer, integer, v2hi, v2hi, V_KMADRS); + ADD_NDS32_BUILTIN3 ("kmaxds", integer, integer, unsigned, unsigned, KMAXDS); + ADD_NDS32_BUILTIN3 ("v_kmaxds", integer, integer, v2hi, v2hi, V_KMAXDS); + ADD_NDS32_BUILTIN3 ("kmsda", 
integer, integer, unsigned, unsigned, KMSDA); + ADD_NDS32_BUILTIN3 ("v_kmsda", integer, integer, v2hi, v2hi, V_KMSDA); + ADD_NDS32_BUILTIN3 ("kmsxda", integer, integer, unsigned, unsigned, KMSXDA); + ADD_NDS32_BUILTIN3 ("v_kmsxda", integer, integer, v2hi, v2hi, V_KMSXDA); + + /* DSP Extension: Signed 16bit Multiply with 64bit ADD/Subtract. */ + ADD_NDS32_BUILTIN2 ("smal", long_long_integer, + long_long_integer, unsigned, SMAL); + ADD_NDS32_BUILTIN2 ("v_smal", long_long_integer, + long_long_integer, v2hi, V_SMAL); + + /* DSP Extension: 32bit MISC. */ + ADD_NDS32_BUILTIN2 ("bitrev", unsigned, unsigned, unsigned, BITREV); + ADD_NDS32_BUILTIN2 ("wext", unsigned, long_long_integer, unsigned, WEXT); + ADD_NDS32_BUILTIN3 ("bpick", unsigned, unsigned, unsigned, unsigned, BPICK); + ADD_NDS32_BUILTIN3 ("insb", unsigned, unsigned, unsigned, unsigned, INSB); + + /* DSP Extension: 64bit Add and Subtract. */ + ADD_NDS32_BUILTIN2 ("sadd64", long_long_integer, + long_long_integer, long_long_integer, SADD64); + ADD_NDS32_BUILTIN2 ("uadd64", long_long_unsigned, + long_long_unsigned, long_long_unsigned, UADD64); + ADD_NDS32_BUILTIN2 ("radd64", long_long_integer, + long_long_integer, long_long_integer, RADD64); + ADD_NDS32_BUILTIN2 ("uradd64", long_long_unsigned, + long_long_unsigned, long_long_unsigned, URADD64); + ADD_NDS32_BUILTIN2 ("kadd64", long_long_integer, + long_long_integer, long_long_integer, KADD64); + ADD_NDS32_BUILTIN2 ("ukadd64", long_long_unsigned, + long_long_unsigned, long_long_unsigned, UKADD64); + ADD_NDS32_BUILTIN2 ("ssub64", long_long_integer, + long_long_integer, long_long_integer, SSUB64); + ADD_NDS32_BUILTIN2 ("usub64", long_long_unsigned, + long_long_unsigned, long_long_unsigned, USUB64); + ADD_NDS32_BUILTIN2 ("rsub64", long_long_integer, + long_long_integer, long_long_integer, RSUB64); + ADD_NDS32_BUILTIN2 ("ursub64", long_long_unsigned, + long_long_unsigned, long_long_unsigned, URSUB64); + ADD_NDS32_BUILTIN2 ("ksub64", long_long_integer, + long_long_integer, long_long_integer, KSUB64); + ADD_NDS32_BUILTIN2 ("uksub64", long_long_unsigned, + long_long_unsigned, long_long_unsigned, UKSUB64); + + /* DSP Extension: 32bit Multiply with 64bit Add/Subtract. */ + ADD_NDS32_BUILTIN3 ("smar64", long_long_integer, + long_long_integer, integer, integer, SMAR64); + ADD_NDS32_BUILTIN3 ("smsr64", long_long_integer, + long_long_integer, integer, integer, SMSR64); + ADD_NDS32_BUILTIN3 ("umar64", long_long_unsigned, + long_long_unsigned, unsigned, unsigned, UMAR64); + ADD_NDS32_BUILTIN3 ("umsr64", long_long_unsigned, + long_long_unsigned, unsigned, unsigned, UMSR64); + ADD_NDS32_BUILTIN3 ("kmar64", long_long_integer, + long_long_integer, integer, integer, KMAR64); + ADD_NDS32_BUILTIN3 ("kmsr64", long_long_integer, + long_long_integer, integer, integer, KMSR64); + ADD_NDS32_BUILTIN3 ("ukmar64", long_long_unsigned, + long_long_unsigned, unsigned, unsigned, UKMAR64); + ADD_NDS32_BUILTIN3 ("ukmsr64", long_long_unsigned, + long_long_unsigned, unsigned, unsigned, UKMSR64); + + /* DSP Extension: Signed 16bit Multiply with 64bit Add/Subtract. 
*/ + ADD_NDS32_BUILTIN3 ("smalbb", long_long_integer, + long_long_integer, unsigned, unsigned, SMALBB); + ADD_NDS32_BUILTIN3 ("v_smalbb", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALBB); + ADD_NDS32_BUILTIN3 ("smalbt", long_long_integer, + long_long_integer, unsigned, unsigned, SMALBT); + ADD_NDS32_BUILTIN3 ("v_smalbt", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALBT); + ADD_NDS32_BUILTIN3 ("smaltt", long_long_integer, + long_long_integer, unsigned, unsigned, SMALTT); + ADD_NDS32_BUILTIN3 ("v_smaltt", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALTT); + ADD_NDS32_BUILTIN3 ("smalda", long_long_integer, + long_long_integer, unsigned, unsigned, SMALDA); + ADD_NDS32_BUILTIN3 ("v_smalda", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALDA); + ADD_NDS32_BUILTIN3 ("smalxda", long_long_integer, + long_long_integer, unsigned, unsigned, SMALXDA); + ADD_NDS32_BUILTIN3 ("v_smalxda", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALXDA); + ADD_NDS32_BUILTIN3 ("smalds", long_long_integer, + long_long_integer, unsigned, unsigned, SMALDS); + ADD_NDS32_BUILTIN3 ("v_smalds", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALDS); + ADD_NDS32_BUILTIN3 ("smaldrs", long_long_integer, + long_long_integer, unsigned, unsigned, SMALDRS); + ADD_NDS32_BUILTIN3 ("v_smaldrs", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALDRS); + ADD_NDS32_BUILTIN3 ("smalxds", long_long_integer, + long_long_integer, unsigned, unsigned, SMALXDS); + ADD_NDS32_BUILTIN3 ("v_smalxds", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALXDS); + ADD_NDS32_BUILTIN3 ("smslda", long_long_integer, + long_long_integer, unsigned, unsigned, SMSLDA); + ADD_NDS32_BUILTIN3 ("v_smslda", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMSLDA); + ADD_NDS32_BUILTIN3 ("smslxda", long_long_integer, + long_long_integer, unsigned, unsigned, SMSLXDA); + ADD_NDS32_BUILTIN3 ("v_smslxda", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMSLXDA); + + /* DSP Extension: augmented baseline. */ + ADD_NDS32_BUILTIN2 ("uclip32", unsigned, integer, unsigned, UCLIP32); + ADD_NDS32_BUILTIN2 ("sclip32", integer, integer, unsigned, SCLIP32); + ADD_NDS32_BUILTIN1 ("kabs", integer, integer, KABS); + + /* The builtin turn off hwloop optimization. */ + ADD_NDS32_BUILTIN0 ("no_ext_zol", void, NO_HWLOOP); +} +/* ------------------------------------------------------------------------ */ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32_intrinsic.h gcc-4.9.4/gcc/config/nds32/nds32_intrinsic.h --- gcc-4.9.4.orig/gcc/config/nds32/nds32_intrinsic.h 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32_intrinsic.h 2016-08-08 20:37:45.594273497 +0200 @@ -1,5 +1,5 @@ /* Intrinsic definitions of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -26,12 +26,1319 @@ #ifndef _NDS32_INTRINSIC_H #define _NDS32_INTRINSIC_H +typedef signed char int8x4_t __attribute ((vector_size(4))); +typedef short int16x2_t __attribute ((vector_size(4))); +typedef int int32x2_t __attribute__((vector_size(8))); +typedef unsigned char uint8x4_t __attribute__ ((vector_size (4))); +typedef unsigned short uint16x2_t __attribute__ ((vector_size (4))); +typedef unsigned int uint32x2_t __attribute__((vector_size(8))); + +/* General instrinsic register names. 
*/ enum nds32_intrinsic_registers { - __NDS32_REG_PSW__ = 1024, + __NDS32_REG_CPU_VER__ = 1024, + __NDS32_REG_ICM_CFG__, + __NDS32_REG_DCM_CFG__, + __NDS32_REG_MMU_CFG__, + __NDS32_REG_MSC_CFG__, + __NDS32_REG_MSC_CFG2__, + __NDS32_REG_CORE_ID__, + __NDS32_REG_FUCOP_EXIST__, + + __NDS32_REG_PSW__, __NDS32_REG_IPSW__, + __NDS32_REG_P_IPSW__, + __NDS32_REG_IVB__, + __NDS32_REG_EVA__, + __NDS32_REG_P_EVA__, __NDS32_REG_ITYPE__, - __NDS32_REG_IPC__ + __NDS32_REG_P_ITYPE__, + + __NDS32_REG_MERR__, + __NDS32_REG_IPC__, + __NDS32_REG_P_IPC__, + __NDS32_REG_OIPC__, + __NDS32_REG_P_P0__, + __NDS32_REG_P_P1__, + + __NDS32_REG_INT_MASK__, + __NDS32_REG_INT_MASK2__, + __NDS32_REG_INT_PEND__, + __NDS32_REG_INT_PEND2__, + __NDS32_REG_SP_USR__, + __NDS32_REG_SP_PRIV__, + __NDS32_REG_INT_PRI__, + __NDS32_REG_INT_PRI2__, + __NDS32_REG_INT_CTRL__, + __NDS32_REG_INT_TRIGGER__, + __NDS32_REG_INT_GPR_PUSH_DIS__, + + __NDS32_REG_MMU_CTL__, + __NDS32_REG_L1_PPTB__, + __NDS32_REG_TLB_VPN__, + __NDS32_REG_TLB_DATA__, + __NDS32_REG_TLB_MISC__, + __NDS32_REG_VLPT_IDX__, + __NDS32_REG_ILMB__, + __NDS32_REG_DLMB__, + + __NDS32_REG_CACHE_CTL__, + __NDS32_REG_HSMP_SADDR__, + __NDS32_REG_HSMP_EADDR__, + __NDS32_REG_SDZ_CTL__, + __NDS32_REG_N12MISC_CTL__, + __NDS32_REG_MISC_CTL__, + __NDS32_REG_ECC_MISC__, + + __NDS32_REG_BPC0__, + __NDS32_REG_BPC1__, + __NDS32_REG_BPC2__, + __NDS32_REG_BPC3__, + __NDS32_REG_BPC4__, + __NDS32_REG_BPC5__, + __NDS32_REG_BPC6__, + __NDS32_REG_BPC7__, + + __NDS32_REG_BPA0__, + __NDS32_REG_BPA1__, + __NDS32_REG_BPA2__, + __NDS32_REG_BPA3__, + __NDS32_REG_BPA4__, + __NDS32_REG_BPA5__, + __NDS32_REG_BPA6__, + __NDS32_REG_BPA7__, + + __NDS32_REG_BPAM0__, + __NDS32_REG_BPAM1__, + __NDS32_REG_BPAM2__, + __NDS32_REG_BPAM3__, + __NDS32_REG_BPAM4__, + __NDS32_REG_BPAM5__, + __NDS32_REG_BPAM6__, + __NDS32_REG_BPAM7__, + + __NDS32_REG_BPV0__, + __NDS32_REG_BPV1__, + __NDS32_REG_BPV2__, + __NDS32_REG_BPV3__, + __NDS32_REG_BPV4__, + __NDS32_REG_BPV5__, + __NDS32_REG_BPV6__, + __NDS32_REG_BPV7__, + + __NDS32_REG_BPCID0__, + __NDS32_REG_BPCID1__, + __NDS32_REG_BPCID2__, + __NDS32_REG_BPCID3__, + __NDS32_REG_BPCID4__, + __NDS32_REG_BPCID5__, + __NDS32_REG_BPCID6__, + __NDS32_REG_BPCID7__, + + __NDS32_REG_EDM_CFG__, + __NDS32_REG_EDMSW__, + __NDS32_REG_EDM_CTL__, + __NDS32_REG_EDM_DTR__, + __NDS32_REG_BPMTC__, + __NDS32_REG_DIMBR__, + + __NDS32_REG_TECR0__, + __NDS32_REG_TECR1__, + __NDS32_REG_PFMC0__, + __NDS32_REG_PFMC1__, + __NDS32_REG_PFMC2__, + __NDS32_REG_PFM_CTL__, + __NDS32_REG_PFT_CTL__, + __NDS32_REG_HSP_CTL__, + __NDS32_REG_SP_BOUND__, + __NDS32_REG_SP_BOUND_PRIV__, + __NDS32_REG_FUCOP_CTL__, + __NDS32_REG_PRUSR_ACC_CTL__, + + __NDS32_REG_DMA_CFG__, + __NDS32_REG_DMA_GCSW__, + __NDS32_REG_DMA_CHNSEL__, + __NDS32_REG_DMA_ACT__, + __NDS32_REG_DMA_SETUP__, + __NDS32_REG_DMA_ISADDR__, + __NDS32_REG_DMA_ESADDR__, + __NDS32_REG_DMA_TCNT__, + __NDS32_REG_DMA_STATUS__, + __NDS32_REG_DMA_2DSET__, + __NDS32_REG_DMA_2DSCTL__, + __NDS32_REG_DMA_RCNT__, + __NDS32_REG_DMA_HSTATUS__, + + __NDS32_REG_PC__, + __NDS32_REG_SP_USR1__, + __NDS32_REG_SP_USR2__, + __NDS32_REG_SP_USR3__, + __NDS32_REG_SP_PRIV1__, + __NDS32_REG_SP_PRIV2__, + __NDS32_REG_SP_PRIV3__, + __NDS32_REG_BG_REGION__, + __NDS32_REG_SFCR__, + __NDS32_REG_SIGN__, + __NDS32_REG_ISIGN__, + __NDS32_REG_P_ISIGN__, + __NDS32_REG_IFC_LP__, + __NDS32_REG_ITB__ +}; + +/* The cctl subtype for intrinsic. 
*/ +enum nds32_cctl_valck +{ + __NDS32_CCTL_L1D_VA_FILLCK__, + __NDS32_CCTL_L1D_VA_ULCK__, + __NDS32_CCTL_L1I_VA_FILLCK__, + __NDS32_CCTL_L1I_VA_ULCK__ +}; + +enum nds32_cctl_idxwbinv +{ + __NDS32_CCTL_L1D_IX_WBINVAL__, + __NDS32_CCTL_L1D_IX_INVAL__, + __NDS32_CCTL_L1D_IX_WB__, + __NDS32_CCTL_L1I_IX_INVAL__ }; +enum nds32_cctl_vawbinv +{ + __NDS32_CCTL_L1D_VA_INVAL__, + __NDS32_CCTL_L1D_VA_WB__, + __NDS32_CCTL_L1D_VA_WBINVAL__, + __NDS32_CCTL_L1I_VA_INVAL__ +}; + +enum nds32_cctl_idxread +{ + __NDS32_CCTL_L1D_IX_RTAG__, + __NDS32_CCTL_L1D_IX_RWD__, + __NDS32_CCTL_L1I_IX_RTAG__, + __NDS32_CCTL_L1I_IX_RWD__ +}; + +enum nds32_cctl_idxwrite +{ + __NDS32_CCTL_L1D_IX_WTAG__, + __NDS32_CCTL_L1D_IX_WWD__, + __NDS32_CCTL_L1I_IX_WTAG__, + __NDS32_CCTL_L1I_IX_WWD__ +}; + +enum nds32_dpref +{ + __NDS32_DPREF_SRD__, + __NDS32_DPREF_MRD__, + __NDS32_DPREF_SWR__, + __NDS32_DPREF_MWR__, + __NDS32_DPREF_PTE__, + __NDS32_DPREF_CLWR__ +}; + +/* ------------------------------------------------------------------------ */ + +/* Define interrupt number for intrinsic function. */ +#define NDS32_INT_H0 0 +#define NDS32_INT_H1 1 +#define NDS32_INT_H2 2 +#define NDS32_INT_H3 3 +#define NDS32_INT_H4 4 +#define NDS32_INT_H5 5 +#define NDS32_INT_H6 6 +#define NDS32_INT_H7 7 +#define NDS32_INT_H8 8 +#define NDS32_INT_H9 9 +#define NDS32_INT_H10 10 +#define NDS32_INT_H11 11 +#define NDS32_INT_H12 12 +#define NDS32_INT_H13 13 +#define NDS32_INT_H14 14 +#define NDS32_INT_H15 15 +#define NDS32_INT_SWI 16 +#define NDS32_INT_ALZ 29 +#define NDS32_INT_IDIVZE 30 +#define NDS32_INT_DSSIM 31 +#define NDS32_INT_H16 32 +#define NDS32_INT_H17 33 +#define NDS32_INT_H18 34 +#define NDS32_INT_H19 35 +#define NDS32_INT_H20 36 +#define NDS32_INT_H21 37 +#define NDS32_INT_H22 38 +#define NDS32_INT_H23 39 +#define NDS32_INT_H24 40 +#define NDS32_INT_H25 41 +#define NDS32_INT_H26 42 +#define NDS32_INT_H27 43 +#define NDS32_INT_H28 44 +#define NDS32_INT_H29 45 +#define NDS32_INT_H30 46 +#define NDS32_INT_H31 47 + +/* ------------------------------------------------------------------------ */ + +/* Define intrinsic register name macro for compatibility. 
*/ +#define NDS32_SR_CPU_VER __NDS32_REG_CPU_VER__ +#define NDS32_SR_ICM_CFG __NDS32_REG_ICM_CFG__ +#define NDS32_SR_DCM_CFG __NDS32_REG_DCM_CFG__ +#define NDS32_SR_MMU_CFG __NDS32_REG_MMU_CFG__ +#define NDS32_SR_MSC_CFG __NDS32_REG_MSC_CFG__ +#define NDS32_SR_MSC_CFG2 __NDS32_REG_MSC_CFG2__ +#define NDS32_SR_CORE_ID __NDS32_REG_CORE_ID__ +#define NDS32_SR_FUCOP_EXIST __NDS32_REG_FUCOP_EXIST__ +#define NDS32_SR_PSW __NDS32_REG_PSW__ +#define NDS32_SR_IPSW __NDS32_REG_IPSW__ +#define NDS32_SR_P_IPSW __NDS32_REG_P_IPSW__ +#define NDS32_SR_IVB __NDS32_REG_IVB__ +#define NDS32_SR_EVA __NDS32_REG_EVA__ +#define NDS32_SR_P_EVA __NDS32_REG_P_EVA__ +#define NDS32_SR_ITYPE __NDS32_REG_ITYPE__ +#define NDS32_SR_P_ITYPE __NDS32_REG_P_ITYPE__ +#define NDS32_SR_MERR __NDS32_REG_MERR__ +#define NDS32_SR_IPC __NDS32_REG_IPC__ +#define NDS32_SR_P_IPC __NDS32_REG_P_IPC__ +#define NDS32_SR_OIPC __NDS32_REG_OIPC__ +#define NDS32_SR_P_P0 __NDS32_REG_P_P0__ +#define NDS32_SR_P_P1 __NDS32_REG_P_P1__ +#define NDS32_SR_INT_MASK __NDS32_REG_INT_MASK__ +#define NDS32_SR_INT_MASK2 __NDS32_REG_INT_MASK2__ +#define NDS32_SR_INT_PEND __NDS32_REG_INT_PEND__ +#define NDS32_SR_INT_PEND2 __NDS32_REG_INT_PEND2__ +#define NDS32_SR_SP_USR __NDS32_REG_SP_USR__ +#define NDS32_SR_SP_PRIV __NDS32_REG_SP_PRIV__ +#define NDS32_SR_INT_PRI __NDS32_REG_INT_PRI__ +#define NDS32_SR_INT_PRI2 __NDS32_REG_INT_PRI2__ +#define NDS32_SR_INT_CTRL __NDS32_REG_INT_CTRL__ +#define NDS32_SR_INT_TRIGGER __NDS32_REG_INT_TRIGGER__ +#define NDS32_SR_INT_GPR_PUSH_DIS __NDS32_REG_INT_GPR_PUSH_DIS__ +#define NDS32_SR_MMU_CTL __NDS32_REG_MMU_CTL__ +#define NDS32_SR_L1_PPTB __NDS32_REG_L1_PPTB__ +#define NDS32_SR_TLB_VPN __NDS32_REG_TLB_VPN__ +#define NDS32_SR_TLB_DATA __NDS32_REG_TLB_DATA__ +#define NDS32_SR_TLB_MISC __NDS32_REG_TLB_MISC__ +#define NDS32_SR_VLPT_IDX __NDS32_REG_VLPT_IDX__ +#define NDS32_SR_ILMB __NDS32_REG_ILMB__ +#define NDS32_SR_DLMB __NDS32_REG_DLMB__ +#define NDS32_SR_CACHE_CTL __NDS32_REG_CACHE_CTL__ +#define NDS32_SR_HSMP_SADDR __NDS32_REG_HSMP_SADDR__ +#define NDS32_SR_HSMP_EADDR __NDS32_REG_HSMP_EADDR__ +#define NDS32_SR_SDZ_CTL __NDS32_REG_SDZ_CTL__ +#define NDS32_SR_N12MISC_CTL __NDS32_REG_N12MISC_CTL__ +#define NDS32_SR_MISC_CTL __NDS32_REG_MISC_CTL__ +#define NDS32_SR_ECC_MISC __NDS32_REG_ECC_MISC__ +#define NDS32_SR_BPC0 __NDS32_REG_BPC0__ +#define NDS32_SR_BPC1 __NDS32_REG_BPC1__ +#define NDS32_SR_BPC2 __NDS32_REG_BPC2__ +#define NDS32_SR_BPC3 __NDS32_REG_BPC3__ +#define NDS32_SR_BPC4 __NDS32_REG_BPC4__ +#define NDS32_SR_BPC5 __NDS32_REG_BPC5__ +#define NDS32_SR_BPC6 __NDS32_REG_BPC6__ +#define NDS32_SR_BPC7 __NDS32_REG_BPC7__ +#define NDS32_SR_BPA0 __NDS32_REG_BPA0__ +#define NDS32_SR_BPA1 __NDS32_REG_BPA1__ +#define NDS32_SR_BPA2 __NDS32_REG_BPA2__ +#define NDS32_SR_BPA3 __NDS32_REG_BPA3__ +#define NDS32_SR_BPA4 __NDS32_REG_BPA4__ +#define NDS32_SR_BPA5 __NDS32_REG_BPA5__ +#define NDS32_SR_BPA6 __NDS32_REG_BPA6__ +#define NDS32_SR_BPA7 __NDS32_REG_BPA7__ +#define NDS32_SR_BPAM0 __NDS32_REG_BPAM0__ +#define NDS32_SR_BPAM1 __NDS32_REG_BPAM1__ +#define NDS32_SR_BPAM2 __NDS32_REG_BPAM2__ +#define NDS32_SR_BPAM3 __NDS32_REG_BPAM3__ +#define NDS32_SR_BPAM4 __NDS32_REG_BPAM4__ +#define NDS32_SR_BPAM5 __NDS32_REG_BPAM5__ +#define NDS32_SR_BPAM6 __NDS32_REG_BPAM6__ +#define NDS32_SR_BPAM7 __NDS32_REG_BPAM7__ +#define NDS32_SR_BPV0 __NDS32_REG_BPV0__ +#define NDS32_SR_BPV1 __NDS32_REG_BPV1__ +#define NDS32_SR_BPV2 __NDS32_REG_BPV2__ +#define NDS32_SR_BPV3 __NDS32_REG_BPV3__ +#define NDS32_SR_BPV4 __NDS32_REG_BPV4__ +#define 
NDS32_SR_BPV5 __NDS32_REG_BPV5__ +#define NDS32_SR_BPV6 __NDS32_REG_BPV6__ +#define NDS32_SR_BPV7 __NDS32_REG_BPV7__ +#define NDS32_SR_BPCID0 __NDS32_REG_BPCID0__ +#define NDS32_SR_BPCID1 __NDS32_REG_BPCID1__ +#define NDS32_SR_BPCID2 __NDS32_REG_BPCID2__ +#define NDS32_SR_BPCID3 __NDS32_REG_BPCID3__ +#define NDS32_SR_BPCID4 __NDS32_REG_BPCID4__ +#define NDS32_SR_BPCID5 __NDS32_REG_BPCID5__ +#define NDS32_SR_BPCID6 __NDS32_REG_BPCID6__ +#define NDS32_SR_BPCID7 __NDS32_REG_BPCID7__ +#define NDS32_SR_EDM_CFG __NDS32_REG_EDM_CFG__ +#define NDS32_SR_EDMSW __NDS32_REG_EDMSW__ +#define NDS32_SR_EDM_CTL __NDS32_REG_EDM_CTL__ +#define NDS32_SR_EDM_DTR __NDS32_REG_EDM_DTR__ +#define NDS32_SR_BPMTC __NDS32_REG_BPMTC__ +#define NDS32_SR_DIMBR __NDS32_REG_DIMBR__ +#define NDS32_SR_TECR0 __NDS32_REG_TECR0__ +#define NDS32_SR_TECR1 __NDS32_REG_TECR1__ +#define NDS32_SR_PFMC0 __NDS32_REG_PFMC0__ +#define NDS32_SR_PFMC1 __NDS32_REG_PFMC1__ +#define NDS32_SR_PFMC2 __NDS32_REG_PFMC2__ +#define NDS32_SR_PFM_CTL __NDS32_REG_PFM_CTL__ +#define NDS32_SR_HSP_CTL __NDS32_REG_HSP_CTL__ +#define NDS32_SR_SP_BOUND __NDS32_REG_SP_BOUND__ +#define NDS32_SR_SP_BOUND_PRIV __NDS32_REG_SP_BOUND_PRIV__ +#define NDS32_SR_FUCOP_CTL __NDS32_REG_FUCOP_CTL__ +#define NDS32_SR_PRUSR_ACC_CTL __NDS32_REG_PRUSR_ACC_CTL__ +#define NDS32_SR_DMA_CFG __NDS32_REG_DMA_CFG__ +#define NDS32_SR_DMA_GCSW __NDS32_REG_DMA_GCSW__ +#define NDS32_SR_DMA_CHNSEL __NDS32_REG_DMA_CHNSEL__ +#define NDS32_SR_DMA_ACT __NDS32_REG_DMA_ACT__ +#define NDS32_SR_DMA_SETUP __NDS32_REG_DMA_SETUP__ +#define NDS32_SR_DMA_ISADDR __NDS32_REG_DMA_ISADDR__ +#define NDS32_SR_DMA_ESADDR __NDS32_REG_DMA_ESADDR__ +#define NDS32_SR_DMA_TCNT __NDS32_REG_DMA_TCNT__ +#define NDS32_SR_DMA_STATUS __NDS32_REG_DMA_STATUS__ +#define NDS32_SR_DMA_2DSET __NDS32_REG_DMA_2DSET__ +#define NDS32_SR_DMA_2DSCTL __NDS32_REG_DMA_2DSCTL__ +#define NDS32_SR_DMA_RCNT __NDS32_REG_DMA_RCNT__ +#define NDS32_SR_DMA_HSTATUS __NDS32_REG_DMA_HSTATUS__ +#define NDS32_SR_SP_USR1 __NDS32_REG_SP_USR1__ +#define NDS32_SR_SP_USR2 __NDS32_REG_SP_USR2__ +#define NDS32_SR_SP_USR3 __NDS32_REG_SP_USR3__ +#define NDS32_SR_SP_PRIV1 __NDS32_REG_SP_PRIV1__ +#define NDS32_SR_SP_PRIV2 __NDS32_REG_SP_PRIV2__ +#define NDS32_SR_SP_PRIV3 __NDS32_REG_SP_PRIV3__ +#define NDS32_SR_BG_REGION __NDS32_REG_BG_REGION__ +#define NDS32_SR_SFCR __NDS32_REG_SFCR__ +#define NDS32_SR_SIGN __NDS32_REG_SIGN__ +#define NDS32_SR_ISIGN __NDS32_REG_ISIGN__ +#define NDS32_SR_P_ISIGN __NDS32_REG_P_ISIGN__ + +#define NDS32_USR_PC __NDS32_REG_PC__ +#define NDS32_USR_DMA_CFG __NDS32_REG_DMA_CFG__ +#define NDS32_USR_DMA_GCSW __NDS32_REG_DMA_GCSW__ +#define NDS32_USR_DMA_CHNSEL __NDS32_REG_DMA_CHNSEL__ +#define NDS32_USR_DMA_ACT __NDS32_REG_DMA_ACT__ +#define NDS32_USR_DMA_SETUP __NDS32_REG_DMA_SETUP__ +#define NDS32_USR_DMA_ISADDR __NDS32_REG_DMA_ISADDR__ +#define NDS32_USR_DMA_ESADDR __NDS32_REG_DMA_ESADDR__ +#define NDS32_USR_DMA_TCNT __NDS32_REG_DMA_TCNT__ +#define NDS32_USR_DMA_STATUS __NDS32_REG_DMA_STATUS__ +#define NDS32_USR_DMA_2DSET __NDS32_REG_DMA_2DSET__ +#define NDS32_USR_DMA_2DSCTL __NDS32_REG_DMA_2DSCTL__ +#define NDS32_USR_PFMC0 __NDS32_REG_PFMC0__ +#define NDS32_USR_PFMC1 __NDS32_REG_PFMC1__ +#define NDS32_USR_PFMC2 __NDS32_REG_PFMC2__ +#define NDS32_USR_PFM_CTL __NDS32_REG_PFM_CTL__ +#define NDS32_USR_IFC_LP __NDS32_REG_IFC_LP__ +#define NDS32_USR_ITB __NDS32_REG_ITB__ + +#define NDS32_CCTL_L1D_VA_FILLCK __NDS32_CCTL_L1D_VA_FILLCK__ +#define NDS32_CCTL_L1D_VA_ULCK __NDS32_CCTL_L1D_VA_ULCK__ +#define NDS32_CCTL_L1I_VA_FILLCK 
__NDS32_CCTL_L1I_VA_FILLCK__ +#define NDS32_CCTL_L1I_VA_ULCK __NDS32_CCTL_L1I_VA_ULCK__ + +#define NDS32_CCTL_L1D_IX_WBINVAL __NDS32_CCTL_L1D_IX_WBINVAL__ +#define NDS32_CCTL_L1D_IX_INVAL __NDS32_CCTL_L1D_IX_INVAL__ +#define NDS32_CCTL_L1D_IX_WB __NDS32_CCTL_L1D_IX_WB__ +#define NDS32_CCTL_L1I_IX_INVAL __NDS32_CCTL_L1I_IX_INVAL__ + +#define NDS32_CCTL_L1D_VA_INVAL __NDS32_CCTL_L1D_VA_INVAL__ +#define NDS32_CCTL_L1D_VA_WB __NDS32_CCTL_L1D_VA_WB__ +#define NDS32_CCTL_L1D_VA_WBINVAL __NDS32_CCTL_L1D_VA_WBINVAL__ +#define NDS32_CCTL_L1I_VA_INVAL __NDS32_CCTL_L1I_VA_INVAL__ + +#define NDS32_CCTL_L1D_IX_RTAG __NDS32_CCTL_L1D_IX_RTAG__ +#define NDS32_CCTL_L1D_IX_RWD __NDS32_CCTL_L1D_IX_RWD__ +#define NDS32_CCTL_L1I_IX_RTAG __NDS32_CCTL_L1I_IX_RTAG__ +#define NDS32_CCTL_L1I_IX_RWD __NDS32_CCTL_L1I_IX_RWD__ + +#define NDS32_CCTL_L1D_IX_WTAG __NDS32_CCTL_L1D_IX_WTAG__ +#define NDS32_CCTL_L1D_IX_WWD __NDS32_CCTL_L1D_IX_WWD__ +#define NDS32_CCTL_L1I_IX_WTAG __NDS32_CCTL_L1I_IX_WTAG__ +#define NDS32_CCTL_L1I_IX_WWD __NDS32_CCTL_L1I_IX_WWD__ + +#define NDS32_DPREF_SRD __NDS32_DPREF_SRD__ +#define NDS32_DPREF_MRD __NDS32_DPREF_MRD__ +#define NDS32_DPREF_SWR __NDS32_DPREF_SWR__ +#define NDS32_DPREF_MWR __NDS32_DPREF_MWR__ +#define NDS32_DPREF_PTE __NDS32_DPREF_PTE__ +#define NDS32_DPREF_CLWR __NDS32_DPREF_CLWR__ + +/* ------------------------------------------------------------------------ */ + +/* Define user friendly macro. */ +#define SIGNATURE_BEGIN __nds32__signature_begin () +#define SIGNATURE_END __nds32__signature_end () + +/* Map __nds32__xxx() to __builtin_xxx() functions for compatibility. */ +#define __nds32__llw(a) \ + (__builtin_nds32_llw ((a))) +#define __nds32__lwup(a) \ + (__builtin_nds32_lwup ((a))) +#define __nds32__lbup(a) \ + (__builtin_nds32_lbup ((a))) +#define __nds32__scw(a, b) \ + (__builtin_nds32_scw ((a), (b))) +#define __nds32__swup(a, b) \ + (__builtin_nds32_swup ((a), (b))) +#define __nds32__sbup(a, b) \ + (__builtin_nds32_sbup ((a), (b))) + +#define __nds32__mfsr(srname) \ + (__builtin_nds32_mfsr ((srname))) +#define __nds32__mfusr(usrname) \ + (__builtin_nds32_mfusr ((usrname))) +#define __nds32__mtsr(val, srname) \ + (__builtin_nds32_mtsr ((val), (srname))) +#define __nds32__mtsr_isb(val, srname) \ + (__builtin_nds32_mtsr_isb ((val), (srname))) +#define __nds32__mtsr_dsb(val, srname) \ + (__builtin_nds32_mtsr_dsb ((val), (srname))) +#define __nds32__mtusr(val, usrname) \ + (__builtin_nds32_mtusr ((val), (usrname))) + +#define __nds32__break(swid) \ + (__builtin_nds32_break(swid)) +#define __nds32__cctlva_lck(subtype, va) \ + (__builtin_nds32_cctl_va_lck ((subtype), (va))) +#define __nds32__cctlidx_wbinval(subtype, idx) \ + (__builtin_nds32_cctl_idx_wbinval ((subtype), (idx))) +#define __nds32__cctlva_wbinval_alvl(subtype, va) \ + (__builtin_nds32_cctl_va_wbinval_la ((subtype), (va))) +#define __nds32__cctlva_wbinval_one_lvl(subtype, va) \ + (__builtin_nds32_cctl_va_wbinval_l1 ((subtype), (va))) +#define __nds32__cctlidx_read(subtype, idx) \ + (__builtin_nds32_cctl_idx_read ((subtype), (idx))) +#define __nds32__cctlidx_write(subtype, b, idxw) \ + (__builtin_nds32_cctl_idx_write ((subtype), (b), (idxw))) +#define __nds32__cctl_l1d_invalall() \ + (__builtin_nds32_cctl_l1d_invalall()) +#define __nds32__cctl_l1d_wball_alvl() \ + (__builtin_nds32_cctl_l1d_wball_alvl()) +#define __nds32__cctl_l1d_wball_one_lvl() \ + (__builtin_nds32_cctl_l1d_wball_one_lvl()) + +#define __nds32__dsb() \ + (__builtin_nds32_dsb()) +#define __nds32__isb() \ + (__builtin_nds32_isb()) +#define 
__nds32__msync_store() \ + (__builtin_nds32_msync_store()) +#define __nds32__msync_all() \ + (__builtin_nds32_msync_all()) +#define __nds32__nop() \ + (__builtin_nds32_nop()) + +#define __nds32__standby_wait_done() \ + (__builtin_nds32_standby_wait_done()) +#define __nds32__standby_no_wake_grant() \ + (__builtin_nds32_standby_no_wake_grant()) +#define __nds32__standby_wake_grant() \ + (__builtin_nds32_standby_wake_grant()) +#define __nds32__schedule_barrier() \ + (__builtin_nds32_schedule_barrier()) +#define __nds32__setend_big() \ + (__builtin_nds32_setend_big()) +#define __nds32__setend_little() \ + (__builtin_nds32_setend_little()) +#define __nds32__setgie_en() \ + (__builtin_nds32_setgie_en()) +#define __nds32__setgie_dis() \ + (__builtin_nds32_setgie_dis()) + +#define __nds32__jr_itoff(a) \ + (__builtin_nds32_jr_itoff ((a))) +#define __nds32__jr_toff(a) \ + (__builtin_nds32_jr_toff ((a))) +#define __nds32__jral_iton(a) \ + (__builtin_nds32_jral_iton ((a))) +#define __nds32__jral_ton(a) \ + (__builtin_nds32_jral_ton ((a))) +#define __nds32__ret_itoff(a) \ + (__builtin_nds32_ret_itoff ((a))) +#define __nds32__ret_toff(a) \ + (__builtin_nds32_ret_toff ((a))) +#define __nds32__svs(a, b) \ + (__builtin_nds32_svs ((a), (b))) +#define __nds32__sva(a, b) \ + (__builtin_nds32_sva ((a), (b))) +#define __nds32__dpref_qw(a, b, subtype) \ + (__builtin_nds32_dpref_qw ((a), (b), (subtype))) +#define __nds32__dpref_hw(a, b, subtype) \ + (__builtin_nds32_dpref_hw ((a), (b), (subtype))) +#define __nds32__dpref_w(a, b, subtype) \ + (__builtin_nds32_dpref_w ((a), (b), (subtype))) +#define __nds32__dpref_dw(a, b, subtype) \ + (__builtin_nds32_dpref_dw ((a), (b), (subtype))) + +#define __nds32__teqz(a, swid) \ + (__builtin_nds32_teqz ((a), (swid))) +#define __nds32__tnez(a, swid) \ + ( __builtin_nds32_tnez ((a), (swid))) +#define __nds32__trap(swid) \ + (__builtin_nds32_trap ((swid))) +#define __nds32__isync(a) \ + (__builtin_nds32_isync ((a))) +#define __nds32__rotr(val, ror) \ + (__builtin_nds32_rotr ((val), (ror))) +#define __nds32__wsbh(a) \ + (__builtin_nds32_wsbh ((a))) +#define __nds32__syscall(a) \ + (__builtin_nds32_syscall ((a))) +#define __nds32__return_address() \ + (__builtin_nds32_return_address()) +#define __nds32__get_current_sp() \ + (__builtin_nds32_get_current_sp()) +#define __nds32__set_current_sp(a) \ + (__builtin_nds32_set_current_sp ((a))) +#define __nds32__abs(a) \ + (__builtin_nds32_pe_abs ((a))) +#define __nds32__ave(a, b) \ + (__builtin_nds32_pe_ave ((a), (b))) +#define __nds32__bclr(a, pos) \ + (__builtin_nds32_pe_bclr ((a), (pos))) +#define __nds32__bset(a, pos) \ + (__builtin_nds32_pe_bset ((a), (pos))) +#define __nds32__btgl(a, pos) \ + (__builtin_nds32_pe_btgl ((a), (pos))) +#define __nds32__btst(a, pos) \ + (__builtin_nds32_pe_btst ((a), (pos))) + +#define __nds32__clip(a, imm) \ + (__builtin_nds32_pe_clip ((a), (imm))) +#define __nds32__clips(a, imm) \ + (__builtin_nds32_pe_clips ((a), (imm))) +#define __nds32__clz(a) \ + (__builtin_nds32_pe_clz ((a))) +#define __nds32__clo(a) \ + (__builtin_nds32_pe_clo ((a))) +#define __nds32__bse(r, a, b) \ + (__builtin_nds32_pe2_bse ((r), (a), (b))) +#define __nds32__bsp(r, a, b) \ + (__builtin_nds32_pe2_bsp ((r), (a), (b))) +#define __nds32__pbsad(a, b) \ + (__builtin_nds32_pe2_pbsad ((a), (b))) +#define __nds32__pbsada(acc, a, b) \ + (__builtin_nds32_pe2_pbsada ((acc), (a), (b))) + +#define __nds32__ffb(a, b) \ + (__builtin_nds32_se_ffb ((a), (b))) +#define __nds32__ffmism(a, b) \ + (__builtin_nds32_se_ffmism ((a), (b))) +#define 
__nds32__flmism(a, b) \ + (__builtin_nds32_se_flmism ((a), (b))) +#define __nds32__fcpynsd(a, b) \ + (__builtin_nds32_fcpynsd ((a), (b))) +#define __nds32__fcpynss(a, b) \ + (__builtin_nds32_fcpynss ((a), (b))) +#define __nds32__fcpysd(a, b) \ + (__builtin_nds32_fcpysd ((a), (b))) +#define __nds32__fcpyss(a, b) \ + (__builtin_nds32_fcpyss ((a), (b))) +#define __nds32__fmfcsr() \ + (__builtin_nds32_fmfcsr()) +#define __nds32__fmtcsr(fpcsr) \ + (__builtin_nds32_fmtcsr ((fpcsr))) +#define __nds32__fmfcfg() \ + (__builtin_nds32_fmfcfg()) + +#define __nds32__tlbop_trd(a) \ + (__builtin_nds32_tlbop_trd ((a))) +#define __nds32__tlbop_twr(a) \ + (__builtin_nds32_tlbop_twr ((a))) +#define __nds32__tlbop_rwr(a) \ + (__builtin_nds32_tlbop_rwr ((a))) +#define __nds32__tlbop_rwlk(a) \ + (__builtin_nds32_tlbop_rwlk ((a))) +#define __nds32__tlbop_unlk(a) \ + (__builtin_nds32_tlbop_unlk ((a))) +#define __nds32__tlbop_pb(a) \ + (__builtin_nds32_tlbop_pb ((a))) +#define __nds32__tlbop_inv(a) \ + (__builtin_nds32_tlbop_inv ((a))) +#define __nds32__tlbop_flua() \ +(__builtin_nds32_tlbop_flua()) + +#define __nds32__kaddw(a, b) \ + (__builtin_nds32_kaddw ((a), (b))) +#define __nds32__kaddh(a, b) \ + (__builtin_nds32_kaddh ((a), (b))) +#define __nds32__ksubw(a, b) \ + (__builtin_nds32_ksubw ((a), (b))) +#define __nds32__ksubh(a, b) \ + (__builtin_nds32_ksubh ((a), (b))) +#define __nds32__kdmbb(a, b) \ + (__builtin_nds32_kdmbb ((a), (b))) +#define __nds32__v_kdmbb(a, b) \ + (__builtin_nds32_v_kdmbb ((a), (b))) +#define __nds32__kdmbt(a, b) \ + (__builtin_nds32_kdmbt ((a), (b))) +#define __nds32__v_kdmbt(a, b) \ + (__builtin_nds32_v_kdmbt ((a), (b))) +#define __nds32__kdmtb(a, b) \ + (__builtin_nds32_kdmtb ((a), (b))) +#define __nds32__v_kdmtb(a, b) \ + (__builtin_nds32_v_kdmtb ((a), (b))) +#define __nds32__kdmtt(a, b) \ + (__builtin_nds32_kdmtt ((a), (b))) +#define __nds32__v_kdmtt(a, b) \ + (__builtin_nds32_v_kdmtt ((a), (b))) +#define __nds32__khmbb(a, b) \ + (__builtin_nds32_khmbb ((a), (b))) +#define __nds32__v_khmbb(a, b) \ + (__builtin_nds32_v_khmbb ((a), (b))) +#define __nds32__khmbt(a, b) \ + (__builtin_nds32_khmbt ((a), (b))) +#define __nds32__v_khmbt(a, b) \ + (__builtin_nds32_v_khmbt ((a), (b))) +#define __nds32__khmtb(a, b) \ + (__builtin_nds32_khmtb ((a), (b))) +#define __nds32__v_khmtb(a, b) \ + (__builtin_nds32_v_khmtb ((a), (b))) +#define __nds32__khmtt(a, b) \ + (__builtin_nds32_khmtt ((a), (b))) +#define __nds32__v_khmtt(a, b) \ + (__builtin_nds32_v_khmtt ((a), (b))) +#define __nds32__kslraw(a, b) \ + (__builtin_nds32_kslraw ((a), (b))) +#define __nds32__kslraw_u(a, b) \ + (__builtin_nds32_kslraw_u ((a), (b))) + +#define __nds32__rdov() \ + (__builtin_nds32_rdov()) +#define __nds32__clrov() \ + (__builtin_nds32_clrov()) +#define __nds32__gie_dis() \ + (__builtin_nds32_gie_dis()) +#define __nds32__gie_en() \ + (__builtin_nds32_gie_en()) +#define __nds32__enable_int(a) \ + (__builtin_nds32_enable_int ((a))) +#define __nds32__disable_int(a) \ + (__builtin_nds32_disable_int ((a))) +#define __nds32__set_pending_swint() \ + (__builtin_nds32_set_pending_swint()) +#define __nds32__clr_pending_swint() \ + (__builtin_nds32_clr_pending_swint()) +#define __nds32__clr_pending_hwint(a) \ + (__builtin_nds32_clr_pending_hwint(a)) +#define __nds32__get_all_pending_int() \ + (__builtin_nds32_get_all_pending_int()) +#define __nds32__get_pending_int(a) \ + (__builtin_nds32_get_pending_int ((a))) +#define __nds32__set_int_priority(a, b) \ + (__builtin_nds32_set_int_priority ((a), (b))) +#define 
__nds32__get_int_priority(a) \ + (__builtin_nds32_get_int_priority ((a))) +#define __nds32__set_trig_type_level(a) \ + (__builtin_nds32_set_trig_level(a)) +#define __nds32__set_trig_type_edge(a) \ + (__builtin_nds32_set_trig_edge(a)) +#define __nds32__get_trig_type(a) \ + (__builtin_nds32_get_trig_type ((a))) + +#define __nds32__get_unaligned_hw(a) \ + (__builtin_nds32_unaligned_load_hw ((a))) +#define __nds32__get_unaligned_w(a) \ + (__builtin_nds32_unaligned_load_w ((a))) +#define __nds32__get_unaligned_dw(a) \ + (__builtin_nds32_unaligned_load_dw ((a))) +#define __nds32__put_unaligned_hw(a, data) \ + (__builtin_nds32_unaligned_store_hw ((a), (data))) +#define __nds32__put_unaligned_w(a, data) \ + (__builtin_nds32_unaligned_store_w ((a), (data))) +#define __nds32__put_unaligned_dw(a, data) \ + (__builtin_nds32_unaligned_store_dw ((a), (data))) + +#define __nds32__signature_begin() \ + (__builtin_nds32_signature_begin ()) +#define __nds32__signature_end() \ + (__builtin_nds32_signature_end ()) + +#define __nds32__add16(a, b) \ + (__builtin_nds32_add16 ((a), (b))) +#define __nds32__v_uadd16(a, b) \ + (__builtin_nds32_v_uadd16 ((a), (b))) +#define __nds32__v_sadd16(a, b) \ + (__builtin_nds32_v_sadd16 ((a), (b))) +#define __nds32__radd16(a, b) \ + (__builtin_nds32_radd16 ((a), (b))) +#define __nds32__v_radd16(a, b) \ + (__builtin_nds32_v_radd16 ((a), (b))) +#define __nds32__uradd16(a, b) \ + (__builtin_nds32_uradd16 ((a), (b))) +#define __nds32__v_uradd16(a, b) \ + (__builtin_nds32_v_uradd16 ((a), (b))) +#define __nds32__kadd16(a, b) \ + (__builtin_nds32_kadd16 ((a), (b))) +#define __nds32__v_kadd16(a, b) \ + (__builtin_nds32_v_kadd16 ((a), (b))) +#define __nds32__ukadd16(a, b) \ + (__builtin_nds32_ukadd16 ((a), (b))) +#define __nds32__v_ukadd16(a, b) \ + (__builtin_nds32_v_ukadd16 ((a), (b))) +#define __nds32__sub16(a, b) \ + (__builtin_nds32_sub16 ((a), (b))) +#define __nds32__v_usub16(a, b) \ + (__builtin_nds32_v_usub16 ((a), (b))) +#define __nds32__v_ssub16(a, b) \ + (__builtin_nds32_v_ssub16 ((a), (b))) +#define __nds32__rsub16(a, b) \ + (__builtin_nds32_rsub16 ((a), (b))) +#define __nds32__v_rsub16(a, b) \ + (__builtin_nds32_v_rsub16 ((a), (b))) +#define __nds32__ursub16(a, b) \ + (__builtin_nds32_ursub16 ((a), (b))) +#define __nds32__v_ursub16(a, b) \ + (__builtin_nds32_v_ursub16 ((a), (b))) +#define __nds32__ksub16(a, b) \ + (__builtin_nds32_ksub16 ((a), (b))) +#define __nds32__v_ksub16(a, b) \ + (__builtin_nds32_v_ksub16 ((a), (b))) +#define __nds32__uksub16(a, b) \ + (__builtin_nds32_uksub16 ((a), (b))) +#define __nds32__v_uksub16(a, b) \ + (__builtin_nds32_v_uksub16 ((a), (b))) +#define __nds32__cras16(a, b) \ + (__builtin_nds32_cras16 ((a), (b))) +#define __nds32__v_ucras16(a, b) \ + (__builtin_nds32_v_ucras16 ((a), (b))) +#define __nds32__v_scras16(a, b) \ + (__builtin_nds32_v_scras16 ((a), (b))) +#define __nds32__rcras16(a, b) \ + (__builtin_nds32_rcras16 ((a), (b))) +#define __nds32__v_rcras16(a, b) \ + (__builtin_nds32_v_rcras16 ((a), (b))) +#define __nds32__urcras16(a, b) \ + (__builtin_nds32_urcras16 ((a), (b))) +#define __nds32__v_urcras16(a, b) \ + (__builtin_nds32_v_urcras16 ((a), (b))) +#define __nds32__kcras16(a, b) \ + (__builtin_nds32_kcras16 ((a), (b))) +#define __nds32__v_kcras16(a, b) \ + (__builtin_nds32_v_kcras16 ((a), (b))) +#define __nds32__ukcras16(a, b) \ + (__builtin_nds32_ukcras16 ((a), (b))) +#define __nds32__v_ukcras16(a, b) \ + (__builtin_nds32_v_ukcras16 ((a), (b))) +#define __nds32__crsa16(a, b) \ + (__builtin_nds32_crsa16 ((a), (b))) +#define 
__nds32__v_ucrsa16(a, b) \ + (__builtin_nds32_v_ucrsa16 ((a), (b))) +#define __nds32__v_scrsa16(a, b) \ + (__builtin_nds32_v_scrsa16 ((a), (b))) +#define __nds32__rcrsa16(a, b) \ + (__builtin_nds32_rcrsa16 ((a), (b))) +#define __nds32__v_rcrsa16(a, b) \ + (__builtin_nds32_v_rcrsa16 ((a), (b))) +#define __nds32__urcrsa16(a, b) \ + (__builtin_nds32_urcrsa16 ((a), (b))) +#define __nds32__v_urcrsa16(a, b) \ + (__builtin_nds32_v_urcrsa16 ((a), (b))) +#define __nds32__kcrsa16(a, b) \ + (__builtin_nds32_kcrsa16 ((a), (b))) +#define __nds32__v_kcrsa16(a, b) \ + (__builtin_nds32_v_kcrsa16 ((a), (b))) +#define __nds32__ukcrsa16(a, b) \ + (__builtin_nds32_ukcrsa16 ((a), (b))) +#define __nds32__v_ukcrsa16(a, b) \ + (__builtin_nds32_v_ukcrsa16 ((a), (b))) + +#define __nds32__add8(a, b) \ + (__builtin_nds32_add8 ((a), (b))) +#define __nds32__v_uadd8(a, b) \ + (__builtin_nds32_v_uadd8 ((a), (b))) +#define __nds32__v_sadd8(a, b) \ + (__builtin_nds32_v_sadd8 ((a), (b))) +#define __nds32__radd8(a, b) \ + (__builtin_nds32_radd8 ((a), (b))) +#define __nds32__v_radd8(a, b) \ + (__builtin_nds32_v_radd8 ((a), (b))) +#define __nds32__uradd8(a, b) \ + (__builtin_nds32_uradd8 ((a), (b))) +#define __nds32__v_uradd8(a, b) \ + (__builtin_nds32_v_uradd8 ((a), (b))) +#define __nds32__kadd8(a, b) \ + (__builtin_nds32_kadd8 ((a), (b))) +#define __nds32__v_kadd8(a, b) \ + (__builtin_nds32_v_kadd8 ((a), (b))) +#define __nds32__ukadd8(a, b) \ + (__builtin_nds32_ukadd8 ((a), (b))) +#define __nds32__v_ukadd8(a, b) \ + (__builtin_nds32_v_ukadd8 ((a), (b))) +#define __nds32__sub8(a, b) \ + (__builtin_nds32_sub8 ((a), (b))) +#define __nds32__v_usub8(a, b) \ + (__builtin_nds32_v_usub8 ((a), (b))) +#define __nds32__v_ssub8(a, b) \ + (__builtin_nds32_v_ssub8 ((a), (b))) +#define __nds32__rsub8(a, b) \ + (__builtin_nds32_rsub8 ((a), (b))) +#define __nds32__v_rsub8(a, b) \ + (__builtin_nds32_v_rsub8 ((a), (b))) +#define __nds32__ursub8(a, b) \ + (__builtin_nds32_ursub8 ((a), (b))) +#define __nds32__v_ursub8(a, b) \ + (__builtin_nds32_v_ursub8 ((a), (b))) +#define __nds32__ksub8(a, b) \ + (__builtin_nds32_ksub8 ((a), (b))) +#define __nds32__v_ksub8(a, b) \ + (__builtin_nds32_v_ksub8 ((a), (b))) +#define __nds32__uksub8(a, b) \ + (__builtin_nds32_uksub8 ((a), (b))) +#define __nds32__v_uksub8(a, b) \ + (__builtin_nds32_v_uksub8 ((a), (b))) + +#define __nds32__sra16(a, b) \ + (__builtin_nds32_sra16 ((a), (b))) +#define __nds32__v_sra16(a, b) \ + (__builtin_nds32_v_sra16 ((a), (b))) +#define __nds32__sra16_u(a, b) \ + (__builtin_nds32_sra16_u ((a), (b))) +#define __nds32__v_sra16_u(a, b) \ + (__builtin_nds32_v_sra16_u ((a), (b))) +#define __nds32__srl16(a, b) \ + (__builtin_nds32_srl16 ((a), (b))) +#define __nds32__v_srl16(a, b) \ + (__builtin_nds32_v_srl16 ((a), (b))) +#define __nds32__srl16_u(a, b) \ + (__builtin_nds32_srl16_u ((a), (b))) +#define __nds32__v_srl16_u(a, b) \ + (__builtin_nds32_v_srl16_u ((a), (b))) +#define __nds32__sll16(a, b) \ + (__builtin_nds32_sll16 ((a), (b))) +#define __nds32__v_sll16(a, b) \ + (__builtin_nds32_v_sll16 ((a), (b))) +#define __nds32__ksll16(a, b) \ + (__builtin_nds32_ksll16 ((a), (b))) +#define __nds32__v_ksll16(a, b) \ + (__builtin_nds32_v_ksll16 ((a), (b))) +#define __nds32__kslra16(a, b) \ + (__builtin_nds32_kslra16 ((a), (b))) +#define __nds32__v_kslra16(a, b) \ + (__builtin_nds32_v_kslra16 ((a), (b))) +#define __nds32__kslra16_u(a, b) \ + (__builtin_nds32_kslra16_u ((a), (b))) +#define __nds32__v_kslra16_u(a, b) \ + (__builtin_nds32_v_kslra16_u ((a), (b))) + +#define __nds32__cmpeq16(a, b) \ + 
(__builtin_nds32_cmpeq16 ((a), (b))) +#define __nds32__v_scmpeq16(a, b) \ + (__builtin_nds32_v_scmpeq16 ((a), (b))) +#define __nds32__v_ucmpeq16(a, b) \ + (__builtin_nds32_v_ucmpeq16 ((a), (b))) +#define __nds32__scmplt16(a, b) \ + (__builtin_nds32_scmplt16 ((a), (b))) +#define __nds32__v_scmplt16(a, b) \ + (__builtin_nds32_v_scmplt16 ((a), (b))) +#define __nds32__scmple16(a, b) \ + (__builtin_nds32_scmple16 ((a), (b))) +#define __nds32__v_scmple16(a, b) \ + (__builtin_nds32_v_scmple16 ((a), (b))) +#define __nds32__ucmplt16(a, b) \ + (__builtin_nds32_ucmplt16 ((a), (b))) +#define __nds32__v_ucmplt16(a, b) \ + (__builtin_nds32_v_ucmplt16 ((a), (b))) +#define __nds32__ucmple16(a, b) \ + (__builtin_nds32_ucmple16 ((a), (b))) +#define __nds32__v_ucmple16(a, b) \ + (__builtin_nds32_v_ucmple16 ((a), (b))) + +#define __nds32__cmpeq8(a, b) \ + (__builtin_nds32_cmpeq8 ((a), (b))) +#define __nds32__v_scmpeq8(a, b) \ + (__builtin_nds32_v_scmpeq8 ((a), (b))) +#define __nds32__v_ucmpeq8(a, b) \ + (__builtin_nds32_v_ucmpeq8 ((a), (b))) +#define __nds32__scmplt8(a, b) \ + (__builtin_nds32_scmplt8 ((a), (b))) +#define __nds32__v_scmplt8(a, b) \ + (__builtin_nds32_v_scmplt8 ((a), (b))) +#define __nds32__scmple8(a, b) \ + (__builtin_nds32_scmple8 ((a), (b))) +#define __nds32__v_scmple8(a, b) \ + (__builtin_nds32_v_scmple8 ((a), (b))) +#define __nds32__ucmplt8(a, b) \ + (__builtin_nds32_ucmplt8 ((a), (b))) +#define __nds32__v_ucmplt8(a, b) \ + (__builtin_nds32_v_ucmplt8 ((a), (b))) +#define __nds32__ucmple8(a, b) \ + (__builtin_nds32_ucmple8 ((a), (b))) +#define __nds32__v_ucmple8(a, b) \ + (__builtin_nds32_v_ucmple8 ((a), (b))) + +#define __nds32__smin16(a, b) \ + (__builtin_nds32_smin16 ((a), (b))) +#define __nds32__v_smin16(a, b) \ + (__builtin_nds32_v_smin16 ((a), (b))) +#define __nds32__umin16(a, b) \ + (__builtin_nds32_umin16 ((a), (b))) +#define __nds32__v_umin16(a, b) \ + (__builtin_nds32_v_umin16 ((a), (b))) +#define __nds32__smax16(a, b) \ + (__builtin_nds32_smax16 ((a), (b))) +#define __nds32__v_smax16(a, b) \ + (__builtin_nds32_v_smax16 ((a), (b))) +#define __nds32__umax16(a, b) \ + (__builtin_nds32_umax16 ((a), (b))) +#define __nds32__v_umax16(a, b) \ + (__builtin_nds32_v_umax16 ((a), (b))) +#define __nds32__sclip16(a, b) \ + (__builtin_nds32_sclip16 ((a), (b))) +#define __nds32__v_sclip16(a, b) \ + (__builtin_nds32_v_sclip16 ((a), (b))) +#define __nds32__uclip16(a, b) \ + (__builtin_nds32_uclip16 ((a), (b))) +#define __nds32__v_uclip16(a, b) \ + (__builtin_nds32_v_uclip16 ((a), (b))) +#define __nds32__khm16(a, b) \ + (__builtin_nds32_khm16 ((a), (b))) +#define __nds32__v_khm16(a, b) \ + (__builtin_nds32_v_khm16 ((a), (b))) +#define __nds32__khmx16(a, b) \ + (__builtin_nds32_khmx16 ((a), (b))) +#define __nds32__v_khmx16(a, b) \ + (__builtin_nds32_v_khmx16 ((a), (b))) +#define __nds32__kabs16(a) \ + (__builtin_nds32_kabs16 ((a))) +#define __nds32__v_kabs16(a) \ + (__builtin_nds32_v_kabs16 ((a))) + +#define __nds32__smin8(a, b) \ + (__builtin_nds32_smin8 ((a), (b))) +#define __nds32__v_smin8(a, b) \ + (__builtin_nds32_v_smin8 ((a), (b))) +#define __nds32__umin8(a, b) \ + (__builtin_nds32_umin8 ((a), (b))) +#define __nds32__v_umin8(a, b) \ + (__builtin_nds32_v_umin8 ((a), (b))) +#define __nds32__smax8(a, b) \ + (__builtin_nds32_smax8 ((a), (b))) +#define __nds32__v_smax8(a, b) \ + (__builtin_nds32_v_smax8 ((a), (b))) +#define __nds32__umax8(a, b) \ + (__builtin_nds32_umax8 ((a), (b))) +#define __nds32__v_umax8(a, b) \ + (__builtin_nds32_v_umax8 ((a), (b))) +#define __nds32__kabs8(a) \ + 
(__builtin_nds32_kabs8 ((a))) +#define __nds32__v_kabs8(a) \ + (__builtin_nds32_v_kabs8 ((a))) + +#define __nds32__sunpkd810(a) \ + (__builtin_nds32_sunpkd810 ((a))) +#define __nds32__v_sunpkd810(a) \ + (__builtin_nds32_v_sunpkd810 ((a))) +#define __nds32__sunpkd820(a) \ + (__builtin_nds32_sunpkd820 ((a))) +#define __nds32__v_sunpkd820(a) \ + (__builtin_nds32_v_sunpkd820 ((a))) +#define __nds32__sunpkd830(a) \ + (__builtin_nds32_sunpkd830 ((a))) +#define __nds32__v_sunpkd830(a) \ + (__builtin_nds32_v_sunpkd830 ((a))) +#define __nds32__sunpkd831(a) \ + (__builtin_nds32_sunpkd831 ((a))) +#define __nds32__v_sunpkd831(a) \ + (__builtin_nds32_v_sunpkd831 ((a))) +#define __nds32__zunpkd810(a) \ + (__builtin_nds32_zunpkd810 ((a))) +#define __nds32__v_zunpkd810(a) \ + (__builtin_nds32_v_zunpkd810 ((a))) +#define __nds32__zunpkd820(a) \ + (__builtin_nds32_zunpkd820 ((a))) +#define __nds32__v_zunpkd820(a) \ + (__builtin_nds32_v_zunpkd820 ((a))) +#define __nds32__zunpkd830(a) \ + (__builtin_nds32_zunpkd830 ((a))) +#define __nds32__v_zunpkd830(a) \ + (__builtin_nds32_v_zunpkd830 ((a))) +#define __nds32__zunpkd831(a) \ + (__builtin_nds32_zunpkd831 ((a))) +#define __nds32__v_zunpkd831(a) \ + (__builtin_nds32_v_zunpkd831 ((a))) + +#define __nds32__raddw(a, b) \ + (__builtin_nds32_raddw ((a), (b))) +#define __nds32__uraddw(a, b) \ + (__builtin_nds32_uraddw ((a), (b))) +#define __nds32__rsubw(a, b) \ + (__builtin_nds32_rsubw ((a), (b))) +#define __nds32__ursubw(a, b) \ + (__builtin_nds32_ursubw ((a), (b))) + +#define __nds32__sra_u(a, b) \ + (__builtin_nds32_sra_u ((a), (b))) +#define __nds32__ksll(a, b) \ + (__builtin_nds32_ksll ((a), (b))) +#define __nds32__pkbb16(a, b) \ + (__builtin_nds32_pkbb16 ((a), (b))) +#define __nds32__v_pkbb16(a, b) \ + (__builtin_nds32_v_pkbb16 ((a), (b))) +#define __nds32__pkbt16(a, b) \ + (__builtin_nds32_pkbt16 ((a), (b))) +#define __nds32__v_pkbt16(a, b) \ + (__builtin_nds32_v_pkbt16 ((a), (b))) +#define __nds32__pktb16(a, b) \ + (__builtin_nds32_pktb16 ((a), (b))) +#define __nds32__v_pktb16(a, b) \ + (__builtin_nds32_v_pktb16 ((a), (b))) +#define __nds32__pktt16(a, b) \ + (__builtin_nds32_pktt16 ((a), (b))) +#define __nds32__v_pktt16(a, b) \ + (__builtin_nds32_v_pktt16 ((a), (b))) + +#define __nds32__smmul(a, b) \ + (__builtin_nds32_smmul ((a), (b))) +#define __nds32__smmul_u(a, b) \ + (__builtin_nds32_smmul_u ((a), (b))) +#define __nds32__kmmac(r, a, b) \ + (__builtin_nds32_kmmac ((r), (a), (b))) +#define __nds32__kmmac_u(r, a, b) \ + (__builtin_nds32_kmmac_u ((r), (a), (b))) +#define __nds32__kmmsb(r, a, b) \ + (__builtin_nds32_kmmsb ((r), (a), (b))) +#define __nds32__kmmsb_u(r, a, b) \ + (__builtin_nds32_kmmsb_u ((r), (a), (b))) +#define __nds32__kwmmul(a, b) \ + (__builtin_nds32_kwmmul ((a), (b))) +#define __nds32__kwmmul_u(a, b) \ + (__builtin_nds32_kwmmul_u ((a), (b))) + +#define __nds32__smmwb(a, b) \ + (__builtin_nds32_smmwb ((a), (b))) +#define __nds32__v_smmwb(a, b) \ + (__builtin_nds32_v_smmwb ((a), (b))) +#define __nds32__smmwb_u(a, b) \ + (__builtin_nds32_smmwb_u ((a), (b))) +#define __nds32__v_smmwb_u(a, b) \ + (__builtin_nds32_v_smmwb_u ((a), (b))) +#define __nds32__smmwt(a, b) \ + (__builtin_nds32_smmwt ((a), (b))) +#define __nds32__v_smmwt(a, b) \ + (__builtin_nds32_v_smmwt ((a), (b))) +#define __nds32__smmwt_u(a, b) \ + (__builtin_nds32_smmwt_u ((a), (b))) +#define __nds32__v_smmwt_u(a, b) \ + (__builtin_nds32_v_smmwt_u ((a), (b))) +#define __nds32__kmmawb(r, a, b) \ + (__builtin_nds32_kmmawb ((r), (a), (b))) +#define __nds32__v_kmmawb(r, a, b) \ + 
(__builtin_nds32_v_kmmawb ((r), (a), (b))) +#define __nds32__kmmawb_u(r, a, b) \ + (__builtin_nds32_kmmawb_u ((r), (a), (b))) +#define __nds32__v_kmmawb_u(r, a, b) \ + (__builtin_nds32_v_kmmawb_u ((r), (a), (b))) +#define __nds32__kmmawt(r, a, b) \ + (__builtin_nds32_kmmawt ((r), (a), (b))) +#define __nds32__v_kmmawt(r, a, b) \ + (__builtin_nds32_v_kmmawt ((r), (a), (b))) +#define __nds32__kmmawt_u(r, a, b) \ + (__builtin_nds32_kmmawt_u ((r), (a), (b))) +#define __nds32__v_kmmawt_u(r, a, b) \ + (__builtin_nds32_v_kmmawt_u ((r), (a), (b))) + +#define __nds32__smbb(a, b) \ + (__builtin_nds32_smbb ((a), (b))) +#define __nds32__v_smbb(a, b) \ + (__builtin_nds32_v_smbb ((a), (b))) +#define __nds32__smbt(a, b) \ + (__builtin_nds32_smbt ((a), (b))) +#define __nds32__v_smbt(a, b) \ + (__builtin_nds32_v_smbt ((a), (b))) +#define __nds32__smtt(a, b) \ + (__builtin_nds32_smtt ((a), (b))) +#define __nds32__v_smtt(a, b) \ + (__builtin_nds32_v_smtt ((a), (b))) +#define __nds32__kmda(a, b) \ + (__builtin_nds32_kmda ((a), (b))) +#define __nds32__v_kmda(a, b) \ + (__builtin_nds32_v_kmda ((a), (b))) +#define __nds32__kmxda(a, b) \ + (__builtin_nds32_kmxda ((a), (b))) +#define __nds32__v_kmxda(a, b) \ + (__builtin_nds32_v_kmxda ((a), (b))) +#define __nds32__smds(a, b) \ + (__builtin_nds32_smds ((a), (b))) +#define __nds32__v_smds(a, b) \ + (__builtin_nds32_v_smds ((a), (b))) +#define __nds32__smdrs(a, b) \ + (__builtin_nds32_smdrs ((a), (b))) +#define __nds32__v_smdrs(a, b) \ + (__builtin_nds32_v_smdrs ((a), (b))) +#define __nds32__smxds(a, b) \ + (__builtin_nds32_smxds ((a), (b))) +#define __nds32__v_smxds(a, b) \ + (__builtin_nds32_v_smxds ((a), (b))) +#define __nds32__kmabb(r, a, b) \ + (__builtin_nds32_kmabb ((r), (a), (b))) +#define __nds32__v_kmabb(r, a, b) \ + (__builtin_nds32_v_kmabb ((r), (a), (b))) +#define __nds32__kmabt(r, a, b) \ + (__builtin_nds32_kmabt ((r), (a), (b))) +#define __nds32__v_kmabt(r, a, b) \ + (__builtin_nds32_v_kmabt ((r), (a), (b))) +#define __nds32__kmatt(r, a, b) \ + (__builtin_nds32_kmatt ((r), (a), (b))) +#define __nds32__v_kmatt(r, a, b) \ + (__builtin_nds32_v_kmatt ((r), (a), (b))) +#define __nds32__kmada(r, a, b) \ + (__builtin_nds32_kmada ((r), (a), (b))) +#define __nds32__v_kmada(r, a, b) \ + (__builtin_nds32_v_kmada ((r), (a), (b))) +#define __nds32__kmaxda(r, a, b) \ + (__builtin_nds32_kmaxda ((r), (a), (b))) +#define __nds32__v_kmaxda(r, a, b) \ + (__builtin_nds32_v_kmaxda ((r), (a), (b))) +#define __nds32__kmads(r, a, b) \ + (__builtin_nds32_kmads ((r), (a), (b))) +#define __nds32__v_kmads(r, a, b) \ + (__builtin_nds32_v_kmads ((r), (a), (b))) +#define __nds32__kmadrs(r, a, b) \ + (__builtin_nds32_kmadrs ((r), (a), (b))) +#define __nds32__v_kmadrs(r, a, b) \ + (__builtin_nds32_v_kmadrs ((r), (a), (b))) +#define __nds32__kmaxds(r, a, b) \ + (__builtin_nds32_kmaxds ((r), (a), (b))) +#define __nds32__v_kmaxds(r, a, b) \ + (__builtin_nds32_v_kmaxds ((r), (a), (b))) +#define __nds32__kmsda(r, a, b) \ + (__builtin_nds32_kmsda ((r), (a), (b))) +#define __nds32__v_kmsda(r, a, b) \ + (__builtin_nds32_v_kmsda ((r), (a), (b))) +#define __nds32__kmsxda(r, a, b) \ + (__builtin_nds32_kmsxda ((r), (a), (b))) +#define __nds32__v_kmsxda(r, a, b) \ + (__builtin_nds32_v_kmsxda ((r), (a), (b))) + +#define __nds32__smal(a, b) \ + (__builtin_nds32_smal ((a), (b))) +#define __nds32__v_smal(a, b) \ + (__builtin_nds32_v_smal ((a), (b))) + +#define __nds32__bitrev(a, b) \ + (__builtin_nds32_bitrev ((a), (b))) +#define __nds32__wext(a, b) \ + (__builtin_nds32_wext ((a), (b))) +#define 
__nds32__bpick(r, a, b) \ + (__builtin_nds32_bpick ((r), (a), (b))) +#define __nds32__insb(r, a, b) \ + (__builtin_nds32_insb ((r), (a), (b))) + +#define __nds32__sadd64(a, b) \ + (__builtin_nds32_sadd64 ((a), (b))) +#define __nds32__uadd64(a, b) \ + (__builtin_nds32_uadd64 ((a), (b))) +#define __nds32__radd64(a, b) \ + (__builtin_nds32_radd64 ((a), (b))) +#define __nds32__uradd64(a, b) \ + (__builtin_nds32_uradd64 ((a), (b))) +#define __nds32__kadd64(a, b) \ + (__builtin_nds32_kadd64 ((a), (b))) +#define __nds32__ukadd64(a, b) \ + (__builtin_nds32_ukadd64 ((a), (b))) +#define __nds32__ssub64(a, b) \ + (__builtin_nds32_ssub64 ((a), (b))) +#define __nds32__usub64(a, b) \ + (__builtin_nds32_usub64 ((a), (b))) +#define __nds32__rsub64(a, b) \ + (__builtin_nds32_rsub64 ((a), (b))) +#define __nds32__ursub64(a, b) \ + (__builtin_nds32_ursub64 ((a), (b))) +#define __nds32__ksub64(a, b) \ + (__builtin_nds32_ksub64 ((a), (b))) +#define __nds32__uksub64(a, b) \ + (__builtin_nds32_uksub64 ((a), (b))) + +#define __nds32__smar64(r, a, b) \ + (__builtin_nds32_smar64 ((r), (a), (b))) +#define __nds32__smsr64(r, a, b) \ + (__builtin_nds32_smsr64 ((r), (a), (b))) +#define __nds32__umar64(r, a, b) \ + (__builtin_nds32_umar64 ((r), (a), (b))) +#define __nds32__umsr64(r, a, b) \ + (__builtin_nds32_umsr64 ((r), (a), (b))) +#define __nds32__kmar64(r, a, b) \ + (__builtin_nds32_kmar64 ((r), (a), (b))) +#define __nds32__kmsr64(r, a, b) \ + (__builtin_nds32_kmsr64 ((r), (a), (b))) +#define __nds32__ukmar64(r, a, b) \ + (__builtin_nds32_ukmar64 ((r), (a), (b))) +#define __nds32__ukmsr64(r, a, b) \ + (__builtin_nds32_ukmsr64 ((r), (a), (b))) + +#define __nds32__smalbb(r, a, b) \ + (__builtin_nds32_smalbb ((r), (a), (b))) +#define __nds32__v_smalbb(r, a, b) \ + (__builtin_nds32_v_smalbb ((r), (a), (b))) +#define __nds32__smalbt(r, a, b) \ + (__builtin_nds32_smalbt ((r), (a), (b))) +#define __nds32__v_smalbt(r, a, b) \ + (__builtin_nds32_v_smalbt ((r), (a), (b))) +#define __nds32__smaltt(r, a, b) \ + (__builtin_nds32_smaltt ((r), (a), (b))) +#define __nds32__v_smaltt(r, a, b) \ + (__builtin_nds32_v_smaltt ((r), (a), (b))) +#define __nds32__smalda(r, a, b) \ + (__builtin_nds32_smalda ((r), (a), (b))) +#define __nds32__v_smalda(r, a, b) \ + (__builtin_nds32_v_smalda ((r), (a), (b))) +#define __nds32__smalxda(r, a, b) \ + (__builtin_nds32_smalxda ((r), (a), (b))) +#define __nds32__v_smalxda(r, a, b) \ + (__builtin_nds32_v_smalxda ((r), (a), (b))) +#define __nds32__smalds(r, a, b) \ + (__builtin_nds32_smalds ((r), (a), (b))) +#define __nds32__v_smalds(r, a, b) \ + (__builtin_nds32_v_smalds ((r), (a), (b))) +#define __nds32__smaldrs(r, a, b) \ + (__builtin_nds32_smaldrs ((r), (a), (b))) +#define __nds32__v_smaldrs(r, a, b) \ + (__builtin_nds32_v_smaldrs ((r), (a), (b))) +#define __nds32__smalxds(r, a, b) \ + (__builtin_nds32_smalxds ((r), (a), (b))) +#define __nds32__v_smalxds(r, a, b) \ + (__builtin_nds32_v_smalxds ((r), (a), (b))) +#define __nds32__smslda(r, a, b) \ + (__builtin_nds32_smslda ((r), (a), (b))) +#define __nds32__v_smslda(r, a, b) \ + (__builtin_nds32_v_smslda ((r), (a), (b))) +#define __nds32__smslxda(r, a, b) \ + (__builtin_nds32_smslxda ((r), (a), (b))) +#define __nds32__v_smslxda(r, a, b) \ + (__builtin_nds32_v_smslxda ((r), (a), (b))) + +#define __nds32__smul16(a, b) \ + (__builtin_nds32_smul16 ((a), (b))) +#define __nds32__v_smul16(a, b) \ + (__builtin_nds32_v_smul16 ((a), (b))) +#define __nds32__smulx16(a, b) \ + (__builtin_nds32_smulx16 ((a), (b))) +#define __nds32__v_smulx16(a, b) \ + 
(__builtin_nds32_v_smulx16 ((a), (b))) +#define __nds32__umul16(a, b) \ + (__builtin_nds32_umul16 ((a), (b))) +#define __nds32__v_umul16(a, b) \ + (__builtin_nds32_v_umul16 ((a), (b))) +#define __nds32__umulx16(a, b) \ + (__builtin_nds32_umulx16 ((a), (b))) +#define __nds32__v_umulx16(a, b) \ + (__builtin_nds32_v_umulx16 ((a), (b))) + +#define __nds32__uclip32(a, imm) \ + (__builtin_nds32_uclip32 ((a), (imm))) +#define __nds32__sclip32(a, imm) \ + (__builtin_nds32_sclip32 ((a), (imm))) +#define __nds32__kabs(a) \ + (__builtin_nds32_kabs ((a))) + +#define __nds32__no_ext_zol() \ + (__builtin_nds32_no_ext_zol()) + +#define __nds32__unaligned_feature() \ + (__builtin_nds32_unaligned_feature()) +#define __nds32__enable_unaligned() \ + (__builtin_nds32_enable_unaligned()) +#define __nds32__disable_unaligned() \ + (__builtin_nds32_disable_unaligned()) + +#define NDS32ATTR_SIGNATURE __attribute__((signature)) + #endif /* nds32_intrinsic.h */ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-intrinsic.md gcc-4.9.4/gcc/config/nds32/nds32-intrinsic.md --- gcc-4.9.4.orig/gcc/config/nds32/nds32-intrinsic.md 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-intrinsic.md 2016-08-08 20:37:45.502269936 +0200 @@ -1,5 +1,5 @@ ;; Intrinsic patterns description of Andes NDS32 cpu for GNU compiler -;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; Copyright (C) 2012-2015 Free Software Foundation, Inc. ;; Contributed by Andes Technology Corporation. ;; ;; This file is part of GCC. @@ -40,6 +40,26 @@ (set_attr "length" "4")] ) +(define_expand "mtsr_isb" + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "immediate_operand" ""))] + "" +{ + emit_insn (gen_unspec_volatile_mtsr (operands[0], operands[1])); + emit_insn (gen_unspec_volatile_isb()); + DONE; +}) + +(define_expand "mtsr_dsb" + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "immediate_operand" ""))] + "" +{ + emit_insn (gen_unspec_volatile_mtsr (operands[0], operands[1])); + emit_insn (gen_unspec_dsb()); + DONE; +}) + (define_insn "unspec_volatile_mtsr" [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r") (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_MTSR)] @@ -58,6 +78,74 @@ (set_attr "length" "4")] ) +;; FPU Register Transfer. 
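The __nds32__ wrapper macros in the nds32_intrinsic.h hunk above expand directly to the corresponding GCC builtins, so the DSP operations can be written as ordinary C expressions. A minimal usage sketch (the helper name and argument types are illustrative assumptions, not taken from the patch):

#include <nds32_intrinsic.h>

/* Hypothetical helper: multiply the bottom 16-bit halves of a and b (smbb),
   accumulate into acc with saturation (kmabb), and return the saturated
   absolute value (kabs).  Operand types are assumed.  */
static int
mac_bottom_halves (int acc, unsigned int a, unsigned int b)
{
  int prod = __nds32__smbb (a, b);   /* signed 16x16 multiply, bottom halves */
  acc = __nds32__kmabb (acc, a, b);  /* saturating multiply-accumulate       */
  return __nds32__kabs (acc + prod); /* saturating absolute value            */
}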
+ +(define_insn "unspec_fcpynsd" + [(set (match_operand:DF 0 "register_operand" "=f") + (unspec:DF [(match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f")] UNSPEC_FCPYNSD))] + "" + "fcpynsd\t%0, %1, %2" + [(set_attr "type" "misc") + (set_attr "length" "4")] +) + +(define_insn "unspec_fcpynss" + [(set (match_operand:SF 0 "register_operand" "=f") + (unspec:SF [(match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")] UNSPEC_FCPYNSS))] + "" + "fcpynss\t%0, %1, %2" + [(set_attr "type" "misc") + (set_attr "length" "4")] +) + +(define_insn "unspec_fcpysd" + [(set (match_operand:DF 0 "register_operand" "=f") + (unspec:DF [(match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f")] UNSPEC_FCPYSD))] + "" + "fcpysd\t%0, %1, %2" + [(set_attr "type" "misc") + (set_attr "length" "4")] +) + +(define_insn "unspec_fcpyss" + [(set (match_operand:SF 0 "register_operand" "=f") + (unspec:SF [(match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f")] UNSPEC_FCPYSS))] + "" + "fcpyss\t%0, %1, %2" + [(set_attr "type" "misc") + (set_attr "length" "4")] +) + +(define_insn "unspec_fmfcsr" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_FMFCSR))] + "" + "fmfcsr\t%0" + [(set_attr "type" "misc") + (set_attr "length" "4")] +) + +(define_insn "unspec_fmtcsr" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_FMTCSR)] + "" + "fmtcsr\t%0" + [(set_attr "type" "misc") + (set_attr "length" "4")] +) + +(define_insn "unspec_fmfcfg" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_FMFCFG))] + "" + "fmfcfg\t%0" + [(set_attr "type" "misc") + (set_attr "length" "4")] +) + ;; ------------------------------------------------------------------------ ;; Interrupt Instructions. @@ -76,6 +164,330 @@ [(set_attr "type" "misc")] ) +(define_expand "unspec_enable_int" + [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")] UNSPEC_VOLATILE_ENABLE_INT)] + "" +{ + rtx system_reg; + rtx temp_reg = gen_reg_rtx (SImode); + + /* Set system register form nds32_intrinsic_register_names[]. */ + if ((INTVAL (operands[0]) >= NDS32_INT_H16) + && (INTVAL (operands[0]) <= NDS32_INT_H31)) + { + /* The $INT_MASK2 sixteenth bit correspond to H16, so need + subtract 16. */ + system_reg = GEN_INT (__NDS32_REG_INT_MASK2__); + operands[0] = GEN_INT (1 << ((INTVAL (operands[0]) - 16))); + } + else + { + system_reg = GEN_INT (__NDS32_REG_INT_MASK__); + operands[0] = GEN_INT (1 << (INTVAL (operands[0]))); + } + + emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg)); + emit_insn (gen_iorsi3 (temp_reg, temp_reg, operands[0])); + emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); + emit_insn (gen_unspec_dsb ()); + DONE; +}) + +(define_expand "unspec_disable_int" + [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")] UNSPEC_VOLATILE_DISABLE_INT)] + "" +{ + rtx system_reg; + rtx temp_reg = gen_reg_rtx (SImode); + + /* Set system register form nds32_intrinsic_register_names[]. */ + if ((INTVAL (operands[0]) >= NDS32_INT_H16) + && (INTVAL (operands[0]) <= NDS32_INT_H31)) + { + system_reg = GEN_INT (__NDS32_REG_INT_MASK2__); + /* The $INT_MASK2 sixteenth bit correspond to H16, so need + subtract 16. 
*/ + operands[0] = GEN_INT ( ~(1 << (INTVAL (operands[0]) - 16))); + } + else + { + system_reg = GEN_INT (__NDS32_REG_INT_MASK__); + operands[0] = GEN_INT ( ~(1 << (INTVAL (operands[0])))); + } + + emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg)); + emit_insn (gen_andsi3 (temp_reg, temp_reg, operands[0])); + emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); + emit_insn (gen_unspec_dsb ()); + DONE; +}) + +(define_expand "unspec_set_pending_swint" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SET_PENDING_SWINT)] + "" +{ + /* Get $INT_PEND system register form nds32_intrinsic_register_names[] */ + rtx system_reg = GEN_INT (__NDS32_REG_INT_PEND__); + rtx temp_reg = gen_reg_rtx (SImode); + + emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg)); + emit_insn (gen_iorsi3 (temp_reg, temp_reg, GEN_INT (65536))); + emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); + emit_insn (gen_unspec_dsb ()); + DONE; +}) + +(define_expand "unspec_clr_pending_swint" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CLR_PENDING_SWINT)] + "" +{ + /* Get $INT_PEND system register form nds32_intrinsic_register_names[] */ + rtx system_reg = GEN_INT (__NDS32_REG_INT_PEND__); + rtx temp_reg = gen_reg_rtx (SImode); + + emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg)); + emit_insn (gen_andsi3 (temp_reg, temp_reg, GEN_INT (~(1 << 16)))); + emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); + emit_insn (gen_unspec_dsb ()); + DONE; +}) + +(define_expand "unspec_clr_pending_hwint" + [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")] UNSPEC_VOLATILE_CLR_PENDING_HWINT)] + "" +{ + rtx system_reg; + rtx temp_reg = gen_reg_rtx (SImode); + rtx clr_hwint; + + /* Set system register form nds32_intrinsic_register_names[]. */ + if ((INTVAL (operands[0]) >= NDS32_INT_H0) + && (INTVAL (operands[0]) <= NDS32_INT_H15)) + { + system_reg = GEN_INT (__NDS32_REG_INT_PEND__); + clr_hwint = GEN_INT (~(1 << INTVAL (operands[0]))); + } + else if ((INTVAL (operands[0]) >= NDS32_INT_H16) + && (INTVAL (operands[0]) <= NDS32_INT_H31)) + { + system_reg = GEN_INT (__NDS32_REG_INT_PEND2__); + /* The $INT_PEND2 sixteenth bit correspond to H16, so need + subtract 16. */ + clr_hwint = GEN_INT (~(1 << (INTVAL (operands[0]) - 16))); + } + else + error ("__nds32__clr_pending_hwint not support NDS32_INT_SWI," + " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM"); + + emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg)); + emit_insn (gen_andsi3 (temp_reg, temp_reg, clr_hwint)); + emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); + emit_insn (gen_unspec_dsb ()); + DONE; +}) + +(define_expand "unspec_get_all_pending_int" + [(set (match_operand:SI 0 "register_operand" "") + (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_GET_ALL_PENDING_INT))] + "" +{ + rtx system_reg = GEN_INT (__NDS32_REG_INT_PEND__); + emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg)); + emit_insn (gen_unspec_dsb ()); + DONE; +}) + +(define_expand "unspec_get_pending_int" + [(set (match_operand:SI 0 "register_operand" "") + (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "")] UNSPEC_VOLATILE_GET_PENDING_INT))] + "" +{ + rtx system_reg; + + /* Set system register form nds32_intrinsic_register_names[]. 
*/ + if ((INTVAL (operands[1]) >= NDS32_INT_H0) + && (INTVAL (operands[1]) <= NDS32_INT_SWI)) + { + system_reg = GEN_INT (__NDS32_REG_INT_PEND__); + operands[2] = GEN_INT (31 - INTVAL (operands[1])); + } + else if ((INTVAL (operands[1]) >= NDS32_INT_H16) + && (INTVAL (operands[1]) <= NDS32_INT_H31)) + { + system_reg = GEN_INT (__NDS32_REG_INT_PEND2__); + /* The $INT_PEND2 sixteenth bit corresponds to H16, so we need to + subtract 16. */ + operands[2] = GEN_INT (31 - (INTVAL (operands[1]) - 16)); + } + else + error ("get_pending_int not support NDS32_INT_ALZ," + " NDS32_INT_IDIVZE, NDS32_INT_DSSIM"); + + /* mfsr op0, system_reg */ + emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg)); + emit_insn (gen_ashlsi3 (operands[0], operands[0], operands[2])); + emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (31))); + emit_insn (gen_unspec_dsb ()); + DONE; +}) + +(define_expand "unspec_set_int_priority" + [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "") + (match_operand:SI 1 "immediate_operand" "")] UNSPEC_VOLATILE_SET_INT_PRIORITY)] + "" +{ + rtx system_reg; + rtx priority; + rtx mask; + rtx temp_reg = gen_reg_rtx (SImode); + rtx mask_reg = gen_reg_rtx (SImode); + rtx set_reg = gen_reg_rtx (SImode); + + /* Get system register from nds32_intrinsic_register_names[]. */ + if (INTVAL (operands[0]) <= NDS32_INT_H15) + { + system_reg = GEN_INT (__NDS32_REG_INT_PRI__); + mask = GEN_INT (~(3 << 2 * INTVAL (operands[0]))); + priority = GEN_INT ((int) (INTVAL (operands[1]) + << (INTVAL (operands[0]) * 2))); + } + else if (INTVAL (operands[0]) >= NDS32_INT_H16 + && INTVAL (operands[0]) <= NDS32_INT_H31) + { + system_reg = GEN_INT (__NDS32_REG_INT_PRI2__); + /* The $INT_PRI2 first bit corresponds to H16, so we need to + subtract 32. */ + mask = GEN_INT (~(3 << 2 * (INTVAL (operands[0]) - 32))); + priority = GEN_INT ((int) (INTVAL (operands[1]) + << ((INTVAL (operands[0]) - 32) * 2))); + } + else + error ("set_int_priority not support NDS32_INT_SWI," + " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM"); + + emit_move_insn (mask_reg, mask); + emit_move_insn (set_reg, priority); + emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg)); + emit_insn (gen_andsi3 (temp_reg, temp_reg, mask_reg)); + emit_insn (gen_iorsi3 (temp_reg, temp_reg, set_reg)); + emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); + emit_insn (gen_unspec_dsb ()); + DONE; +}) + +(define_expand "unspec_get_int_priority" + [(set (match_operand:SI 0 "register_operand" "") + (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "")] UNSPEC_VOLATILE_GET_INT_PRIORITY))] + "" +{ + rtx system_reg; + + /* Get system register from nds32_intrinsic_register_names[] */ + if (INTVAL (operands[1]) <= NDS32_INT_H15) + { + system_reg = GEN_INT (__NDS32_REG_INT_PRI__); + operands[2] = GEN_INT (31 - 2 * INTVAL (operands[1])); + } + else if (INTVAL (operands[1]) >= NDS32_INT_H16 + && INTVAL (operands[1]) <= NDS32_INT_H31) + { + system_reg = GEN_INT (__NDS32_REG_INT_PRI2__); + /* The $INT_PRI2 first bit corresponds to H16, so we need to + subtract 32. 
*/ + operands[2] = GEN_INT (31 - 2 * (INTVAL (operands[1]) - 32)); + } + else + error ("set_int_priority not support NDS32_INT_SWI," + " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM"); + + emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg)); + emit_insn (gen_ashlsi3 (operands[0], operands[0], operands[2])); + emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (30))); + emit_insn (gen_unspec_dsb ()); + DONE; +}) + +(define_expand "unspec_set_trig_level" + [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")] UNSPEC_VOLATILE_SET_TRIG_LEVEL)] + "" +{ + rtx system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER__); + rtx temp_reg = gen_reg_rtx (SImode); + rtx set_level; + + if ((INTVAL (operands[0]) == NDS32_INT_SWI) + || (INTVAL (operands[0]) == NDS32_INT_ALZ) + || (INTVAL (operands[0]) == NDS32_INT_IDIVZE) + || (INTVAL (operands[0]) == NDS32_INT_DSSIM)) + error ("__nds32__set_trig_type_level not support NDS32_INT_SWI," + " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM"); + + /* TRIGGER register, 0 mean level triggered and 1 mean edge triggered. */ + if (INTVAL (operands[0]) > NDS32_INT_H15) + set_level = GEN_INT (~(1 << (INTVAL (operands[0]) - 16))); + else + set_level = GEN_INT (~(1 << INTVAL (operands[0]))); + + emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg)); + emit_insn (gen_andsi3 (temp_reg, temp_reg, set_level)); + emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); + DONE; +}) + +(define_expand "unspec_set_trig_edge" + [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "")] UNSPEC_VOLATILE_SET_TRIG_EDGE)] + "" +{ + rtx system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER__); + rtx temp_reg = gen_reg_rtx (SImode); + rtx set_level; + + if ((INTVAL (operands[0]) == NDS32_INT_SWI) + || (INTVAL (operands[0]) == NDS32_INT_ALZ) + || (INTVAL (operands[0]) == NDS32_INT_IDIVZE) + || (INTVAL (operands[0]) == NDS32_INT_DSSIM)) + error ("__nds32__set_trig_type_edge not support NDS32_INT_SWI," + " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM"); + + /* TRIGGER register, 0 mean level triggered and 1 mean edge triggered. 
*/ + if (INTVAL (operands[0]) > NDS32_INT_H15) + set_level = GEN_INT ((1 << (INTVAL (operands[0]) - 16))); + else + set_level = GEN_INT ((1 << INTVAL (operands[0]))); + + emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg)); + emit_insn (gen_iorsi3 (temp_reg, temp_reg, set_level)); + emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); + DONE; +}) + +(define_expand "unspec_get_trig_type" + [(set (match_operand:SI 0 "register_operand" "") + (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "")] UNSPEC_VOLATILE_GET_TRIG_TYPE))] + "" +{ + rtx system_reg = GEN_INT (__NDS32_REG_INT_TRIGGER__); + rtx trig_type; + + if ((INTVAL (operands[1]) == NDS32_INT_SWI) + || (INTVAL (operands[1]) == NDS32_INT_ALZ) + || (INTVAL (operands[1]) == NDS32_INT_IDIVZE) + || (INTVAL (operands[1]) == NDS32_INT_DSSIM)) + error ("__nds32__get_trig_type not support NDS32_INT_SWI," + " NDS32_INT_ALZ, NDS32_INT_IDIVZE, NDS32_INT_DSSIM"); + + if (INTVAL (operands[1]) > NDS32_INT_H15) + trig_type = GEN_INT (31 - (INTVAL (operands[1]) - 16)); + else + trig_type = GEN_INT (31 - INTVAL (operands[1])); + + emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg)); + emit_insn (gen_ashlsi3 (operands[0], operands[0], trig_type)); + emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (31))); + emit_insn (gen_unspec_dsb ()); + DONE; +}) + ;; ------------------------------------------------------------------------ ;; Cache Synchronization Instructions @@ -84,7 +496,7 @@ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_ISYNC)] "" "isync\t%0" - [(set_attr "type" "misc")] + [(set_attr "type" "mmu")] ) (define_insn "unspec_volatile_isb" @@ -94,4 +506,1061 @@ [(set_attr "type" "misc")] ) +(define_insn "unspec_dsb" + [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_DSB)] + "" + "dsb" + [(set_attr "type" "misc")] +) + +(define_insn "unspec_msync" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_MSYNC)] + "" + "msync\t%0" + [(set_attr "type" "misc")] +) + +(define_insn "unspec_msync_all" + [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_MSYNC_ALL)] + "" + "msync\tall" + [(set_attr "type" "misc")] +) + +(define_insn "unspec_msync_store" + [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_MSYNC_STORE)] + "" + "msync\tstore" + [(set_attr "type" "misc")] +) + +;; Load and Store + +(define_insn "unspec_volatile_llw" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))] UNSPEC_VOLATILE_LLW))] + "" + "llw\t%0, [%1 + %2]" + [(set_attr "length" "4")] +) + +(define_insn "unspec_lwup" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))] UNSPEC_LWUP))] + "" + "lwup\t%0, [%1 + %2]" + [(set_attr "length" "4")] +) + +(define_insn "unspec_lbup" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))] UNSPEC_LBUP))] + "" + "lbup\t%0, [%1 + %2]" + [(set_attr "length" "4")] +) + +(define_insn "unspec_volatile_scw" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r"))) + (match_operand:SI 3 "register_operand" "0")] UNSPEC_VOLATILE_SCW))] + 
"" + "scw\t%0, [%1 + %2]" + [(set_attr "length" "4")] +) + +(define_insn "unspec_swup" + [(set (mem:SI (plus:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "register_operand" "r"))) + (unspec:SI [(match_operand:SI 2 "register_operand" "r")] UNSPEC_SWUP))] + "" + "swup\t%2, [%0 + %1]" + [(set_attr "length" "4")] +) + +(define_insn "unspec_sbup" + [(set (mem:SI (plus:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "register_operand" "r"))) + (unspec:SI [(match_operand:SI 2 "register_operand" "r")] UNSPEC_SBUP))] + "" + "sbup\t%2, [%0 + %1]" + [(set_attr "length" "4")] +) + +;; CCTL + +(define_insn "cctl_l1d_invalall" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CCTL_L1D_INVALALL)] + "" + "cctl\tL1D_INVALALL" + [(set_attr "type" "mmu")] +) + +(define_insn "cctl_l1d_wball_alvl" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CCTL_L1D_WBALL_ALVL)] + "" + "cctl\tL1D_WBALL, alevel" + [(set_attr "type" "mmu")] +) + +(define_insn "cctl_l1d_wball_one_lvl" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CCTL_L1D_WBALL_ONE_LVL)] + "" + "cctl\tL1D_WBALL, 1level" + [(set_attr "type" "mmu")] +) + +(define_insn "cctl_idx_read" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "i") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_IDX_READ))] + "" + "cctl\t%0, %2, %X1" + [(set_attr "type" "mmu")] +) + +(define_insn "cctl_idx_write" + [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i") + (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_IDX_WRITE)] + "" + "cctl\t%1, %2, %W0" + [(set_attr "type" "mmu")] +) + +(define_insn "cctl_va_wbinval_l1" + [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i") + (match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_VA_WBINVAL_L1)] + "" + "cctl\t%1, %U0, 1level" + [(set_attr "type" "mmu")] +) + +(define_insn "cctl_va_wbinval_la" + [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i") + (match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_VA_WBINVAL_LA)] + "" + "cctl\t%1, %U0, alevel" + [(set_attr "type" "mmu")] +) + +(define_insn "cctl_idx_wbinval" + [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i") + (match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_IDX_WBINVAL)] + "" + "cctl\t%1, %T0" + [(set_attr "type" "mmu")] +) + +(define_insn "cctl_va_lck" + [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i") + (match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_CCTL_VA_LCK)] + "" + "cctl\t%1, %R0" + [(set_attr "type" "mmu")] +) + +;;PREFETCH + +(define_insn "prefetch_qw" + [(unspec_volatile:QI [(match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "nonmemory_operand" "r") + (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VOLATILE_DPREF_QW)] + "" + "dpref\t%Z2, [%0 + %1]" + [(set_attr "type" "misc")] +) + +(define_insn "prefetch_hw" + [(unspec_volatile:HI [(match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "nonmemory_operand" "r") + (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VOLATILE_DPREF_HW)] + "" + "dpref\t%Z2, [%0 + (%1<<1)]" + [(set_attr "type" "misc")] +) + +(define_insn "prefetch_w" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" " r, r") + (match_operand:SI 1 "nonmemory_operand" "Is15, r") + (match_operand:SI 2 "immediate_operand" " i, i")] UNSPEC_VOLATILE_DPREF_W)] + "" + "@ + 
dprefi.w\t%Z2, [%0 + %1] + dpref\t%Z2, [%0 + (%1<<2)]" + [(set_attr "type" "misc")] +) + +(define_insn "prefetch_dw" + [(unspec_volatile:DI [(match_operand:SI 0 "register_operand" " r, r") + (match_operand:SI 1 "nonmemory_operand" "Is15, r") + (match_operand:SI 2 "immediate_operand" " i, i")] UNSPEC_VOLATILE_DPREF_DW)] + "" + "@ + dprefi.d\t%Z2, [%0 + %1] + dpref\t%Z2, [%0 + (%1<<3)]" + [(set_attr "type" "misc")] +) + +;; Performance Extension + +(define_insn "unspec_ave" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_AVE))] + "" + "ave\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_bclr" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_BCLR))] + "" + "bclr\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_bset" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_BSET))] + "" + "bset\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_btgl" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_BTGL))] + "" + "btgl\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_btst" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_BTST))] + "" + "btst\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_clip" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIP))] + "" + "clip\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_clips" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIPS))] + "" + "clips\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_clo" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_CLO))] + "" + "clo\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_ssabssi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_ABS))] + "" + "abs\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +;; Performance extension 2 + +(define_insn "unspec_pbsad" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_PBSAD))] + "" + "pbsad\t%0, %1, %2" + [(set_attr "type" "pbsad") + (set_attr "length" "4")] +) + +(define_insn "unspec_pbsada" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "register_operand" "r")] UNSPEC_PBSADA))] + "" + "pbsada\t%0, %2, %3" + [(set_attr "type" 
"pbsada") + (set_attr "length" "4")] +) + +(define_expand "bse" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" "")] + "" + { + rtx temp0 = gen_reg_rtx (SImode); + rtx temp2 = gen_reg_rtx (SImode); + + emit_move_insn (temp0, gen_rtx_MEM (Pmode, operands[0])); + emit_move_insn (temp2, gen_rtx_MEM (Pmode, operands[2])); + emit_insn (gen_unspec_bse (temp0, operands[1], temp2, temp0, temp2)); + emit_move_insn (gen_rtx_MEM (Pmode, operands[0]), temp0); + emit_move_insn (gen_rtx_MEM (Pmode, operands[2]), temp2); + DONE; + } +) + +(define_insn "unspec_bse" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "register_operand" "0")] UNSPEC_BSE)) + (set (match_operand:SI 4 "register_operand" "=2") + (unspec:SI [(match_dup 1) + (match_dup 2) + (match_dup 0)] UNSPEC_BSE_2))] + "" + "bse\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_expand "bsp" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" "")] + "" + { + rtx temp0 = gen_reg_rtx (SImode); + rtx temp2 = gen_reg_rtx (SImode); + + emit_move_insn (temp0, gen_rtx_MEM (Pmode, operands[0])); + emit_move_insn (temp2, gen_rtx_MEM (Pmode, operands[2])); + emit_insn (gen_unspec_bsp (temp0, operands[1], temp2, temp0, temp2)); + emit_move_insn (gen_rtx_MEM (Pmode, operands[0]), temp0); + emit_move_insn (gen_rtx_MEM (Pmode, operands[2]), temp2); + DONE; + } +) + +(define_insn "unspec_bsp" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "register_operand" "0")] UNSPEC_BSP)) + (set (match_operand:SI 4 "register_operand" "=2") + (unspec:SI [(match_dup 1) + (match_dup 2) + (match_dup 0)] UNSPEC_BSP_2))] + "" + "bsp\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +;; String Extension + +(define_insn "unspec_ffb" + [(set (match_operand:SI 0 "register_operand" "=r, r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r, r") + (match_operand:SI 2 "nonmemory_operand" "Iu08, r")] UNSPEC_FFB))] + "" + "@ + ffbi\t%0, %1, %2 + ffb\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_ffmism" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_FFMISM))] + "" + "ffmism\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_flmism" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_FLMISM))] + "" + "flmism\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +;; SATURATION + +(define_insn "unspec_kaddw" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_KADDW))] + "" + "kaddw\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_ksubw" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSUBW))] + "" + "ksubw\t%0, 
%1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_kaddh" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_KADDH))] + "" + "kaddh\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_ksubh" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSUBH))] + "" + "ksubh\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_kdmbb" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMBB))] + "" + "kdmbb\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_kdmbt" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMBT))] + "" + "kdmbt\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_kdmtb" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMTB))] + "" + "kdmtb\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_kdmtt" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMTT))] + "" + "kdmtt\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_khmbb" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMBB))] + "" + "khmbb\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_khmbt" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMBT))] + "" + "khmbt\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_khmtb" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMTB))] + "" + "khmtb\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_khmtt" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMTT))] + "" + "khmtt\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_kslraw" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSLRAW))] + "" + "kslraw\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_kslrawu" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSLRAWU))] + "" + "kslraw.u\t%0, %1, %2" + 
[(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_rdov" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(const_int 0)] UNSPEC_RDOV))] + "" + "rdov\t%0" + [(set_attr "type" "misc") + (set_attr "length" "4")] +) + +(define_insn "unspec_clrov" + [(unspec:SI [(const_int 0)] UNSPEC_CLROV)] + "" + "clrov" + [(set_attr "type" "misc") + (set_attr "length" "4")] +) + +;; System + +(define_insn "unspec_sva" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_SVA))] + "" + "sva\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_svs" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_SVS))] + "" + "svs\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_jr_itoff" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_JR_ITOFF)] + "" + "jr.itoff\t%0" + [(set_attr "type" "misc")] +) + +(define_insn "unspec_jr_toff" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_JR_TOFF)] + "" + "jr.toff\t%0" + [(set_attr "type" "branch")] +) + +(define_insn "unspec_jral_iton" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_JRAL_ITON)] + "" + "jral.iton\t%0" + [(set_attr "type" "branch")] +) + +(define_insn "unspec_jral_ton" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_JRAL_TON)] + "" + "jral.ton\t%0" + [(set_attr "type" "branch")] +) + +(define_insn "unspec_ret_itoff" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_RET_ITOFF)] + "" + "ret.itoff\t%0" + [(set_attr "type" "branch")] +) + +(define_insn "unspec_ret_toff" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_RET_TOFF)] + "" + "ret.toff\t%0" + [(set_attr "type" "branch")] +) + +(define_insn "unspec_standby_no_wake_grant" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_STANDBY_NO_WAKE_GRANT)] + "" + "standby\tno_wake_grant" + [(set_attr "type" "misc")] +) + +(define_insn "unspec_standby_wake_grant" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_STANDBY_WAKE_GRANT)] + "" + "standby\twake_grant" + [(set_attr "type" "misc")] +) + +(define_insn "unspec_standby_wait_done" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_STANDBY_WAKE_DONE)] + "" + "standby\twait_done" + [(set_attr "type" "misc")] +) + +(define_insn "unspec_teqz" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_TEQZ)] + "" + "teqz\t%0, %1" + [(set_attr "type" "misc")] +) + +(define_insn "unspec_tnez" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_TNEZ)] + "" + "tnez\t%0, %1" + [(set_attr "type" "misc")] +) + +(define_insn "unspec_trap" + [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_TRAP)] + "" + "trap\t%0" + [(set_attr "type" "misc")] +) + +(define_insn "unspec_setend_big" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SETEND_BIG)] + "" + "setend.b" + [(set_attr "type" "misc")] +) + +(define_insn "unspec_setend_little" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SETEND_LITTLE)] + "" + "setend.l" + 
[(set_attr "type" "misc")] +) + +(define_insn "unspec_break" + [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_BREAK)] + "" + "break\t%0" + [(set_attr "type" "misc")] +) + +(define_insn "unspec_syscall" + [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_SYSCALL)] + "" + "syscall\t%0" + [(set_attr "type" "misc")] +) + +(define_insn "unspec_nop" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_NOP)] + "" + "nop" + [(set_attr "type" "misc")] +) + +(define_insn "unspec_get_current_sp" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(reg:SI SP_REGNUM)] UNSPEC_VOLATILE_GET_CURRENT_SP))] + "" + "mov55\t%0, $sp" + [(set_attr "type" "misc")] +) + +(define_insn "unspec_set_current_sp" + [(set (reg:SI SP_REGNUM) + (unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_SET_CURRENT_SP))] + "" + "mov55\t$sp, %0" + [(set_attr "type" "misc")] +) + +(define_insn "unspec_return_address" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_RETURN_ADDRESS))] + "" + "mov55\t%0, $lp" + [(set_attr "type" "misc")] +) + +(define_insn "unspec_signature_begin" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SIGNATURE_BEGIN)] + "" + "isps" + [(set_attr "length" "4")] +) + +(define_insn "unspec_signature_end" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SIGNATURE_END)] + "" + "! -----\;.signature_end\;j8 2\;! -----" + [(set_attr "length" "2")] +) + +;; Swap + +(define_insn "unspec_wsbh" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_WSBH))] + "" + "wsbh\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +;; TLBOP Intrinsic + +(define_insn "unspec_tlbop_trd" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_TRD)] + "" + "tlbop\t%0, TRD" + [(set_attr "type" "mmu")] +) + +(define_insn "unspec_tlbop_twr" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_TWR)] + "" + "tlbop\t%0, TWR" + [(set_attr "type" "mmu")] +) + +(define_insn "unspec_tlbop_rwr" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_RWR)] + "" + "tlbop\t%0, RWR" + [(set_attr "type" "mmu")] +) + +(define_insn "unspec_tlbop_rwlk" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_RWLK)] + "" + "tlbop\t%0, RWLK" + [(set_attr "type" "mmu")] +) + +(define_insn "unspec_tlbop_unlk" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_UNLK)] + "" + "tlbop\t%0, UNLK" + [(set_attr "type" "mmu")] +) + +(define_insn "unspec_tlbop_pb" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_PB))] + "" + "tlbop\t%0, %1, PB" + [(set_attr "type" "mmu")] +) + +(define_insn "unspec_tlbop_inv" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_TLBOP_INV)] + "" + "tlbop\t%0, INV" + [(set_attr "type" "mmu")] +) + +(define_insn "unspec_tlbop_flua" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_TLBOP_FLUA)] + "" + "tlbop\tFLUA" + [(set_attr "type" "mmu")] +) + +;;Unaligned Load/Store + +(define_expand "unaligned_load_hw" + [(set (match_operand:HI 0 "register_operand" "") + (unspec:HI [(mem:HI (match_operand:SI 1 "register_operand" ""))] UNSPEC_UALOAD_HW))] + "" +{ + 
operands[0] = simplify_gen_subreg (SImode, operands[0], + GET_MODE (operands[0]), 0); + if (TARGET_ISA_V3M) + { + nds32_expand_unaligned_load (operands, HImode); + } + else + { + emit_insn (gen_unaligned_load_w (operands[0], + gen_rtx_MEM (SImode, operands[1]))); + + if (WORDS_BIG_ENDIAN) + emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT(16))); + else + emit_insn (gen_andsi3 (operands[0], operands[0], GEN_INT (0xffff))); + } + + DONE; +}) + +(define_expand "unaligned_loadsi" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(mem:SI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_W))] + "" +{ + if (TARGET_ISA_V3M) + nds32_expand_unaligned_load (operands, SImode); + else + emit_insn (gen_unaligned_load_w (operands[0], + gen_rtx_MEM (SImode, (operands[1])))); + DONE; +}) + +(define_insn "unaligned_load_w" + [(set (match_operand:SI 0 "register_operand" "= r") + (unspec:SI [(match_operand:SI 1 "nds32_lmw_smw_base_operand" " Umw")] UNSPEC_UALOAD_W))] + "" +{ + return nds32_output_lmw_single_word (operands); +} + [(set_attr "type" "load") + (set_attr "length" "4")] +) + +(define_expand "unaligned_loaddi" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(mem:DI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_DW))] + "" +{ + if (TARGET_ISA_V3M) + { + nds32_expand_unaligned_load (operands, DImode); + } + else + emit_insn (gen_unaligned_load_dw (operands[0], operands[1])); + DONE; +}) + +(define_insn "unaligned_load_dw" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(mem:DI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_DW))] + "" +{ + rtx otherops[3]; + otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0])); + otherops[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); + otherops[2] = operands[1]; + + output_asm_insn ("lmw.bi\t%0, [%2], %1, 0", otherops); + return ""; +} + [(set_attr "type" "load") + (set_attr "length" "4")] +) + +(define_expand "unaligned_store_hw" + [(set (mem:SI (match_operand:SI 0 "register_operand" "")) + (unspec:HI [(match_operand:HI 1 "register_operand" "")] UNSPEC_UASTORE_HW))] + "" +{ + operands[1] = simplify_gen_subreg (SImode, operands[1], + GET_MODE (operands[1]), 0); + nds32_expand_unaligned_store (operands, HImode); + DONE; +}) + +(define_expand "unaligned_storesi" + [(set (mem:SI (match_operand:SI 0 "register_operand" "r")) + (unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_UASTORE_W))] + "" +{ + if (TARGET_ISA_V3M) + nds32_expand_unaligned_store (operands, SImode); + else + emit_insn (gen_unaligned_store_w (gen_rtx_MEM (SImode, operands[0]), + operands[1])); + DONE; +}) + +(define_insn "unaligned_store_w" + [(set (match_operand:SI 0 "nds32_lmw_smw_base_operand" "=Umw") + (unspec:SI [(match_operand:SI 1 "register_operand" " r")] UNSPEC_UASTORE_W))] + "" +{ + return nds32_output_smw_single_word (operands); +} + [(set_attr "type" "store") + (set_attr "length" "4")] +) + +(define_expand "unaligned_storedi" + [(set (mem:DI (match_operand:SI 0 "register_operand" "r")) + (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_UASTORE_DW))] + "" +{ + if (TARGET_ISA_V3M) + nds32_expand_unaligned_store (operands, DImode); + else + emit_insn (gen_unaligned_store_dw (operands[0], operands[1])); + DONE; +}) + +(define_insn "unaligned_store_dw" + [(set (mem:DI (match_operand:SI 0 "register_operand" "r")) + (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_UASTORE_DW))] + "" +{ + rtx otherops[3]; + otherops[0] = gen_rtx_REG (SImode, REGNO 
(operands[1])); + otherops[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1); + otherops[2] = operands[0]; + + output_asm_insn ("smw.bi\t%0, [%2], %1, 0", otherops); + return ""; +} + [(set_attr "type" "store") + (set_attr "length" "4")] +) + +(define_expand "unspec_unaligned_feature" + [(set (match_operand:SI 0 "register_operand" "") + (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_UNALIGNED_FEATURE))] + "" +{ + /* Get $MMU_CTL system register form nds32_intrinsic_register_names[] */ + rtx system_reg = GEN_INT (__NDS32_REG_MMU_CTL__); + rtx temp_reg = gen_reg_rtx (SImode); + rtx temp2_reg = gen_reg_rtx (SImode); + + emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg)); + emit_move_insn (temp_reg, operands[0]); + emit_move_insn (temp2_reg, GEN_INT (0x800 << 12)); + emit_insn (gen_iorsi3 (operands[0], operands[0], temp2_reg)); + emit_insn (gen_unspec_volatile_mtsr (operands[0], system_reg)); + emit_insn (gen_unspec_dsb ()); + + emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg)); + emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); + emit_insn (gen_unspec_dsb ()); + + emit_insn (gen_ashlsi3 (operands[0], operands[0], GEN_INT (8))); + emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (31))); + DONE; +}) + +(define_expand "unspec_enable_unaligned" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_UNALIGNED_FEATURE)] + "" +{ + /* Get $MMU_CTL system register form nds32_intrinsic_register_names[] */ + rtx system_reg = GEN_INT (__NDS32_REG_MMU_CTL__); + rtx temp_reg = gen_reg_rtx (SImode); + rtx temp2_reg = gen_reg_rtx (SImode); + emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg)); + emit_move_insn (temp2_reg, GEN_INT (0x800 << 12)); + emit_insn (gen_iorsi3 (temp_reg, temp_reg, temp2_reg)); + emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); + emit_insn (gen_unspec_dsb ()); + DONE; +}) + +(define_expand "unspec_disable_unaligned" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_UNALIGNED_FEATURE)] + "" +{ + /* Get $MMU_CTL system register form nds32_intrinsic_register_names[] */ + rtx system_reg = GEN_INT (__NDS32_REG_MMU_CTL__); + rtx temp_reg = gen_reg_rtx (SImode); + rtx temp2_reg = gen_reg_rtx (SImode); + emit_insn (gen_unspec_volatile_mfsr (temp_reg, system_reg)); + emit_move_insn (temp2_reg, GEN_INT (0x800 << 12)); + emit_insn (gen_one_cmplsi2 (temp2_reg, temp2_reg)); + emit_insn (gen_andsi3 (temp_reg, temp_reg, temp2_reg)); + emit_insn (gen_unspec_volatile_mtsr (temp_reg, system_reg)); + emit_insn (gen_unspec_dsb ()); + DONE; +}) + +;; abs alias kabs + +(define_insn "unspec_kabs" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_KABS))] + "" + "kabs\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_expand "no_hwloop" + [(const_int 0)] + "" +{ + if (NDS32_HW_LOOP_P ()) + emit_insn (gen_unspec_no_hwloop ()); + else + emit_insn (gen_nop ()); + + DONE; +}) + +(define_insn "unspec_no_hwloop" + [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_NO_HWLOOP)] + "" + "" + [(set_attr "type" "misc")] +) ;; ------------------------------------------------------------------------ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-isr.c gcc-4.9.4/gcc/config/nds32/nds32-isr.c --- gcc-4.9.4.orig/gcc/config/nds32/nds32-isr.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-isr.c 2016-08-08 20:37:45.502269936 +0200 @@ -0,0 +1,972 @@ +/* Subroutines used for ISR of Andes NDS32 cpu for GNU compiler + Copyright (C) 
2012-2015 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* ------------------------------------------------------------------------ */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" /* Required by recog.h. */ +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" /* For DFA state_t. */ +#include "insn-codes.h" /* For CODE_FOR_xxx. */ +#include "reload.h" /* For push_reload(). */ +#include "flags.h" +#include "function.h" +#include "expr.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "df.h" +#include "tm_p.h" +#include "tm-constrs.h" +#include "optabs.h" /* For GEN_FCN. */ +#include "target.h" +#include "target-def.h" +#include "langhooks.h" /* For add_builtin_function(). */ +#include "ggc.h" + +/* ------------------------------------------------------------------------ */ + +/* Refer to nds32.h, there are maximum 73 isr vectors in nds32 architecture. + 0 for reset handler with __attribute__((reset())), + 1-8 for exception handler with __attribute__((exception(1,...,8))), + and 9-72 for interrupt handler with __attribute__((interrupt(0,...,63))). + We use an array to record essential information for each vector. */ +static struct nds32_isr_info nds32_isr_vectors[NDS32_N_ISR_VECTORS]; + +/* ------------------------------------------------------------- */ +/* FIXME: + FOR BACKWARD COMPATIBILITY, we need to support following patterns: + + __attribute__((interrupt("XXX;YYY;id=ZZZ"))) + __attribute__((exception("XXX;YYY;id=ZZZ"))) + __attribute__((reset("vectors=XXX;nmi_func=YYY;warm_func=ZZZ"))) + + We provide several functions to parse the strings. */ + +static void +nds32_interrupt_attribute_parse_string (const char *original_str, + const char *func_name) +{ + char target_str[100]; + enum nds32_isr_save_reg save_reg; + enum nds32_isr_nested_type nested_type; + + char *save_all_regs_str, *save_caller_regs_str; + char *nested_str, *not_nested_str, *ready_nested_str, *critical_str; + char *id_str, *value_str; + + /* Copy original string into a character array so that + the string APIs can handle it. */ + strcpy (target_str, original_str); + + /* 1. Detect 'save_all_regs' : NDS32_SAVE_ALL + 'save_caller_regs' : NDS32_PARTIAL_SAVE */ + save_all_regs_str = strstr (target_str, "save_all_regs"); + save_caller_regs_str = strstr (target_str, "save_caller_regs"); + + /* Note that if no argument is found, + use NDS32_PARTIAL_SAVE by default. */ + if (save_all_regs_str) + save_reg = NDS32_SAVE_ALL; + else if (save_caller_regs_str) + save_reg = NDS32_PARTIAL_SAVE; + else + save_reg = NDS32_PARTIAL_SAVE; + + /* 2. 
Detect 'nested' : NDS32_NESTED + 'not_nested' : NDS32_NOT_NESTED + 'ready_nested' : NDS32_NESTED_READY + 'critical' : NDS32_CRITICAL */ + nested_str = strstr (target_str, "nested"); + not_nested_str = strstr (target_str, "not_nested"); + ready_nested_str = strstr (target_str, "ready_nested"); + critical_str = strstr (target_str, "critical"); + + /* Note that if no argument is found, + use NDS32_NOT_NESTED by default. + Also, since 'not_nested' and 'ready_nested' both contains + 'nested' string, we check 'nested' with lowest priority. */ + if (not_nested_str) + nested_type = NDS32_NOT_NESTED; + else if (ready_nested_str) + nested_type = NDS32_NESTED_READY; + else if (nested_str) + nested_type = NDS32_NESTED; + else if (critical_str) + nested_type = NDS32_CRITICAL; + else + nested_type = NDS32_NOT_NESTED; + + /* 3. Traverse each id value and set corresponding information. */ + id_str = strstr (target_str, "id="); + + /* If user forgets to assign 'id', issue an error message. */ + if (id_str == NULL) + error ("require id argument in the string"); + /* Extract the value_str first. */ + id_str = strtok (id_str, "="); + value_str = strtok (NULL, ";"); + + /* Pick up the first id value token. */ + value_str = strtok (value_str, ","); + while (value_str != NULL) + { + int i; + i = atoi (value_str); + + /* For interrupt(0..63), the actual vector number is (9..72). */ + i = i + 9; + if (i < 9 || i > 72) + error ("invalid id value for interrupt attribute"); + + /* Setup nds32_isr_vectors[] array. */ + nds32_isr_vectors[i].category = NDS32_ISR_INTERRUPT; + strcpy (nds32_isr_vectors[i].func_name, func_name); + nds32_isr_vectors[i].save_reg = save_reg; + nds32_isr_vectors[i].nested_type = nested_type; + + /* Fetch next token. */ + value_str = strtok (NULL, ","); + } + + return; +} + +static void +nds32_exception_attribute_parse_string (const char *original_str, + const char *func_name) +{ + char target_str[100]; + enum nds32_isr_save_reg save_reg; + enum nds32_isr_nested_type nested_type; + + char *save_all_regs_str, *save_caller_regs_str; + char *nested_str, *not_nested_str, *ready_nested_str, *critical_str; + char *id_str, *value_str; + + /* Copy original string into a character array so that + the string APIs can handle it. */ + strcpy (target_str, original_str); + + /* 1. Detect 'save_all_regs' : NDS32_SAVE_ALL + 'save_caller_regs' : NDS32_PARTIAL_SAVE */ + save_all_regs_str = strstr (target_str, "save_all_regs"); + save_caller_regs_str = strstr (target_str, "save_caller_regs"); + + /* Note that if no argument is found, + use NDS32_PARTIAL_SAVE by default. */ + if (save_all_regs_str) + save_reg = NDS32_SAVE_ALL; + else if (save_caller_regs_str) + save_reg = NDS32_PARTIAL_SAVE; + else + save_reg = NDS32_PARTIAL_SAVE; + + /* 2. Detect 'nested' : NDS32_NESTED + 'not_nested' : NDS32_NOT_NESTED + 'ready_nested' : NDS32_NESTED_READY + 'critical' : NDS32_CRITICAL */ + nested_str = strstr (target_str, "nested"); + not_nested_str = strstr (target_str, "not_nested"); + ready_nested_str = strstr (target_str, "ready_nested"); + critical_str = strstr (target_str, "critical"); + + /* Note that if no argument is found, + use NDS32_NOT_NESTED by default. + Also, since 'not_nested' and 'ready_nested' both contains + 'nested' string, we check 'nested' with lowest priority. 
*/ + if (not_nested_str) + nested_type = NDS32_NOT_NESTED; + else if (ready_nested_str) + nested_type = NDS32_NESTED_READY; + else if (nested_str) + nested_type = NDS32_NESTED; + else if (critical_str) + nested_type = NDS32_CRITICAL; + else + nested_type = NDS32_NOT_NESTED; + + /* 3. Traverse each id value and set corresponding information. */ + id_str = strstr (target_str, "id="); + + /* If user forgets to assign 'id', issue an error message. */ + if (id_str == NULL) + error ("require id argument in the string"); + /* Extract the value_str first. */ + id_str = strtok (id_str, "="); + value_str = strtok (NULL, ";"); + + /* Pick up the first id value token. */ + value_str = strtok (value_str, ","); + while (value_str != NULL) + { + int i; + i = atoi (value_str); + + /* For exception(1..8), the actual vector number is (1..8). */ + if (i < 1 || i > 8) + error ("invalid id value for exception attribute"); + + /* Setup nds32_isr_vectors[] array. */ + nds32_isr_vectors[i].category = NDS32_ISR_EXCEPTION; + strcpy (nds32_isr_vectors[i].func_name, func_name); + nds32_isr_vectors[i].save_reg = save_reg; + nds32_isr_vectors[i].nested_type = nested_type; + + /* Fetch next token. */ + value_str = strtok (NULL, ","); + } + + return; +} + +static void +nds32_reset_attribute_parse_string (const char *original_str, + const char *func_name) +{ + char target_str[100]; + char *vectors_str, *nmi_str, *warm_str, *value_str; + + /* Deal with reset attribute. Its vector number is always 0. */ + nds32_isr_vectors[0].category = NDS32_ISR_RESET; + + + /* 1. Parse 'vectors=XXXX'. */ + + /* Copy original string into a character array so that + the string APIs can handle it. */ + strcpy (target_str, original_str); + vectors_str = strstr (target_str, "vectors="); + /* The total vectors = interrupt + exception numbers + reset. + There are 8 exception and 1 reset in nds32 architecture. + If user forgets to assign 'vectors', user default 16 interrupts. */ + if (vectors_str != NULL) + { + /* Extract the value_str. */ + vectors_str = strtok (vectors_str, "="); + value_str = strtok (NULL, ";"); + nds32_isr_vectors[0].total_n_vectors = atoi (value_str) + 8 + 1; + } + else + nds32_isr_vectors[0].total_n_vectors = 16 + 8 + 1; + strcpy (nds32_isr_vectors[0].func_name, func_name); + + + /* 2. Parse 'nmi_func=YYYY'. */ + + /* Copy original string into a character array so that + the string APIs can handle it. */ + strcpy (target_str, original_str); + nmi_str = strstr (target_str, "nmi_func="); + if (nmi_str != NULL) + { + /* Extract the value_str. */ + nmi_str = strtok (nmi_str, "="); + value_str = strtok (NULL, ";"); + strcpy (nds32_isr_vectors[0].nmi_name, value_str); + } + + /* 3. Parse 'warm_func=ZZZZ'. */ + + /* Copy original string into a character array so that + the string APIs can handle it. */ + strcpy (target_str, original_str); + warm_str = strstr (target_str, "warm_func="); + if (warm_str != NULL) + { + /* Extract the value_str. */ + warm_str = strtok (warm_str, "="); + value_str = strtok (NULL, ";"); + strcpy (nds32_isr_vectors[0].warm_name, value_str); + } + + return; +} +/* ------------------------------------------------------------- */ + +/* A helper function to emit section head template. */ +static void +nds32_emit_section_head_template (char section_name[], + char symbol_name[], + int align_value, + bool object_p) +{ + const char *flags_str; + const char *type_str; + + flags_str = (object_p) ? "\"a\"" : "\"ax\""; + type_str = (object_p) ? 
"@object" : "@function"; + + fprintf (asm_out_file, "\t.section\t%s, %s\n", section_name, flags_str); + fprintf (asm_out_file, "\t.align\t%d\n", align_value); + fprintf (asm_out_file, "\t.global\t%s\n", symbol_name); + fprintf (asm_out_file, "\t.type\t%s, %s\n", symbol_name, type_str); + fprintf (asm_out_file, "%s:\n", symbol_name); +} + +/* A helper function to emit section tail template. */ +static void +nds32_emit_section_tail_template (char symbol_name[]) +{ + fprintf (asm_out_file, "\t.size\t%s, .-%s\n", symbol_name, symbol_name); +} + +/* Function to emit isr jump table section. */ +static void +nds32_emit_isr_jmptbl_section (int vector_id) +{ + char section_name[100]; + char symbol_name[100]; + + /* A critical isr does not need jump table section because + its behavior is not performed by two-level handler. */ + if (nds32_isr_vectors[vector_id].nested_type == NDS32_CRITICAL) + { + fprintf (asm_out_file, "\t! The vector %02d is a critical isr !\n", + vector_id); + return; + } + + /* Prepare jmptbl section and symbol name. */ + snprintf (section_name, sizeof (section_name), + ".nds32_jmptbl.%02d", vector_id); + snprintf (symbol_name, sizeof (symbol_name), + "_nds32_jmptbl_%02d", vector_id); + + nds32_emit_section_head_template (section_name, symbol_name, 2, true); + fprintf (asm_out_file, "\t.word\t%s\n", + nds32_isr_vectors[vector_id].func_name); + nds32_emit_section_tail_template (symbol_name); +} + +/* Function to emit isr vector section. */ +static void +nds32_emit_isr_vector_section (int vector_id) +{ + unsigned int vector_number_offset = 0; + const char *c_str = "CATEGORY"; + const char *sr_str = "SR"; + const char *nt_str = "NT"; + char first_level_handler_name[100]; + char section_name[100]; + char symbol_name[100]; + + /* Set the vector number offset so that we can calculate + the value that user specifies in the attribute. + We also prepare the category string for first level handler name. */ + switch (nds32_isr_vectors[vector_id].category) + { + case NDS32_ISR_INTERRUPT: + vector_number_offset = 9; + c_str = "i"; + break; + case NDS32_ISR_EXCEPTION: + vector_number_offset = 0; + c_str = "e"; + break; + case NDS32_ISR_NONE: + case NDS32_ISR_RESET: + /* Normally it should not be here. */ + gcc_unreachable (); + break; + } + + /* Prepare save reg string for first level handler name. */ + switch (nds32_isr_vectors[vector_id].save_reg) + { + case NDS32_SAVE_ALL: + sr_str = "sa"; + break; + case NDS32_PARTIAL_SAVE: + sr_str = "ps"; + break; + } + + /* Prepare nested type string for first level handler name. */ + switch (nds32_isr_vectors[vector_id].nested_type) + { + case NDS32_NESTED: + nt_str = "ns"; + break; + case NDS32_NOT_NESTED: + nt_str = "nn"; + break; + case NDS32_NESTED_READY: + nt_str = "nr"; + break; + case NDS32_CRITICAL: + /* The critical isr is not performed by two-level handler. */ + nt_str = ""; + break; + } + + /* Now we can create first level handler name. */ + snprintf (first_level_handler_name, sizeof (first_level_handler_name), + "_nds32_%s_%s_%s", c_str, sr_str, nt_str); + + /* Prepare vector section and symbol name. */ + snprintf (section_name, sizeof (section_name), + ".nds32_vector.%02d", vector_id); + snprintf (symbol_name, sizeof (symbol_name), + "_nds32_vector_%02d", vector_id); + + + /* Everything is ready. We can start emit vector section content. */ + nds32_emit_section_head_template (section_name, symbol_name, + floor_log2 (nds32_isr_vector_size), false); + + /* First we check if it is a critical isr. 
+ If so, jump to user handler directly; otherwise, the instructions + in the vector section may be different according to the vector size. */ + if (nds32_isr_vectors[vector_id].nested_type == NDS32_CRITICAL) + { + /* This block is for critical isr. Jump to user handler directly. */ + fprintf (asm_out_file, "\tj\t%s ! jump to user handler directly\n", + nds32_isr_vectors[vector_id].func_name); + } + else if (nds32_isr_vector_size == 4) + { + /* This block is for 4-byte vector size. + Hardware $VID support is necessary and only one instruction + is needed in vector section. */ + fprintf (asm_out_file, "\tj\t%s ! jump to first level handler\n", + first_level_handler_name); + } + else + { + /* This block is for 16-byte vector size. + There is NO hardware $VID so that we need several instructions + such as pushing GPRs and preparing software vid at vector section. + For pushing GPRs, there are four variations for + 16-byte vector content and we have to handle each combination. + For preparing software vid, note that the vid need to + be substracted vector_number_offset. */ + if (TARGET_REDUCED_REGS) + { + if (nds32_isr_vectors[vector_id].save_reg == NDS32_SAVE_ALL) + { + /* Case of reduced set registers and save_all attribute. */ + fprintf (asm_out_file, "\t! reduced set regs + save_all\n"); + fprintf (asm_out_file, "\tsmw.adm\t$r15, [$sp], $r15, 0xf\n"); + fprintf (asm_out_file, "\tsmw.adm\t$r0, [$sp], $r10, 0x0\n"); + + } + else + { + /* Case of reduced set registers and partial_save attribute. */ + fprintf (asm_out_file, "\t! reduced set regs + partial_save\n"); + fprintf (asm_out_file, "\tsmw.adm\t$r15, [$sp], $r15, 0x2\n"); + fprintf (asm_out_file, "\tsmw.adm\t$r0, [$sp], $r5, 0x0\n"); + } + } + else + { + if (nds32_isr_vectors[vector_id].save_reg == NDS32_SAVE_ALL) + { + /* Case of full set registers and save_all attribute. */ + fprintf (asm_out_file, "\t! full set regs + save_all\n"); + fprintf (asm_out_file, "\tsmw.adm\t$r0, [$sp], $r27, 0xf\n"); + } + else + { + /* Case of full set registers and partial_save attribute. */ + fprintf (asm_out_file, "\t! full set regs + partial_save\n"); + fprintf (asm_out_file, "\tsmw.adm\t$r15, [$sp], $r27, 0x2\n"); + fprintf (asm_out_file, "\tsmw.adm\t$r0, [$sp], $r5, 0x0\n"); + } + } + + fprintf (asm_out_file, "\tmovi\t$r0, %d ! preparing software vid\n", + vector_id - vector_number_offset); + fprintf (asm_out_file, "\tj\t%s ! jump to first level handler\n", + first_level_handler_name); + } + + nds32_emit_section_tail_template (symbol_name); +} + +/* Function to emit isr reset handler content. + Including all jmptbl/vector references, jmptbl section, + vector section, nmi handler section, and warm handler section. */ +static void +nds32_emit_isr_reset_content (void) +{ + unsigned int i; + unsigned int total_n_vectors; + char reset_handler_name[100]; + char section_name[100]; + char symbol_name[100]; + + total_n_vectors = nds32_isr_vectors[0].total_n_vectors; + + fprintf (asm_out_file, "\t! RESET HANDLER CONTENT - BEGIN !\n"); + + /* Create references in .rodata according to total number of vectors. */ + fprintf (asm_out_file, "\t.section\t.rodata\n"); + fprintf (asm_out_file, "\t.align\t2\n"); + + /* Emit jmptbl references. */ + fprintf (asm_out_file, "\t ! references to jmptbl section entries\n"); + for (i = 0; i < total_n_vectors; i++) + fprintf (asm_out_file, "\t.word\t_nds32_jmptbl_%02d\n", i); + + /* Emit vector references. */ + fprintf (asm_out_file, "\t ! 
references to vector section entries\n"); + for (i = 0; i < total_n_vectors; i++) + fprintf (asm_out_file, "\t.word\t_nds32_vector_%02d\n", i); + + /* Emit jmptbl_00 section. */ + snprintf (section_name, sizeof (section_name), ".nds32_jmptbl.00"); + snprintf (symbol_name, sizeof (symbol_name), "_nds32_jmptbl_00"); + + fprintf (asm_out_file, "\t! ....................................\n"); + nds32_emit_section_head_template (section_name, symbol_name, 2, true); + fprintf (asm_out_file, "\t.word\t%s\n", + nds32_isr_vectors[0].func_name); + nds32_emit_section_tail_template (symbol_name); + + /* Emit vector_00 section. */ + snprintf (section_name, sizeof (section_name), ".nds32_vector.00"); + snprintf (symbol_name, sizeof (symbol_name), "_nds32_vector_00"); + snprintf (reset_handler_name, sizeof (reset_handler_name), + "_nds32_reset"); + + fprintf (asm_out_file, "\t! ....................................\n"); + nds32_emit_section_head_template (section_name, symbol_name, + floor_log2 (nds32_isr_vector_size), false); + fprintf (asm_out_file, "\tj\t%s ! jump to reset handler\n", + reset_handler_name); + nds32_emit_section_tail_template (symbol_name); + + /* Emit nmi handler section. */ + snprintf (section_name, sizeof (section_name), ".nds32_nmih"); + snprintf (symbol_name, sizeof (symbol_name), "_nds32_nmih"); + + fprintf (asm_out_file, "\t! ....................................\n"); + nds32_emit_section_head_template (section_name, symbol_name, 2, true); + fprintf (asm_out_file, "\t.word\t%s\n", + (strlen (nds32_isr_vectors[0].nmi_name) == 0) + ? "0" + : nds32_isr_vectors[0].nmi_name); + nds32_emit_section_tail_template (symbol_name); + + /* Emit warm handler section. */ + snprintf (section_name, sizeof (section_name), ".nds32_wrh"); + snprintf (symbol_name, sizeof (symbol_name), "_nds32_wrh"); + + fprintf (asm_out_file, "\t! ....................................\n"); + nds32_emit_section_head_template (section_name, symbol_name, 2, true); + fprintf (asm_out_file, "\t.word\t%s\n", + (strlen (nds32_isr_vectors[0].warm_name) == 0) + ? "0" + : nds32_isr_vectors[0].warm_name); + nds32_emit_section_tail_template (symbol_name); + + fprintf (asm_out_file, "\t! RESET HANDLER CONTENT - END !\n"); +} + +/* Function for nds32_merge_decl_attributes() and nds32_insert_attributes() + to check if there are any conflict isr-specific attributes being set. + We need to check: + 1. Only 'save_all' or 'partial_save' in the attributes. + 2. Only 'nested', 'not_nested', or 'nested_ready' in the attributes. + 3. Only 'interrupt', 'exception', or 'reset' in the attributes. */ +void +nds32_check_isr_attrs_conflict (tree func_decl, tree func_attrs) +{ + int save_all_p, partial_save_p; + int nested_p, not_nested_p, nested_ready_p, critical_p; + int intr_p, excp_p, reset_p; + + /* Initialize variables. */ + save_all_p = partial_save_p = 0; + nested_p = not_nested_p = nested_ready_p = critical_p = 0; + intr_p = excp_p = reset_p = 0; + + /* We must check at MOST one attribute to set save-reg. */ + if (lookup_attribute ("save_all", func_attrs)) + save_all_p = 1; + if (lookup_attribute ("partial_save", func_attrs)) + partial_save_p = 1; + + if ((save_all_p + partial_save_p) > 1) + error ("multiple save reg attributes to function %qD", func_decl); + + /* We must check at MOST one attribute to set nested-type. 
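For illustration, these are the kinds of declarations the conflict checks in nds32_check_isr_attrs_conflict accept and reject; the handler names are hypothetical, the save-reg case corresponds to the "multiple save reg attributes" error just above, and the nested-type and category checks follow.

  /* Rejected: save_all and partial_save are mutually exclusive.  */
  void __attribute__((interrupt(0), save_all, partial_save)) bad_isr (void);

  /* Accepted: at most one save-reg scheme, one nested type, one category.  */
  void __attribute__((interrupt(0), save_all, not_nested)) timer_isr (void);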
*/ + if (lookup_attribute ("nested", func_attrs)) + nested_p = 1; + if (lookup_attribute ("not_nested", func_attrs)) + not_nested_p = 1; + if (lookup_attribute ("nested_ready", func_attrs)) + nested_ready_p = 1; + if (lookup_attribute ("critical", func_attrs)) + critical_p = 1; + + if ((nested_p + not_nested_p + nested_ready_p + critical_p) > 1) + error ("multiple nested types attributes to function %qD", func_decl); + + /* We must check at MOST one attribute to + set interrupt/exception/reset. */ + if (lookup_attribute ("interrupt", func_attrs)) + intr_p = 1; + if (lookup_attribute ("exception", func_attrs)) + excp_p = 1; + if (lookup_attribute ("reset", func_attrs)) + reset_p = 1; + + if ((intr_p + excp_p + reset_p) > 1) + error ("multiple interrupt attributes to function %qD", func_decl); + + /* Do not allow isr attributes under linux toolchain. */ + if (TARGET_LINUX_ABI && intr_p) + error ("cannot use interrupt attributes to function %qD " + "under linux toolchain", func_decl); + if (TARGET_LINUX_ABI && excp_p) + error ("cannot use exception attributes to function %qD " + "under linux toolchain", func_decl); + if (TARGET_LINUX_ABI && reset_p) + error ("cannot use reset attributes to function %qD " + "under linux toolchain", func_decl); +} + +/* Function to construct isr vectors information array. + We DO NOT HAVE TO check if the attributes are valid + because those works are supposed to be done on + nds32_merge_decl_attributes() and nds32_insert_attributes(). */ +void +nds32_construct_isr_vectors_information (tree func_attrs, + const char *func_name) +{ + tree save_all, partial_save; + tree nested, not_nested, nested_ready, critical; + tree intr, excp, reset; + + save_all = lookup_attribute ("save_all", func_attrs); + partial_save = lookup_attribute ("partial_save", func_attrs); + + nested = lookup_attribute ("nested", func_attrs); + not_nested = lookup_attribute ("not_nested", func_attrs); + nested_ready = lookup_attribute ("nested_ready", func_attrs); + critical = lookup_attribute ("critical", func_attrs); + + intr = lookup_attribute ("interrupt", func_attrs); + excp = lookup_attribute ("exception", func_attrs); + reset = lookup_attribute ("reset", func_attrs); + + /* If there is no interrupt/exception/reset, we can return immediately. */ + if (!intr && !excp && !reset) + return; + + /* ------------------------------------------------------------- */ + /* FIXME: + FOR BACKWARD COMPATIBILITY, we need to support following patterns: + + __attribute__((interrupt("XXX;YYY;id=ZZZ"))) + __attribute__((exception("XXX;YYY;id=ZZZ"))) + __attribute__((reset("vectors=XXX;nmi_func=YYY;warm_func=ZZZ"))) + + If interrupt/exception/reset appears and its argument is a + STRING_CST, we will parse string with some auxiliary functions + which set necessary isr information in the nds32_isr_vectors[] array. + After that, we can return immediately to avoid new-syntax isr + information construction. 
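As a concrete illustration of the deprecated string forms listed in the FIXME above, the declarations below use only keys that the parsing helpers earlier in this file actually look for ("id=", "vectors=", "nmi_func=", "warm_func="); the handler and function names are hypothetical.

  /* Old-style string arguments, still accepted for backward compatibility.  */
  void __attribute__((exception("id=2,3"))) bus_error_handler (void);
  void __attribute__((reset("vectors=16;nmi_func=my_nmi;warm_func=my_warm")))
       start_handler (void);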
*/ + if (intr != NULL_TREE + && TREE_CODE (TREE_VALUE (TREE_VALUE (intr))) == STRING_CST) + { + tree string_arg = TREE_VALUE (TREE_VALUE (intr)); + nds32_interrupt_attribute_parse_string (TREE_STRING_POINTER (string_arg), + func_name); + return; + } + if (excp != NULL_TREE + && TREE_CODE (TREE_VALUE (TREE_VALUE (excp))) == STRING_CST) + { + tree string_arg = TREE_VALUE (TREE_VALUE (excp)); + nds32_exception_attribute_parse_string (TREE_STRING_POINTER (string_arg), + func_name); + return; + } + if (reset != NULL_TREE + && TREE_CODE (TREE_VALUE (TREE_VALUE (reset))) == STRING_CST) + { + tree string_arg = TREE_VALUE (TREE_VALUE (reset)); + nds32_reset_attribute_parse_string (TREE_STRING_POINTER (string_arg), + func_name); + return; + } + /* ------------------------------------------------------------- */ + + /* If we are here, either we have interrupt/exception, + or reset attribute. */ + if (intr || excp) + { + tree id_list; + + /* Prepare id list so that we can traverse and set vector id. */ + id_list = (intr) ? (TREE_VALUE (intr)) : (TREE_VALUE (excp)); + + while (id_list) + { + tree id; + int vector_id; + unsigned int vector_number_offset; + + /* The way to handle interrupt or exception is the same, + we just need to take care of actual vector number. + For interrupt(0..63), the actual vector number is (9..72). + For exception(1..8), the actual vector number is (1..8). */ + vector_number_offset = (intr) ? (9) : (0); + + /* Pick up each vector id value. */ + id = TREE_VALUE (id_list); + /* Add vector_number_offset to get actual vector number. */ + vector_id = TREE_INT_CST_LOW (id) + vector_number_offset; + + /* Enable corresponding vector and set function name. */ + nds32_isr_vectors[vector_id].category = (intr) + ? (NDS32_ISR_INTERRUPT) + : (NDS32_ISR_EXCEPTION); + strcpy (nds32_isr_vectors[vector_id].func_name, func_name); + + /* Set register saving scheme. */ + if (save_all) + nds32_isr_vectors[vector_id].save_reg = NDS32_SAVE_ALL; + else if (partial_save) + nds32_isr_vectors[vector_id].save_reg = NDS32_PARTIAL_SAVE; + + /* Set nested type. */ + if (nested) + nds32_isr_vectors[vector_id].nested_type = NDS32_NESTED; + else if (not_nested) + nds32_isr_vectors[vector_id].nested_type = NDS32_NOT_NESTED; + else if (nested_ready) + nds32_isr_vectors[vector_id].nested_type = NDS32_NESTED_READY; + else if (critical) + nds32_isr_vectors[vector_id].nested_type = NDS32_CRITICAL; + + /* Advance to next id. */ + id_list = TREE_CHAIN (id_list); + } + } + else + { + tree id_list; + tree id; + tree nmi, warm; + + /* Deal with reset attribute. Its vector number is always 0. */ + nds32_isr_vectors[0].category = NDS32_ISR_RESET; + + /* Prepare id_list and identify id value so that + we can set total number of vectors. */ + id_list = TREE_VALUE (reset); + id = TREE_VALUE (id_list); + + /* The total vectors = interrupt + exception numbers + reset. + There are 8 exception and 1 reset in nds32 architecture. */ + nds32_isr_vectors[0].total_n_vectors = TREE_INT_CST_LOW (id) + 8 + 1; + strcpy (nds32_isr_vectors[0].func_name, func_name); + + /* Retrieve nmi and warm function. */ + nmi = lookup_attribute ("nmi", func_attrs); + warm = lookup_attribute ("warm", func_attrs); + + if (nmi != NULL_TREE) + { + tree nmi_func_list; + tree nmi_func; + + nmi_func_list = TREE_VALUE (nmi); + nmi_func = TREE_VALUE (nmi_func_list); + + /* Record nmi function name. 
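The non-string form handled by the code above takes integer vector ids plus separate flag attributes; a hedged sketch follows (names and id values are illustrative, and the nmi/warm companion attributes, whose arguments are recorded by name in the surrounding code, are omitted here).

  /* Interrupt vectors 4 and 5, partial register saving, ready for re-enabled nesting.  */
  void __attribute__((interrupt(4, 5), partial_save, nested_ready)) uart_isr (void);

  /* Reset handler covering 32 interrupts; the 8 exceptions and the reset vector
     are added on top by the total_n_vectors computation above.  */
  void __attribute__((reset(32))) cold_reset_handler (void);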
*/ + strcpy (nds32_isr_vectors[0].nmi_name, + IDENTIFIER_POINTER (nmi_func)); + } + + if (warm != NULL_TREE) + { + tree warm_func_list; + tree warm_func; + + warm_func_list = TREE_VALUE (warm); + warm_func = TREE_VALUE (warm_func_list); + + /* Record warm function name. */ + strcpy (nds32_isr_vectors[0].warm_name, + IDENTIFIER_POINTER (warm_func)); + } + } +} + +void +nds32_asm_file_start_for_isr (void) +{ + int i; + + /* Initialize isr vector information array before compiling functions. */ + for (i = 0; i < NDS32_N_ISR_VECTORS; i++) + { + nds32_isr_vectors[i].category = NDS32_ISR_NONE; + strcpy (nds32_isr_vectors[i].func_name, ""); + nds32_isr_vectors[i].save_reg = NDS32_PARTIAL_SAVE; + nds32_isr_vectors[i].nested_type = NDS32_NOT_NESTED; + nds32_isr_vectors[i].total_n_vectors = 0; + strcpy (nds32_isr_vectors[i].nmi_name, ""); + strcpy (nds32_isr_vectors[i].warm_name, ""); + } +} + +void nds32_asm_file_end_for_isr (void) +{ + int i; + + /* If all the vectors are NDS32_ISR_NONE, we can return immediately. */ + for (i = 0; i < NDS32_N_ISR_VECTORS; i++) + if (nds32_isr_vectors[i].category != NDS32_ISR_NONE) + break; + + if (i == NDS32_N_ISR_VECTORS) + return; + + /* At least one vector is NOT NDS32_ISR_NONE, + we should output isr vector information. */ + fprintf (asm_out_file, "\t! ------------------------------------\n"); + fprintf (asm_out_file, "\t! The isr vector information:\n"); + fprintf (asm_out_file, "\t! ------------------------------------\n"); + + /* Check reset handler first. Its vector number is always 0. */ + if (nds32_isr_vectors[0].category == NDS32_ISR_RESET) + { + nds32_emit_isr_reset_content (); + fprintf (asm_out_file, "\t! ------------------------------------\n"); + } + + /* Check other vectors, starting from vector number 1. */ + for (i = 1; i < NDS32_N_ISR_VECTORS; i++) + { + if (nds32_isr_vectors[i].category == NDS32_ISR_INTERRUPT + || nds32_isr_vectors[i].category == NDS32_ISR_EXCEPTION) + { + /* Found one vector which is interupt or exception. + Output its jmptbl and vector section content. */ + fprintf (asm_out_file, "\t! interrupt/exception vector %02d\n", i); + fprintf (asm_out_file, "\t! ------------------------------------\n"); + nds32_emit_isr_jmptbl_section (i); + fprintf (asm_out_file, "\t! ....................................\n"); + nds32_emit_isr_vector_section (i); + fprintf (asm_out_file, "\t! ------------------------------------\n"); + } + } +} + +/* Return true if FUNC is a isr function. */ +bool +nds32_isr_function_p (tree func) +{ + tree t_intr; + tree t_excp; + tree t_reset; + + tree attrs; + + if (TREE_CODE (func) != FUNCTION_DECL) + abort (); + + attrs = DECL_ATTRIBUTES (func); + + t_intr = lookup_attribute ("interrupt", attrs); + t_excp = lookup_attribute ("exception", attrs); + t_reset = lookup_attribute ("reset", attrs); + + return ((t_intr != NULL_TREE) + || (t_excp != NULL_TREE) + || (t_reset != NULL_TREE)); +} + +/* Return true if FUNC is a isr function with critical attribute. */ +bool +nds32_isr_function_critical_p (tree func) +{ + tree t_intr; + tree t_excp; + tree t_critical; + + tree attrs; + + if (TREE_CODE (func) != FUNCTION_DECL) + abort (); + + attrs = DECL_ATTRIBUTES (func); + + t_intr = lookup_attribute ("interrupt", attrs); + t_excp = lookup_attribute ("exception", attrs); + + t_critical = lookup_attribute ("critical", attrs); + + /* If both interrupt and exception attribute does not appear, + we can return false immediately. 
*/ + if ((t_intr == NULL_TREE) && (t_excp == NULL_TREE)) + return false; + + /* Here we can guarantee either interrupt or ecxception attribute + does exist, so further check critical attribute. + If it also appears, we can return true. */ + if (t_critical != NULL_TREE) + return true; + + /* ------------------------------------------------------------- */ + /* FIXME: + FOR BACKWARD COMPATIBILITY, we need to handle string type. + If the string 'critical' appears in the interrupt/exception + string argument, we can return true. */ + if (t_intr != NULL_TREE || t_excp != NULL_TREE) + { + char target_str[100]; + char *critical_str; + tree t_check; + tree string_arg; + + t_check = t_intr ? t_intr : t_excp; + if (TREE_CODE (TREE_VALUE (TREE_VALUE (t_check))) == STRING_CST) + { + string_arg = TREE_VALUE (TREE_VALUE (t_check)); + strcpy (target_str, TREE_STRING_POINTER (string_arg)); + critical_str = strstr (target_str, "critical"); + + /* Found 'critical' string, so return true. */ + if (critical_str) + return true; + } + } + /* ------------------------------------------------------------- */ + + /* Other cases, this isr function is not critical type. */ + return false; +} + +/* ------------------------------------------------------------- */ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32_isr.h gcc-4.9.4/gcc/config/nds32/nds32_isr.h --- gcc-4.9.4.orig/gcc/config/nds32/nds32_isr.h 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32_isr.h 2016-08-08 20:37:45.594273497 +0200 @@ -0,0 +1,526 @@ +/* Intrinsic definitions of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2015 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _NDS32_ISR_H +#define _NDS32_ISR_H + +/* Attribute of a interrupt or exception handler: + + NDS32_READY_NESTED: This handler is interruptible if user re-enable GIE bit. + NDS32_NESTED : This handler is interruptible. This is not suitable + exception handler. + NDS32_NOT_NESTED : This handler is NOT interruptible. Users have to do + some work if nested is wanted + NDS32_CRITICAL : This handler is critical ISR, which means it is small + and efficient. */ +#define NDS32_READY_NESTED 0 +#define NDS32_NESTED 1 +#define NDS32_NOT_NESTED 2 +#define NDS32_CRITICAL 3 + +/* Attribute of a interrupt or exception handler: + + NDS32_SAVE_ALL_REGS : Save all registers in a table. + NDS32_SAVE_PARTIAL_REGS: Save partial registers. */ +#define NDS32_SAVE_CALLER_REGS 0 +#define NDS32_SAVE_ALL_REGS 1 + +/* There are two version of Register table for interrupt and exception handler, + one for 16-register CPU the other for 32-register CPU. 
These structures are + used for context switching or system call handling. The address of this + data can be get from the input argument of the handler functions. + + For system call handling, r0 to r5 are used to pass arguments. If more + arguments are used they are put into the stack and its starting address is + in sp. Return value of system call can be put into r0 and r1 upon exit from + system call handler. System call ID is in a system register and it can be + fetched via intrinsic function. For more information please read ABI and + other related documents. + + For context switching, at least 2 values need to saved in kernel. One is + IPC and the other is the stack address of current task. Use intrinsic + function to get IPC and the input argument of the handler functions + 8 to + get stack address of current task. To do context switching, you replace + new_sp with the stack address of new task and replace IPC system register + with IPC of new task, then, just return from handler. The context switching + will happen. */ + +/* Register table for exception handler; 32-register version. */ +typedef struct +{ + int r0; + int r1; + int r2; + int r3; + int r4; + int r5; + int r6; + int r7; + int r8; + int r9; + int r10; + int r11; + int r12; + int r13; + int r14; + int r15; + int r16; + int r17; + int r18; + int r19; + int r20; + int r21; + int r22; + int r23; + int r24; + int r25; + int r26; + int r27; + int fp; + int gp; + int lp; + int sp; +} NDS32_GPR32; + +/* Register table for exception handler; 16-register version. */ +typedef struct +{ + int r0; + int r1; + int r2; + int r3; + int r4; + int r5; + int r6; + int r7; + int r8; + int r9; + int r10; + int r15; + int fp; + int gp; + int lp; + int sp; +} NDS32_GPR16; + + +/* Use NDS32_REG32_TAB or NDS32_REG16_TAB in your program to + access register table. 
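A minimal sketch of the convention described in the comment above, using the NDS32_GPR32 layout just defined: system-call arguments arrive in r0..r5 and the result is written back to r0 before the handler returns. The dispatcher and the single handled call are hypothetical simplifications (the real system-call id comes from a system register, as the comment notes).

  void syscall_dispatch (NDS32_GPR32 *regs)
  {
    /* Hypothetical call: add the first two arguments and return the sum.  */
    int a0 = regs->r0;
    int a1 = regs->r1;
    regs->r0 = a0 + a1;   /* the return value travels back in r0 */
  }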
*/ +typedef struct +{ + union + { + int reg_a[32] ; + NDS32_GPR32 reg_s ; + } u ; +} NDS32_REG32_TAB; + +typedef struct +{ + union + { + int reg_a[16] ; + NDS32_GPR16 reg_s ; + } u ; +} NDS32_REG16_TAB; + +typedef struct +{ + int d0lo; + int d0hi; + int d1lo; + int d1hi; +} NDS32_DX_TAB; + +typedef struct +{ +#ifdef __NDS32_EB__ + float fsr0; + float fsr1; + float fsr2; + float fsr3; + float fsr4; + float fsr5; + float fsr6; + float fsr7; +#else + float fsr1; + float fsr0; + float fsr3; + float fsr2; + float fsr5; + float fsr4; + float fsr7; + float fsr6; +#endif +} NDS32_FSR8; + +typedef struct +{ + double dsr0; + double dsr1; + double dsr2; + double dsr3; +} NDS32_DSR4; + +typedef struct +{ +#ifdef __NDS32_EB__ + float fsr0; + float fsr1; + float fsr2; + float fsr3; + float fsr4; + float fsr5; + float fsr6; + float fsr7; + float fsr8; + float fsr9; + float fsr10; + float fsr11; + float fsr12; + float fsr13; + float fsr14; + float fsr15; +#else + float fsr1; + float fsr0; + float fsr3; + float fsr2; + float fsr5; + float fsr4; + float fsr7; + float fsr6; + float fsr9; + float fsr8; + float fsr11; + float fsr10; + float fsr13; + float fsr12; + float fsr15; + float fsr14; +#endif +} NDS32_FSR16; + +typedef struct +{ + double dsr0; + double dsr1; + double dsr2; + double dsr3; + double dsr4; + double dsr5; + double dsr6; + double dsr7; +} NDS32_DSR8; + +typedef struct +{ +#ifdef __NDS32_EB__ + float fsr0; + float fsr1; + float fsr2; + float fsr3; + float fsr4; + float fsr5; + float fsr6; + float fsr7; + float fsr8; + float fsr9; + float fsr10; + float fsr11; + float fsr12; + float fsr13; + float fsr14; + float fsr15; + float fsr16; + float fsr17; + float fsr18; + float fsr19; + float fsr20; + float fsr21; + float fsr22; + float fsr23; + float fsr24; + float fsr25; + float fsr26; + float fsr27; + float fsr28; + float fsr29; + float fsr30; + float fsr31; +#else + float fsr1; + float fsr0; + float fsr3; + float fsr2; + float fsr5; + float fsr4; + float fsr7; + float fsr6; + float fsr9; + float fsr8; + float fsr11; + float fsr10; + float fsr13; + float fsr12; + float fsr15; + float fsr14; + float fsr17; + float fsr16; + float fsr19; + float fsr18; + float fsr21; + float fsr20; + float fsr23; + float fsr22; + float fsr25; + float fsr24; + float fsr27; + float fsr26; + float fsr29; + float fsr28; + float fsr31; + float fsr30; +#endif +} NDS32_FSR32; + +typedef struct +{ + double dsr0; + double dsr1; + double dsr2; + double dsr3; + double dsr4; + double dsr5; + double dsr6; + double dsr7; + double dsr8; + double dsr9; + double dsr10; + double dsr11; + double dsr12; + double dsr13; + double dsr14; + double dsr15; +} NDS32_DSR16; + +typedef struct +{ + double dsr0; + double dsr1; + double dsr2; + double dsr3; + double dsr4; + double dsr5; + double dsr6; + double dsr7; + double dsr8; + double dsr9; + double dsr10; + double dsr11; + double dsr12; + double dsr13; + double dsr14; + double dsr15; + double dsr16; + double dsr17; + double dsr18; + double dsr19; + double dsr20; + double dsr21; + double dsr22; + double dsr23; + double dsr24; + double dsr25; + double dsr26; + double dsr27; + double dsr28; + double dsr29; + double dsr30; + double dsr31; +} NDS32_DSR32; + +typedef struct +{ + union + { + NDS32_FSR8 fsr_s ; + NDS32_DSR4 dsr_s ; + } u ; +} NDS32_FPU8_TAB; + +typedef struct +{ + union + { + NDS32_FSR16 fsr_s ; + NDS32_DSR8 dsr_s ; + } u ; +} NDS32_FPU16_TAB; + +typedef struct +{ + union + { + NDS32_FSR32 fsr_s ; + NDS32_DSR16 dsr_s ; + } u ; +} NDS32_FPU32_TAB; + +typedef struct +{ + union + { + 
NDS32_FSR32 fsr_s ; + NDS32_DSR32 dsr_s ; + } u ; +} NDS32_FPU64_TAB; + +typedef struct +{ + int ipc; + int ipsw; +#if defined(NDS32_EXT_FPU_CONFIG_0) + NDS32_FPU8_TAB fpr; +#elif defined(NDS32_EXT_FPU_CONFIG_1) + NDS32_FPU16_TAB fpr; +#elif defined(NDS32_EXT_FPU_CONFIG_2) + NDS32_FPU32_TAB fpr; +#elif defined(NDS32_EXT_FPU_CONFIG_3) + NDS32_FPU64_TAB fpr; +#endif +#if __NDS32_DX_REGS__ + NDS32_DX_TAB dxr; +#endif +#if __NDS32_EXT_IFC__ + int ifc_lp; + int filler; +#endif +#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS + NDS32_REG16_TAB gpr; +#else + NDS32_REG32_TAB gpr; +#endif +} NDS32_CONTEXT; + +/* Predefined Vector Definition. + + For IVIC Mode: 9 to 14 are for hardware interrupt + and 15 is for software interrupt. + For EVIC Mode: 9 to 72 are for hardware interrupt + and software interrupt can be routed to any one of them. + + You may want to define your hardware interrupts in the following way + for easy maintainance. + + IVIC mode: + #define MY_HW_IVIC_TIMER NDS32_VECTOR_INTERRUPT_HW0 + 1 + #define MY_HW_IVIC_USB NDS32_VECTOR_INTERRUPT_HW0 + 3 + EVIC mode: + #define MY_HW_EVIC_DMA NDS32_VECTOR_INTERRUPT_HW0 + 2 + #define MY_HW_EVIC_SWI NDS32_VECTOR_INTERRUPT_HW0 + 10 */ +#define NDS32_VECTOR_RESET 0 +#define NDS32_VECTOR_TLB_FILL 1 +#define NDS32_VECTOR_PTE_NOT_PRESENT 2 +#define NDS32_VECTOR_TLB_MISC 3 +#define NDS32_VECTOR_TLB_VLPT_MISS 4 +#define NDS32_VECTOR_MACHINE_ERROR 5 +#define NDS32_VECTOR_DEBUG_RELATED 6 +#define NDS32_VECTOR_GENERAL_EXCEPTION 7 +#define NDS32_VECTOR_SYSCALL 8 +#define NDS32_VECTOR_INTERRUPT_HW0 9 +#define NDS32_VECTOR_INTERRUPT_HW1 10 +#define NDS32_VECTOR_INTERRUPT_HW2 11 +#define NDS32_VECTOR_INTERRUPT_HW3 12 +#define NDS32_VECTOR_INTERRUPT_HW4 13 +#define NDS32_VECTOR_INTERRUPT_HW5 14 +#define NDS32_VECTOR_INTERRUPT_HW6 15 +#define NDS32_VECTOR_SWI 15 /* THIS IS FOR IVIC MODE ONLY */ +#define NDS32_VECTOR_INTERRUPT_HW7 16 +#define NDS32_VECTOR_INTERRUPT_HW8 17 +#define NDS32_VECTOR_INTERRUPT_HW9 18 +#define NDS32_VECTOR_INTERRUPT_HW10 19 +#define NDS32_VECTOR_INTERRUPT_HW11 20 +#define NDS32_VECTOR_INTERRUPT_HW12 21 +#define NDS32_VECTOR_INTERRUPT_HW13 22 +#define NDS32_VECTOR_INTERRUPT_HW14 23 +#define NDS32_VECTOR_INTERRUPT_HW15 24 +#define NDS32_VECTOR_INTERRUPT_HW16 25 +#define NDS32_VECTOR_INTERRUPT_HW17 26 +#define NDS32_VECTOR_INTERRUPT_HW18 27 +#define NDS32_VECTOR_INTERRUPT_HW19 28 +#define NDS32_VECTOR_INTERRUPT_HW20 29 +#define NDS32_VECTOR_INTERRUPT_HW21 30 +#define NDS32_VECTOR_INTERRUPT_HW22 31 +#define NDS32_VECTOR_INTERRUPT_HW23 32 +#define NDS32_VECTOR_INTERRUPT_HW24 33 +#define NDS32_VECTOR_INTERRUPT_HW25 34 +#define NDS32_VECTOR_INTERRUPT_HW26 35 +#define NDS32_VECTOR_INTERRUPT_HW27 36 +#define NDS32_VECTOR_INTERRUPT_HW28 37 +#define NDS32_VECTOR_INTERRUPT_HW29 38 +#define NDS32_VECTOR_INTERRUPT_HW30 39 +#define NDS32_VECTOR_INTERRUPT_HW31 40 +#define NDS32_VECTOR_INTERRUPT_HW32 41 +#define NDS32_VECTOR_INTERRUPT_HW33 42 +#define NDS32_VECTOR_INTERRUPT_HW34 43 +#define NDS32_VECTOR_INTERRUPT_HW35 44 +#define NDS32_VECTOR_INTERRUPT_HW36 45 +#define NDS32_VECTOR_INTERRUPT_HW37 46 +#define NDS32_VECTOR_INTERRUPT_HW38 47 +#define NDS32_VECTOR_INTERRUPT_HW39 48 +#define NDS32_VECTOR_INTERRUPT_HW40 49 +#define NDS32_VECTOR_INTERRUPT_HW41 50 +#define NDS32_VECTOR_INTERRUPT_HW42 51 +#define NDS32_VECTOR_INTERRUPT_HW43 52 +#define NDS32_VECTOR_INTERRUPT_HW44 53 +#define NDS32_VECTOR_INTERRUPT_HW45 54 +#define NDS32_VECTOR_INTERRUPT_HW46 55 +#define NDS32_VECTOR_INTERRUPT_HW47 56 +#define NDS32_VECTOR_INTERRUPT_HW48 57 +#define 
NDS32_VECTOR_INTERRUPT_HW49 58 +#define NDS32_VECTOR_INTERRUPT_HW50 59 +#define NDS32_VECTOR_INTERRUPT_HW51 60 +#define NDS32_VECTOR_INTERRUPT_HW52 61 +#define NDS32_VECTOR_INTERRUPT_HW53 62 +#define NDS32_VECTOR_INTERRUPT_HW54 63 +#define NDS32_VECTOR_INTERRUPT_HW55 64 +#define NDS32_VECTOR_INTERRUPT_HW56 65 +#define NDS32_VECTOR_INTERRUPT_HW57 66 +#define NDS32_VECTOR_INTERRUPT_HW58 67 +#define NDS32_VECTOR_INTERRUPT_HW59 68 +#define NDS32_VECTOR_INTERRUPT_HW60 69 +#define NDS32_VECTOR_INTERRUPT_HW61 70 +#define NDS32_VECTOR_INTERRUPT_HW62 71 +#define NDS32_VECTOR_INTERRUPT_HW63 72 + +#define NDS32ATTR_RESET(option) __attribute__((reset(option))) +#define NDS32ATTR_EXCEPT(type) __attribute__((exception(type))) +#define NDS32ATTR_EXCEPTION(type) __attribute__((exception(type))) +#define NDS32ATTR_INTERRUPT(type) __attribute__((interrupt(type))) +#define NDS32ATTR_ISR(type) __attribute__((interrupt(type))) + +#endif /* nds32_isr.h */ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-linux.opt gcc-4.9.4/gcc/config/nds32/nds32-linux.opt --- gcc-4.9.4.orig/gcc/config/nds32/nds32-linux.opt 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-linux.opt 2016-08-08 20:37:45.506270091 +0200 @@ -0,0 +1,16 @@ +mcmodel= +Target RejectNegative Joined Enum(nds32_cmodel_type) Var(nds32_cmodel_option) Init(CMODEL_LARGE) +Specify the address generation strategy for code model. + +Enum +Name(nds32_cmodel_type) Type(enum nds32_cmodel_type) +Known cmodel types (for use with the -mcmodel= option): + +EnumValue +Enum(nds32_cmodel_type) String(small) Value(CMODEL_SMALL) + +EnumValue +Enum(nds32_cmodel_type) String(medium) Value(CMODEL_MEDIUM) + +EnumValue +Enum(nds32_cmodel_type) String(large) Value(CMODEL_LARGE) diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-load-store-opt.c gcc-4.9.4/gcc/config/nds32/nds32-load-store-opt.c --- gcc-4.9.4.orig/gcc/config/nds32/nds32-load-store-opt.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-load-store-opt.c 2016-08-08 20:37:45.506270091 +0200 @@ -0,0 +1,820 @@ +/* load-store-opt pass of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2015 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" /* Required by recog.h. */ +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" /* For DFA state_t. */ +#include "insn-codes.h" /* For CODE_FOR_xxx. */ +#include "reload.h" /* For push_reload (). */ +#include "flags.h" +#include "function.h" +#include "expr.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "df.h" +#include "tm_p.h" +#include "tm-constrs.h" +#include "target.h" +#include "target-def.h" +#include "langhooks.h" /* For add_builtin_function (). 
*/ +#include "ggc.h" +#include "tree-pass.h" +#include "target-globals.h" +#include "ira-int.h" +#include "nds32-load-store-opt.h" +#include + +#define NDS32_GPR_NUM 32 + +static new_base_reg_info_t gen_new_base (rtx, + offset_info_t, + unsigned, + HOST_WIDE_INT, + HOST_WIDE_INT); + +static bool debug_live_reg = false; + +static const load_store_optimize_pass *load_store_optimizes[] = +{ + /* allow_regclass, new_base_regclass, + offset_lower_bound, offset_upper_bound, + load_only_p, name */ + new load_store_optimize_pass ( + LOW_REGS, LOW_REGS, + 0, (32-4), + false, "lswi333"), + new load_store_optimize_pass ( + LOW_REGS, FRAME_POINTER_REG, + 0, (512-4), + false, "lswi37"), + new load_store_optimize_pass ( + MIDDLE_REGS, GENERAL_REGS, + 0, 0, + false, "lswi450"), + new load_store_optimize_pass ( + MIDDLE_REGS, R8_REG, + -128, -4, + true, "lwi45fe") +}; + +static const int N_LOAD_STORE_OPT_TYPE = sizeof (load_store_optimizes) + / sizeof (load_store_optimize_pass*); + +load_store_optimize_pass +::load_store_optimize_pass (enum reg_class allow_regclass, + enum reg_class new_base_regclass, + HOST_WIDE_INT offset_lower_bound, + HOST_WIDE_INT offset_upper_bound, + bool load_only_p, + const char *name) + : m_allow_regclass (allow_regclass), + m_new_base_regclass (new_base_regclass), + m_offset_lower_bound (offset_lower_bound), + m_offset_upper_bound (offset_upper_bound), + m_load_only_p (load_only_p), + m_name (name) +{ + gcc_assert (offset_lower_bound <= offset_upper_bound); +} + +int +load_store_optimize_pass::calc_gain (HARD_REG_SET *available_regset, + offset_info_t offset_info, + load_store_infos_t *load_store_info) const +{ + int extra_cost = 0; + int gain = 0; + unsigned i; + unsigned chain_size; + unsigned new_base_regnum; + HOST_WIDE_INT allow_range = m_offset_upper_bound - m_offset_lower_bound; + new_base_regnum = find_available_reg (available_regset, m_new_base_regclass); + chain_size = load_store_info->length (); + + if (new_base_regnum == INVALID_REGNUM) + { + if (dump_file) + fprintf (dump_file, + "%s have no avariable register, so give up try %s\n", + reg_class_names[m_new_base_regclass], + m_name); + return 0; + } + else if (dump_file) + fprintf (dump_file, + "%s is avariable, get %s, try %s, chain size = %u\n", + reg_class_names[m_new_base_regclass], + reg_names[new_base_regnum], + m_name, + chain_size); + + HOST_WIDE_INT range = offset_info.max_offset - offset_info.min_offset; + + if (range > allow_range) + { + /* TODO: We can perform load-store opt for only part of load store. */ + if (dump_file) + fprintf (dump_file, + "range is too large for %s" + " (range = " HOST_WIDE_INT_PRINT_DEC ", " + "allow_range = " HOST_WIDE_INT_PRINT_DEC ")\n", + m_name, range, allow_range); + return 0; + } + + if (offset_info.min_offset >= m_offset_lower_bound + && offset_info.max_offset <= m_offset_upper_bound) + { + /* mov55. */ + extra_cost = 2; + } + else + { + if (satisfies_constraint_Is15 (GEN_INT (offset_info.min_offset + - m_offset_lower_bound))) + { + /* add. */ + extra_cost = 4; + } + else + { + /* TODO: Try m_offset_upper_bound instead of m_offset_lower_bound + again. */ + /* add45 + movi. */ + if (satisfies_constraint_Is20 (GEN_INT (offset_info.min_offset + - m_offset_lower_bound))) + extra_cost = 6; + else + return -1; /* Give up if this constant is too large. 
*/ + } + } + + for (i = 0; i < chain_size; ++i) + { + if (m_load_only_p && !(*load_store_info)[i].load_p) + continue; + + if (in_reg_class_p ((*load_store_info)[i].reg, m_allow_regclass)) + gain += 2; + } + + if (dump_file) + fprintf (dump_file, + "%s: gain = %d extra_cost = %d\n", + m_name, gain, extra_cost); + + return gain - extra_cost; +} + + +void +load_store_optimize_pass::do_optimize ( + HARD_REG_SET *available_regset, + offset_info_t offset_info, + load_store_infos_t *load_store_info) const +{ + new_base_reg_info_t new_base_reg_info; + rtx load_store_insn; + unsigned new_base_regnum; + + new_base_regnum = find_available_reg (available_regset, m_new_base_regclass); + gcc_assert (new_base_regnum != INVALID_REGNUM); + + new_base_reg_info = + gen_new_base ((*load_store_info)[0].base_reg, + offset_info, + new_base_regnum, + m_offset_lower_bound, m_offset_upper_bound); + unsigned i; + rtx insn; + insn = emit_insn_before (new_base_reg_info.set_insns[0], + (*load_store_info)[0].insn); + if (new_base_reg_info.n_set_insns > 1) + { + gcc_assert (new_base_reg_info.n_set_insns == 2); + emit_insn_before (new_base_reg_info.set_insns[1], insn); + } + + for (i = 0; i < load_store_info->length (); ++i) + { + if (m_load_only_p && !(*load_store_info)[i].load_p) + continue; + + if (!in_reg_class_p ((*load_store_info)[i].reg, m_allow_regclass)) + continue; + + HOST_WIDE_INT offset = (*load_store_info)[i].offset; + + if (new_base_reg_info.need_adjust_offset_p) + offset = offset + new_base_reg_info.adjust_offset; + + load_store_insn = + gen_reg_plus_imm_load_store ((*load_store_info)[i].reg, + new_base_reg_info.reg, + offset, + (*load_store_info)[i].load_p, + (*load_store_info)[i].mem); + + emit_insn_before (load_store_insn, (*load_store_info)[i].insn); + + delete_insn ((*load_store_info)[i].insn); + } + + /* Recompute it CFG, to update BB_END() instruction. */ + compute_bb_for_insn (); +} + +static new_base_reg_info_t +gen_new_base (rtx original_base_reg, + offset_info_t offset_info, + unsigned new_base_regno, + HOST_WIDE_INT offset_lower, + HOST_WIDE_INT offset_upper) +{ + new_base_reg_info_t new_base_reg_info; + + new_base_reg_info.reg = gen_rtx_REG (Pmode, new_base_regno); + + /* Setup register info. */ + ORIGINAL_REGNO (new_base_reg_info.reg) = ORIGINAL_REGNO (original_base_reg); + REG_ATTRS (new_base_reg_info.reg) = REG_ATTRS (original_base_reg); + + if (offset_info.max_offset <= offset_upper + && offset_info.min_offset >= offset_lower) + { + new_base_reg_info.set_insns[0] = gen_movsi (new_base_reg_info.reg, + original_base_reg); + new_base_reg_info.n_set_insns = 1; + new_base_reg_info.need_adjust_offset_p = false; + new_base_reg_info.adjust_offset = 0; + } + else + { + /* For example: + lwi45.fe allow -4 ~ -128 range: + offset_lower = #-4 + offset_upper = #-128 + + lwi $r2, [$r12 + #10] + -> + addi $r8, $r12, #14 ! $r8 = $r12 + #10 - offset_lower + ! = $r12 + #10 - #-4 + ! = $r12 + #14 + lwi45.fe $r2, [$r8 - #4] ! [$r8 - #4] + ! = [$r12 + #14 - #4] + ! 
= [$r12 + #10] + */ + new_base_reg_info.adjust_offset = + -(offset_info.min_offset - offset_lower); + + rtx offset = GEN_INT (-new_base_reg_info.adjust_offset); + + + if (satisfies_constraint_Is15 (offset)) + { + new_base_reg_info.set_insns[0] = + gen_addsi3(new_base_reg_info.reg, + original_base_reg, + offset); + + new_base_reg_info.n_set_insns = 1; + } + else + { + if (!satisfies_constraint_Is20 (offset)) + gcc_unreachable (); + + new_base_reg_info.set_insns[1] = + gen_rtx_SET (VOIDmode, + new_base_reg_info.reg, + GEN_INT (-new_base_reg_info.adjust_offset)); + + new_base_reg_info.set_insns[0] = + gen_addsi3 (new_base_reg_info.reg, + new_base_reg_info.reg, + original_base_reg); + + new_base_reg_info.n_set_insns = 2; + } + + new_base_reg_info.need_adjust_offset_p = true; + } + + return new_base_reg_info; +} + +static bool +nds32_4byte_load_store_reg_plus_offset ( + rtx insn, + load_store_info_t *load_store_info) +{ + if (!INSN_P (insn)) + return false; + + rtx pattern = PATTERN (insn); + rtx mem = NULL_RTX; + rtx reg = NULL_RTX; + rtx base_reg = NULL_RTX; + rtx addr; + HOST_WIDE_INT offset = 0; + bool load_p = false; + + if (GET_CODE (pattern) != SET) + return false; + + if (MEM_P (SET_SRC (pattern))) + { + mem = SET_SRC (pattern); + reg = SET_DEST (pattern); + load_p = true; + } + + if (MEM_P (SET_DEST (pattern))) + { + mem = SET_DEST (pattern); + reg = SET_SRC (pattern); + load_p = false; + } + + if (mem == NULL_RTX || reg == NULL_RTX || !REG_P (reg)) + return false; + + gcc_assert (REG_P (reg)); + + addr = XEXP (mem, 0); + + /* We only care about [reg] and [reg+const]. */ + if (REG_P (addr)) + { + base_reg = addr; + offset = 0; + } + else if (GET_CODE (addr) == PLUS + && CONST_INT_P (XEXP (addr, 1))) + { + base_reg = XEXP (addr, 0); + offset = INTVAL (XEXP (addr, 1)); + if (!REG_P (base_reg)) + return false; + } + else + return false; + + /* At least need MIDDLE_REGS. 
*/ + if (!in_reg_class_p (reg, MIDDLE_REGS)) + return false; + + /* lwi450/swi450 */ + if (offset == 0) + return false; + + if (in_reg_class_p (reg, LOW_REGS)) + { + /* lwi37.sp/swi37.sp/lwi37/swi37 */ + if ((REGNO (base_reg) == SP_REGNUM + || REGNO (base_reg) == FP_REGNUM) + && (offset >= 0 && offset < 512 && (offset % 4 == 0))) + return false; + + /* lwi333/swi333 */ + if (in_reg_class_p (base_reg, LOW_REGS) + && (offset >= 0 && offset < 32 && (offset % 4 == 0))) + return false; + } + + if (load_store_info) + { + load_store_info->load_p = load_p; + load_store_info->offset = offset; + load_store_info->reg = reg; + load_store_info->base_reg = base_reg; + load_store_info->insn = insn; + load_store_info->mem = mem; + } + + if (GET_MODE (reg) != SImode) + return false; + + return true; +} + +static bool +nds32_4byte_load_store_reg_plus_offset_p (rtx insn) +{ + return nds32_4byte_load_store_reg_plus_offset (insn, NULL); +} + +static bool +nds32_load_store_opt_profitable_p (basic_block bb) +{ + int condidate = 0; + int threshold = 2; + rtx insn; + + if (dump_file) + fprintf (dump_file, "scan bb %d\n", bb->index); + + FOR_BB_INSNS (bb, insn) + { + if (nds32_4byte_load_store_reg_plus_offset_p (insn)) + condidate++; + } + + if (dump_file) + fprintf (dump_file, " condidate = %d\n", condidate); + + return condidate >= threshold; +} + +static void +nds32_live_regs (basic_block bb, rtx first, rtx last, bitmap *live) +{ + df_ref *def_rec; + rtx insn; + bitmap_copy (*live, DF_LR_IN (bb)); + df_simulate_initialize_forwards (bb, *live); + rtx first_insn = BB_HEAD (bb); + + for (insn = first_insn; insn != first; insn = NEXT_INSN (insn)) + df_simulate_one_insn_forwards (bb, insn, *live); + + if (dump_file && debug_live_reg) + { + fprintf (dump_file, "scan live regs:\nfrom:\n"); + print_rtl_single (dump_file, first); + + fprintf (dump_file, "to:\n"); + print_rtl_single (dump_file, last); + + fprintf (dump_file, "bb lr in:\n"); + dump_bitmap (dump_file, DF_LR_IN (bb)); + + fprintf (dump_file, "init:\n"); + dump_bitmap (dump_file, *live); + } + + for (insn = first; insn != last; insn = NEXT_INSN (insn)) + { + if (!INSN_P (insn)) + continue; + + for (def_rec = DF_INSN_DEFS (insn); + *def_rec; def_rec++) + bitmap_set_bit (*live, DF_REF_REGNO (*def_rec)); + + if (dump_file && debug_live_reg) + { + fprintf (dump_file, "scaning:\n"); + print_rtl_single (dump_file, insn); + dump_bitmap (dump_file, *live); + } + } + + gcc_assert (INSN_P (insn)); + + for (def_rec = DF_INSN_DEFS (insn); + *def_rec; def_rec++) + bitmap_set_bit (*live, DF_REF_REGNO (*def_rec)); + + if (dump_file && debug_live_reg) + { + fprintf (dump_file, "scaning:\n"); + print_rtl_single (dump_file, last); + dump_bitmap (dump_file, *live); + } +} + +static void +print_hard_reg_set (FILE *file, const char *prefix, HARD_REG_SET set) +{ + int i; + bool first = true; + fprintf (file, "%s{ ", prefix); + + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + { + if (TEST_HARD_REG_BIT (set, i)) + { + if (first) + { + fprintf (file, "%s", reg_names[i]); + first = false; + } + else + fprintf (file, ", %s", reg_names[i]); + } + } + fprintf (file, "}\n"); +} + +static offset_info_t +nds32_get_offset_info (auto_vec *load_store_info) +{ + unsigned i; + std::set offsets; + offset_info_t offset_info; + offset_info.max_offset = 0; + offset_info.min_offset = 0; + offset_info.num_offset = 0; + + if (load_store_info->length () == 0) + return offset_info; + + offset_info.max_offset = (*load_store_info)[0].offset; + offset_info.min_offset = (*load_store_info)[0].offset; + 
offsets.insert ((*load_store_info)[0].offset); + + for (i = 1; i < load_store_info->length (); i++) + { + HOST_WIDE_INT offset = (*load_store_info)[i].offset; + offset_info.max_offset = MAX (offset_info.max_offset, offset); + offset_info.min_offset = MIN (offset_info.min_offset, offset); + offsets.insert (offset); + } + + offset_info.num_offset = offsets.size (); + + return offset_info; +} + +static void +nds32_get_available_reg_set (basic_block bb, + rtx first, + rtx last, + HARD_REG_SET *available_regset) +{ + bitmap live; + HARD_REG_SET live_regset; + unsigned i; + live = BITMAP_ALLOC (®_obstack); + + nds32_live_regs (bb, first, last, &live); + + REG_SET_TO_HARD_REG_SET (live_regset, live); + + /* Reverse available_regset. */ + COMPL_HARD_REG_SET (*available_regset, live_regset); + + /* We only care $r0-$r31, so mask $r0-$r31. */ + AND_HARD_REG_SET (*available_regset, reg_class_contents[GENERAL_REGS]); + + /* Fixed register also not available. */ + for (i = NDS32_FIRST_GPR_REGNUM; i <= NDS32_LAST_GPR_REGNUM; ++i) + { + if (fixed_regs[i]) + CLEAR_HARD_REG_BIT (*available_regset, i); + } + + BITMAP_FREE (live); +} + +static void +nds32_do_load_store_opt (basic_block bb) +{ + rtx insn; + load_store_info_t load_store_info; + auto_vec load_store_infos[NDS32_GPR_NUM]; + HARD_REG_SET available_regset; + int i; + unsigned j; + unsigned regno; + unsigned polluting; + df_ref *def_rec; + /* Dirty mean a register is define again after + first load/store instruction. + For example: + + lwi $r2, [$r3 + #0x100] + mov $r3, $r4 ! $r3 is dirty after this instruction. + lwi $r1, [$r3 + #0x120] ! so this load can't chain with prev load. + */ + bool dirty[NDS32_GPR_NUM]; + + if (dump_file) + fprintf (dump_file, "try load store opt for bb %d\n", bb->index); + + for (i = 0; i < NDS32_GPR_NUM; ++i) + dirty[i] = false; + + FOR_BB_INSNS (bb, insn) + { + if (!INSN_P (insn)) + continue; + + polluting = INVALID_REGNUM; + + /* Set def reg is dirty if chain is not empty. */ + for (def_rec = DF_INSN_DEFS (insn); + *def_rec; def_rec++) + { + regno = DF_REF_REGNO (*def_rec); + + if (!NDS32_IS_GPR_REGNUM (regno)) + continue; + + if (!load_store_infos[regno].is_empty ()) + { + /* Set pulluting here because the source register + may be the same one. */ + if (dirty[regno] == false) + polluting = regno; + + dirty[regno] = true; + } + } + + /* Set all caller-save register is dirty if chain is not empty. */ + if (CALL_P (insn)) + { + for (i = 0; i < NDS32_GPR_NUM; ++i) + { + if (call_used_regs[i] && !load_store_infos[i].is_empty ()) + dirty[i] = true; + } + } + + if (nds32_4byte_load_store_reg_plus_offset (insn, &load_store_info)) + { + regno = REGNO (load_store_info.base_reg); + gcc_assert (NDS32_IS_GPR_REGNUM (regno)); + + /* Don't add to chain if this reg is dirty. */ + if (dirty[regno] && polluting != regno) + break; + + /* If the register is first time to be used and be polluted + right away, we don't push it. 
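At the source level, the block this pass is hunting for looks roughly like the following (struct and field names are hypothetical): several 4-byte accesses through one pointer with small constant offsets, enough to meet the two-candidate threshold tested in nds32_load_store_opt_profitable_p above.

  struct dev_regs { int ctrl; int status; int data; int mask; };

  int poll_device (struct dev_regs *d)
  {
    /* Each field read becomes an SImode load of the form [reg + const]; when
       at least two of them survive in one basic block, the chain may be
       rewritten against a cheaper base register by do_optimize above.  */
    return d->status + d->data + d->mask;
  }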
*/ + if (regno == REGNO (load_store_info.reg) && load_store_info.load_p + && dirty[regno] == false) + continue; + + load_store_infos[regno].safe_push (load_store_info); + } + } + for (i = 0; i < NDS32_GPR_NUM; ++i) + { + if (load_store_infos[i].length () <= 1) + { + if (dump_file && load_store_infos[i].length () == 1) + fprintf (dump_file, + "Skip Chain for $r%d since chain size only 1\n", + i); + continue; + } + + if (dump_file) + { + fprintf (dump_file, + "Chain for $r%d: (size = %u)\n", + i, load_store_infos[i].length ()); + + for (j = 0; j < load_store_infos[i].length (); ++j) + { + fprintf (dump_file, + "regno = %d base_regno = %d " + "offset = " HOST_WIDE_INT_PRINT_DEC " " + "load_p = %d UID = %u\n", + REGNO (load_store_infos[i][j].reg), + REGNO (load_store_infos[i][j].base_reg), + load_store_infos[i][j].offset, + load_store_infos[i][j].load_p, + INSN_UID (load_store_infos[i][j].insn)); + } + } + + nds32_get_available_reg_set (bb, + load_store_infos[i][0].insn, + load_store_infos[i].last ().insn, + &available_regset); + + if (dump_file) + { + print_hard_reg_set (dump_file, "", available_regset); + } + + offset_info_t offset_info = nds32_get_offset_info (&load_store_infos[i]); + if (dump_file) + { + fprintf (dump_file, + "max offset = " HOST_WIDE_INT_PRINT_DEC "\n" + "min offset = " HOST_WIDE_INT_PRINT_DEC "\n" + "num offset = %d\n", + offset_info.max_offset, + offset_info.min_offset, + offset_info.num_offset); + } + + int gain; + int best_gain = 0; + const load_store_optimize_pass *best_load_store_optimize_pass = NULL; + + for (j = 0; j < N_LOAD_STORE_OPT_TYPE; ++j) + { + gain = load_store_optimizes[j]->calc_gain (&available_regset, + offset_info, + &load_store_infos[i]); + + if (dump_file) + fprintf (dump_file, "%s gain = %d\n", + load_store_optimizes[j]->name (), gain); + + if (gain > best_gain) + { + best_gain = gain; + best_load_store_optimize_pass = load_store_optimizes[j]; + } + } + + if (best_load_store_optimize_pass) + { + if (dump_file) + fprintf (dump_file, "%s is most profit, optimize it!\n", + best_load_store_optimize_pass->name ()); + + best_load_store_optimize_pass->do_optimize (&available_regset, + offset_info, + &load_store_infos[i]); + + df_insn_rescan_all (); + } + + } +} + +static unsigned int +nds32_load_store_opt (void) +{ + basic_block bb; + + df_set_flags (DF_LR_RUN_DCE); + df_note_add_problem (); + df_analyze (); + + FOR_EACH_BB_FN (bb, cfun) + { + if (nds32_load_store_opt_profitable_p (bb)) + nds32_do_load_store_opt (bb); + } + + return 1; +} + +const pass_data pass_data_nds32_load_store_opt = +{ + RTL_PASS, /* type */ + "load_store_opt", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + ( TODO_df_finish | TODO_verify_rtl_sharing), /* todo_flags_finish */ +}; + +class pass_nds32_load_store_opt : public rtl_opt_pass +{ +public: + pass_nds32_load_store_opt (gcc::context *ctxt) + : rtl_opt_pass (pass_data_nds32_load_store_opt, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return TARGET_16_BIT && TARGET_LOAD_STORE_OPT; } + unsigned int execute () { return nds32_load_store_opt (); } +}; + +rtl_opt_pass * +make_pass_nds32_load_store_opt (gcc::context *ctxt) +{ + return new pass_nds32_load_store_opt (ctxt); +} diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-load-store-opt.h gcc-4.9.4/gcc/config/nds32/nds32-load-store-opt.h --- 
gcc-4.9.4.orig/gcc/config/nds32/nds32-load-store-opt.h 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-load-store-opt.h 2016-08-08 20:37:45.506270091 +0200 @@ -0,0 +1,128 @@ +/* Prototypes for load-store-opt of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2015 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef NDS32_LOAD_STORE_OPT_H +#define NDS32_LOAD_STORE_OPT_H + +/* Define the type of a set of hard registers. */ + +typedef struct { + rtx reg; + rtx base_reg; + rtx offset; + HOST_WIDE_INT shift; + bool load_p; + rtx insn; +} rr_load_store_info_t; + +typedef struct { + rtx reg; + rtx base_reg; + HOST_WIDE_INT offset; + bool load_p; + rtx insn; + rtx mem; +} load_store_info_t; + +typedef struct { + HOST_WIDE_INT max_offset; + HOST_WIDE_INT min_offset; + /* How many different offset. */ + int num_offset; +} offset_info_t; + +typedef struct { + rtx set_insns[2]; + int n_set_insns; + rtx reg; + bool need_adjust_offset_p; + HOST_WIDE_INT adjust_offset; +} new_base_reg_info_t; + +typedef auto_vec load_store_infos_t; + +class load_store_optimize_pass +{ +public: + load_store_optimize_pass (enum reg_class, + enum reg_class, + HOST_WIDE_INT, + HOST_WIDE_INT, + bool, + const char *); + const char *name () const { return m_name; }; + int calc_gain (HARD_REG_SET *, + offset_info_t, + load_store_infos_t *) const; + void do_optimize (HARD_REG_SET *, + offset_info_t, + load_store_infos_t *) const; +private: + enum reg_class m_allow_regclass; + enum reg_class m_new_base_regclass; + HOST_WIDE_INT m_offset_lower_bound; + HOST_WIDE_INT m_offset_upper_bound; + bool m_load_only_p; + const char *m_name; +}; + +static inline bool +in_reg_class_p (unsigned regno, enum reg_class clazz) +{ + return TEST_HARD_REG_BIT (reg_class_contents[clazz], regno); +} + +static inline bool +in_reg_class_p (rtx reg, enum reg_class clazz) +{ + gcc_assert (REG_P (reg)); + return in_reg_class_p (REGNO (reg), clazz); +} + +static inline rtx +gen_reg_plus_imm_load_store (rtx reg, rtx base_reg, + HOST_WIDE_INT offset, bool load_p, rtx oldmem) +{ + rtx addr = plus_constant(Pmode, base_reg, offset); + rtx mem = gen_rtx_MEM (SImode, addr); + MEM_COPY_ATTRIBUTES (mem, oldmem); + if (load_p) + return gen_movsi (reg, mem); + else + return gen_movsi (mem, reg); +} + +static inline unsigned +find_available_reg (HARD_REG_SET *available_regset, enum reg_class clazz) +{ + hard_reg_set_iterator hrsi; + unsigned regno; + EXECUTE_IF_SET_IN_HARD_REG_SET (reg_class_contents[clazz], 0, regno, hrsi) + { + /* Caller-save register or callee-save register but it's ever live. */ + if (TEST_HARD_REG_BIT (*available_regset, regno) + && (call_used_regs[regno] || df_regs_ever_live_p (regno))) + return regno; + } + + return INVALID_REGNUM; +} + +#endif /* ! 
NDS32_LOAD_STORE_OPT_H */ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32.md gcc-4.9.4/gcc/config/nds32/nds32.md --- gcc-4.9.4.orig/gcc/config/nds32/nds32.md 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32.md 2016-08-08 20:37:45.590273343 +0200 @@ -1,5 +1,5 @@ ;; Machine description of Andes NDS32 cpu for GNU compiler -;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; Copyright (C) 2012-2015 Free Software Foundation, Inc. ;; Contributed by Andes Technology Corporation. ;; ;; This file is part of GCC. @@ -46,58 +46,140 @@ ;; Include DImode/DFmode operations. (include "nds32-doubleword.md") +;; Include floating-point patterns. +(include "nds32-fpu.md") + ;; Include peephole patterns. (include "nds32-peephole2.md") +;; ------------------------------------------------------------------------ + +;; CPU pipeline model. +(define_attr "pipeline_model" "n7,n8,e8,n9,n13,simple" + (const + (cond [(match_test "nds32_cpu_option == CPU_N7") (const_string "n7") + (match_test "nds32_cpu_option == CPU_N8") (const_string "n8") + (match_test "nds32_cpu_option == CPU_E8") (const_string "e8") + (match_test "nds32_cpu_option == CPU_N9") (const_string "n9") + (match_test "nds32_cpu_option == CPU_N10") (const_string "n9") + (match_test "nds32_cpu_option == CPU_N12") (const_string "n13") + (match_test "nds32_cpu_option == CPU_N13") (const_string "n13") + (match_test "nds32_cpu_option == CPU_SIMPLE") (const_string "simple")] + (const_string "n9")))) + ;; Insn type, it is used to default other attribute values. (define_attr "type" - "unknown,move,load,store,alu,compare,branch,call,misc" + "unknown,load,store,load_multiple,store_multiple,alu,alu_shift,pbsad,pbsada,mul,mac,div,branch,mmu,misc" (const_string "unknown")) - ;; Length, in bytes, default is 4-bytes. (define_attr "length" "" (const_int 4)) +;; Indicate the amount of micro instructions. +(define_attr "combo" + "0,1,2,3,4,5,6,7,8,9,10,12" + (const_string "1")) + +;; Insn in which feature set, it is used to enable/disable insn alternatives. +;; v1 : Baseline Instructions +;; v2 : Baseline Version 2 Instructions +;; v3m : Baseline Version 3m Instructions +;; v3 : Baseline Version 3 Instructions +;; pe1 : Performance Extension Instructions +;; pe2 : Performance Extension Version 2 Instructions +;; se : String Extension instructions +(define_attr "feature" + "v1,v2,v3m,v3,pe1,pe2,se,fpu" + (const_string "v1")) +;; Because linker relaxation only can reduce size, gcc has to forbid some +;; 2-byte insntruction patterns which may be tagged relax hint. +(define_attr "relaxable" + "yes,no" + (const_string "yes")) ;; Enabled, which is used to enable/disable insn alternatives. ;; Note that we use length and TARGET_16_BIT here as criteria. -;; If the instruction pattern already check TARGET_16_BIT to -;; determine the length by itself, its enabled attribute should be -;; always 1 to avoid the conflict with the settings here. -(define_attr "enabled" "" - (cond [(and (eq_attr "length" "2") - (match_test "!TARGET_16_BIT")) - (const_int 0)] - (const_int 1))) +;; If the instruction pattern already check TARGET_16_BIT to determine +;; the length by itself, its enabled attribute should be customized to +;; avoid the conflict between length attribute and this default setting. 
+(define_attr "enabled" "no,yes" + (if_then_else + (ior (and (eq_attr "length" "2") (match_test "!TARGET_16_BIT")) + (and (eq_attr "relaxable" "no") (match_test "TARGET_LINUX_ABI"))) + (const_string "no") + (cond [(eq_attr "feature" "v1") (const_string "yes") + (eq_attr "feature" "v2") (if_then_else (match_test "TARGET_ISA_V2 || TARGET_ISA_V3 || TARGET_ISA_V3M") + (const_string "yes") + (const_string "no")) + (eq_attr "feature" "v3") (if_then_else (match_test "TARGET_ISA_V3") + (const_string "yes") + (const_string "no")) + (eq_attr "feature" "v3m") (if_then_else (match_test "TARGET_ISA_V3 || TARGET_ISA_V3M") + (const_string "yes") + (const_string "no")) + (eq_attr "feature" "pe1") (if_then_else (match_test "TARGET_EXT_PERF") + (const_string "yes") + (const_string "no")) + (eq_attr "feature" "pe2") (if_then_else (match_test "TARGET_EXT_PERF2") + (const_string "yes") + (const_string "no")) + (eq_attr "feature" "se") (if_then_else (match_test "TARGET_EXT_STRING") + (const_string "yes") + (const_string "no")) + (eq_attr "feature" "fpu") (if_then_else (match_test "TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE") + (const_string "yes") + (const_string "no"))] + (const_string "yes")))) ;; ---------------------------------------------------------------------------- +(include "nds32-dspext.md") ;; Move instructions. ;; For QImode and HImode, the immediate value can be fit in imm20s. ;; So there is no need to split rtx for QI and HI patterns. -(define_expand "movqi" - [(set (match_operand:QI 0 "general_operand" "") - (match_operand:QI 1 "general_operand" ""))] +(define_expand "mov" + [(set (match_operand:QIHI 0 "general_operand" "") + (match_operand:QIHI 1 "general_operand" ""))] "" { /* Need to force register if mem <- !reg. */ if (MEM_P (operands[0]) && !REG_P (operands[1])) - operands[1] = force_reg (QImode, operands[1]); + operands[1] = force_reg (mode, operands[1]); + + if (MEM_P (operands[1]) && optimize > 0) + { + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_zero_extendsi2 (reg, operands[1])); + operands[1] = gen_lowpart (mode, reg); + } }) -(define_expand "movhi" - [(set (match_operand:HI 0 "general_operand" "") - (match_operand:HI 1 "general_operand" ""))] +(define_expand "movmisalign" + [(set (match_operand:SIDI 0 "general_operand" "") + (match_operand:SIDI 1 "general_operand" ""))] "" { - /* Need to force register if mem <- !reg. 
*/ + rtx addr; if (MEM_P (operands[0]) && !REG_P (operands[1])) - operands[1] = force_reg (HImode, operands[1]); + operands[1] = force_reg (mode, operands[1]); + + if (MEM_P (operands[0])) + { + addr = force_reg (Pmode, XEXP (operands[0], 0)); + emit_insn (gen_unaligned_store (addr, operands[1])); + } + else + { + addr = force_reg (Pmode, XEXP (operands[1], 0)); + emit_insn (gen_unaligned_load (operands[0], addr)); + } + DONE; }) (define_expand "movsi" @@ -130,12 +212,27 @@ low12_int)); DONE; } + + if (REG_P (operands[0]) && SYMBOLIC_CONST_P (operands[1])) + { + if (nds32_tls_referenced_p (operands [1])) + { + nds32_expand_tls_move (operands); + DONE; + } + else if (flag_pic) + { + nds32_expand_pic_move (operands); + DONE; + } + } }) (define_insn "*mov" - [(set (match_operand:QIHISI 0 "nonimmediate_operand" "=r, r, U45, U33, U37, U45, m, l, l, l, d, r, d, r, r, r") - (match_operand:QIHISI 1 "nds32_move_operand" " r, r, l, l, l, d, r, U45, U33, U37, U45, m, Ip05, Is05, Is20, Ihig"))] - "" + [(set (match_operand:QIHISI 0 "nonimmediate_operand" "=r, r,$U45,$U33,$U37,$U45, m,$ l,$ l,$ l,$ d, d, r,$ d, r, r, r, *f, *f, r, *f, Q, A") + (match_operand:QIHISI 1 "nds32_move_operand" " r, r, l, l, l, d, r, U45, U33, U37, U45,Ufe, m, Ip05, Is05, Is20, Ihig, *f, r, *f, Q, *f, r"))] + "register_operand(operands[0], mode) + || register_operand(operands[1], mode)" { switch (which_alternative) { @@ -154,37 +251,55 @@ case 8: case 9: case 10: - return nds32_output_16bit_load (operands, ); case 11: - return nds32_output_32bit_load (operands, ); + return nds32_output_16bit_load (operands, ); case 12: - return "movpi45\t%0, %1"; + return nds32_output_32bit_load (operands, ); case 13: - return "movi55\t%0, %1"; + return "movpi45\t%0, %1"; case 14: - return "movi\t%0, %1"; + return "movi55\t%0, %1"; case 15: + return "movi\t%0, %1"; + case 16: return "sethi\t%0, hi20(%1)"; + case 17: + if (TARGET_FPU_SINGLE) + return "fcpyss\t%0, %1, %1"; + else + return "#"; + case 18: + return "fmtsr\t%1, %0"; + case 19: + return "fmfsr\t%0, %1"; + case 20: + return nds32_output_float_load (operands); + case 21: + return nds32_output_float_store (operands); + case 22: + return "mtusr\t%1, %0"; default: gcc_unreachable (); } } - [(set_attr "type" "alu,alu,store,store,store,store,store,load,load,load,load,load,alu,alu,alu,alu") - (set_attr "length" " 2, 4, 2, 2, 2, 2, 4, 2, 2, 2, 2, 4, 2, 2, 4, 4")]) + [(set_attr "type" "alu,alu,store,store,store,store,store,load,load,load,load,load,load,alu,alu,alu,alu,unknown,unknown,unknown,unknown,unknown,alu") + (set_attr "length" " 2, 4, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 4, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4") + (set_attr "feature" " v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v3m, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu, v1") + (set_attr "relaxable" "yes,yes, no, no, yes, no, yes, no, no, yes, no, yes, yes,yes,yes,yes,yes, yes, yes, yes, yes, yes,yes")]) ;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF ;; are able to match such instruction template. 
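;; As a hedged note (the symbol name is made up): "la $r0, foo" is a pseudo
;; instruction that is normally expanded into a sethi/ori pair, e.g.
;;   sethi $r0, hi20(foo)
;;   ori   $r0, $r0, lo12(foo)
;; which is why the pattern below reserves a conservative length of 8 bytes.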
(define_insn "*move_addr" - [(set (match_operand:SI 0 "register_operand" "=l, r") - (match_operand:SI 1 "nds32_symbolic_operand" " i, i"))] + [(set (match_operand:SI 0 "nds32_general_register_operand" "=l, r") + (match_operand:SI 1 "nds32_nonunspec_symbolic_operand" " i, i"))] "" "la\t%0, %1" - [(set_attr "type" "move") + [(set_attr "type" "alu") (set_attr "length" "8")]) -(define_insn "*sethi" +(define_insn "sethi" [(set (match_operand:SI 0 "register_operand" "=r") (high:SI (match_operand:SI 1 "nds32_symbolic_operand" " i")))] "" @@ -193,7 +308,7 @@ (set_attr "length" "4")]) -(define_insn "*lo_sum" +(define_insn "lo_sum" [(set (match_operand:SI 0 "register_operand" "=r") (lo_sum:SI (match_operand:SI 1 "register_operand" " r") (match_operand:SI 2 "nds32_symbolic_operand" " i")))] @@ -208,8 +323,8 @@ ;; Zero extension instructions. (define_insn "zero_extendsi2" - [(set (match_operand:SI 0 "register_operand" "=l, r, l, *r") - (zero_extend:SI (match_operand:QIHI 1 "nonimmediate_operand" " l, r, U33, m")))] + [(set (match_operand:SI 0 "register_operand" "=$l, r,$ l, *r") + (zero_extend:SI (match_operand:QIHI 1 "nonimmediate_operand" " l, r, U33, m")))] "" { switch (which_alternative) @@ -234,8 +349,8 @@ ;; Sign extension instructions. (define_insn "extendsi2" - [(set (match_operand:SI 0 "register_operand" "=l, r, r") - (sign_extend:SI (match_operand:QIHI 1 "nonimmediate_operand" " l, r, m")))] + [(set (match_operand:SI 0 "register_operand" "=$l, r, r") + (sign_extend:SI (match_operand:QIHI 1 "nonimmediate_operand" " l, r, m")))] "" { switch (which_alternative) @@ -245,7 +360,7 @@ case 1: return "se\t%0, %1"; case 2: - return nds32_output_32bit_load_s (operands, ); + return nds32_output_32bit_load_se (operands, ); default: gcc_unreachable (); @@ -259,22 +374,22 @@ ;; Arithmetic instructions. 
-(define_insn "add3" - [(set (match_operand:QIHISI 0 "register_operand" "= d, l, d, l, d, l, k, l, r, r") - (plus:QIHISI (match_operand:QIHISI 1 "register_operand" " 0, l, 0, l, %0, l, 0, k, r, r") - (match_operand:QIHISI 2 "nds32_rimm15s_operand" " In05, In03, Iu05, Iu03, r, l, Is10, Iu06, Is15, r")))] +(define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" "=$ d,$ l,$ d,$ l,$ d,$l,$ k,$ l, r, r") + (plus:SI (match_operand:SI 1 "register_operand" "% 0, l, 0, l, 0, l, 0, k, r, r") + (match_operand:SI 2 "nds32_rimm15s_operand" " In05, In03, Iu05, Iu03, r, l, Is10, IU06, Is15, r")))] "" { switch (which_alternative) { case 0: /* addi Rt4,Rt4,-x ==> subi45 Rt4,x - where 0 <= x <= 31 */ + where 0 <= x <= 31 */ operands[2] = gen_int_mode (-INTVAL (operands[2]), SImode); return "subi45\t%0, %2"; case 1: /* addi Rt3,Ra3,-x ==> subi333 Rt3,Ra3,x - where 0 <= x <= 7 */ + where 0 <= x <= 7 */ operands[2] = gen_int_mode (-INTVAL (operands[2]), SImode); return "subi333\t%0, %1, %2"; case 2: @@ -298,19 +413,21 @@ gcc_unreachable (); } } - [(set_attr "type" "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu") - (set_attr "length" " 2, 2, 2, 2, 2, 2, 2, 2, 4, 4")]) - -(define_insn "sub3" - [(set (match_operand:QIHISI 0 "register_operand" "=d, l, r, r") - (minus:QIHISI (match_operand:QIHISI 1 "nds32_rimm15s_operand" " 0, l, Is15, r") - (match_operand:QIHISI 2 "register_operand" " r, l, r, r")))] + [(set_attr "type" "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu") + (set_attr "length" " 2, 2, 2, 2, 2, 2, 2, 2, 4, 4") + (set_attr "feature" " v1, v1, v1, v1, v1, v1, v2, v1, v1, v1") + (set_attr "relaxable" "yes,yes,yes,yes, no,yes,yes,yes,yes,yes")]) + +(define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=$d, $l, r, r") + (minus:SI (match_operand:SI 1 "nds32_rimm15s_operand" " 0, l, Is15, r") + (match_operand:SI 2 "register_operand" " r, l, r, r")))] "" "@ - sub45\t%0, %2 - sub333\t%0, %1, %2 - subri\t%0, %2, %1 - sub\t%0, %1, %2" + sub45\t%0, %2 + sub333\t%0, %1, %2 + subri\t%0, %2, %1 + sub\t%0, %1, %2" [(set_attr "type" "alu,alu,alu,alu") (set_attr "length" " 2, 2, 4, 4")]) @@ -320,10 +437,10 @@ ;; and needs to ensure it is exact_log2 value. 
(define_insn "*add_slli" [(set (match_operand:SI 0 "register_operand" "=r") - (plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r") + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r") (match_operand:SI 2 "immediate_operand" " i")) (match_operand:SI 3 "register_operand" " r")))] - "TARGET_ISA_V3 + "TARGET_ISA_V3 && optimize_size && (exact_log2 (INTVAL (operands[2])) != -1) && (exact_log2 (INTVAL (operands[2])) <= 31)" { @@ -333,18 +450,20 @@ return "add_slli\t%0, %3, %1, %2"; } - [(set_attr "type" "alu") - (set_attr "length" "4")]) + [(set_attr "type" "alu_shift") + (set_attr "combo" "2") + (set_attr "length" "4")]) (define_insn "*add_srli" - [(set (match_operand:SI 0 "register_operand" "= r") - (plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") - (match_operand:SI 2 "immediate_operand" " Iu05")) - (match_operand:SI 3 "register_operand" " r")))] - "TARGET_ISA_V3" + [(set (match_operand:SI 0 "register_operand" "= r") + (plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")) + (match_operand:SI 3 "register_operand" " r")))] + "TARGET_ISA_V3 && optimize_size" "add_srli\t%0, %3, %1, %2" - [(set_attr "type" "alu") - (set_attr "length" "4")]) + [(set_attr "type" "alu_shift") + (set_attr "combo" "2") + (set_attr "length" "4")]) ;; GCC intends to simplify (minus (reg) (ashift ...)) @@ -355,7 +474,7 @@ (minus:SI (match_operand:SI 1 "register_operand" " r") (mult:SI (match_operand:SI 2 "register_operand" " r") (match_operand:SI 3 "immediate_operand" " i"))))] - "TARGET_ISA_V3 + "TARGET_ISA_V3 && optimize_size && (exact_log2 (INTVAL (operands[3])) != -1) && (exact_log2 (INTVAL (operands[3])) <= 31)" { @@ -365,32 +484,35 @@ return "sub_slli\t%0, %1, %2, %3"; } - [(set_attr "type" "alu") - (set_attr "length" "4")]) + [(set_attr "type" "alu_shift") + (set_attr "combo" "2") + (set_attr "length" "4")]) (define_insn "*sub_srli" - [(set (match_operand:SI 0 "register_operand" "= r") - (minus:SI (match_operand:SI 1 "register_operand" " r") - (lshiftrt:SI (match_operand:SI 2 "register_operand" " r") - (match_operand:SI 3 "immediate_operand" " Iu05"))))] - "TARGET_ISA_V3" + [(set (match_operand:SI 0 "register_operand" "= r") + (minus:SI (match_operand:SI 1 "register_operand" " r") + (lshiftrt:SI (match_operand:SI 2 "register_operand" " r") + (match_operand:SI 3 "nds32_imm5u_operand" " Iu05"))))] + "TARGET_ISA_V3 && optimize_size" "sub_srli\t%0, %1, %2, %3" - [(set_attr "type" "alu") - (set_attr "length" "4")]) + [(set_attr "type" "alu_shift") + (set_attr "combo" "2") + (set_attr "length" "4")]) ;; Multiplication instructions. 
(define_insn "mulsi3" - [(set (match_operand:SI 0 "register_operand" "= w, r") - (mult:SI (match_operand:SI 1 "register_operand" " %0, r") - (match_operand:SI 2 "register_operand" " w, r")))] + [(set (match_operand:SI 0 "register_operand" "=$l, r") + (mult:SI (match_operand:SI 1 "register_operand" "% 0, r") + (match_operand:SI 2 "register_operand" " l, r")))] "" "@ - mul33\t%0, %2 - mul\t%0, %1, %2" - [(set_attr "type" "alu,alu") - (set_attr "length" " 2, 4")]) + mul33\t%0, %2 + mul\t%0, %1, %2" + [(set_attr "type" "mul,mul") + (set_attr "length" " 2, 4") + (set_attr "feature" "v3m, v1")]) (define_insn "mulsidi3" [(set (match_operand:DI 0 "register_operand" "=r") @@ -398,7 +520,7 @@ (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))))] "TARGET_ISA_V2 || TARGET_ISA_V3" "mulsr64\t%0, %1, %2" - [(set_attr "type" "alu") + [(set_attr "type" "mul") (set_attr "length" "4")]) (define_insn "umulsidi3" @@ -407,7 +529,7 @@ (zero_extend:DI (match_operand:SI 2 "register_operand" " r"))))] "TARGET_ISA_V2 || TARGET_ISA_V3" "mulr64\t%0, %1, %2" - [(set_attr "type" "alu") + [(set_attr "type" "mul") (set_attr "length" "4")]) @@ -415,32 +537,32 @@ (define_insn "*maddr32_0" [(set (match_operand:SI 0 "register_operand" "=r") - (plus:SI (match_operand:SI 3 "register_operand" " 0") - (mult:SI (match_operand:SI 1 "register_operand" " r") - (match_operand:SI 2 "register_operand" " r"))))] + (plus:SI (match_operand:SI 3 "register_operand" " 0") + (mult:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r"))))] "" "maddr32\t%0, %1, %2" - [(set_attr "type" "alu") + [(set_attr "type" "mac") (set_attr "length" "4")]) (define_insn "*maddr32_1" [(set (match_operand:SI 0 "register_operand" "=r") - (plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r") - (match_operand:SI 2 "register_operand" " r")) - (match_operand:SI 3 "register_operand" " 0")))] + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r")) + (match_operand:SI 3 "register_operand" " 0")))] "" "maddr32\t%0, %1, %2" - [(set_attr "type" "alu") + [(set_attr "type" "mac") (set_attr "length" "4")]) (define_insn "*msubr32" [(set (match_operand:SI 0 "register_operand" "=r") - (minus:SI (match_operand:SI 3 "register_operand" " 0") - (mult:SI (match_operand:SI 1 "register_operand" " r") - (match_operand:SI 2 "register_operand" " r"))))] + (minus:SI (match_operand:SI 3 "register_operand" " 0") + (mult:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r"))))] "" "msubr32\t%0, %1, %2" - [(set_attr "type" "alu") + [(set_attr "type" "mac") (set_attr "length" "4")]) @@ -448,26 +570,46 @@ (define_insn "divmodsi4" [(set (match_operand:SI 0 "register_operand" "=r") - (div:SI (match_operand:SI 1 "register_operand" " r") - (match_operand:SI 2 "register_operand" " r"))) + (div:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r"))) (set (match_operand:SI 3 "register_operand" "=r") - (mod:SI (match_dup 1) (match_dup 2)))] + (mod:SI (match_dup 1) (match_dup 2)))] "" "divsr\t%0, %3, %1, %2" - [(set_attr "type" "alu") + [(set_attr "type" "div") (set_attr "length" "4")]) (define_insn "udivmodsi4" [(set (match_operand:SI 0 "register_operand" "=r") - (udiv:SI (match_operand:SI 1 "register_operand" " r") - (match_operand:SI 2 "register_operand" " r"))) + (udiv:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r"))) (set (match_operand:SI 3 
"register_operand" "=r") - (umod:SI (match_dup 1) (match_dup 2)))] + (umod:SI (match_dup 1) (match_dup 2)))] "" "divr\t%0, %3, %1, %2" - [(set_attr "type" "alu") + [(set_attr "type" "div") + (set_attr "length" "4")]) + +;; divsr/divr will keep quotient only when quotient and remainder is the same +;; register in our ISA spec, it's can reduce 1 register presure if we don't +;; want remainder. +(define_insn "divsi4" + [(set (match_operand:SI 0 "register_operand" "=r") + (div:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r")))] + "" + "divsr\t%0, %0, %1, %2" + [(set_attr "type" "div") (set_attr "length" "4")]) +(define_insn "udivsi4" + [(set (match_operand:SI 0 "register_operand" "=r") + (udiv:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r")))] + "" + "divr\t%0, %0, %1, %2" + [(set_attr "type" "div") + (set_attr "length" "4")]) ;; ---------------------------------------------------------------------------- @@ -488,14 +630,28 @@ (set_attr "length" "4")] ) -(define_insn "andsi3" - [(set (match_operand:SI 0 "register_operand" "= w, r, l, l, l, l, l, l, r, r, r, r, r") - (and:SI (match_operand:SI 1 "register_operand" " %0, r, l, l, l, l, 0, 0, r, r, r, r, r") - (match_operand:SI 2 "general_operand" " w, r, Izeb, Izeh, Ixls, Ix11, Ibms, Ifex, Izeb, Izeh, Iu15, Ii15, Ic15")))] +(define_expand "andsi3" + [(set (match_operand:SI 0 "register_operand" "") + (and:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nds32_reg_constant_operand" "")))] + "" +{ + if (CONST_INT_P (operands[2]) + && !nds32_and_operand (operands[2], SImode)) + { + nds32_expand_constant (SImode, INTVAL (operands[2]), + operands[0], operands[1]); + DONE; + } +}) + +(define_insn "*andsi3" + [(set (match_operand:SI 0 "register_operand" "=$l, r,$ l,$ l,$ l,$ l,$ l,$ l, r, r, r, r, r") + (and:SI (match_operand:SI 1 "register_operand" "% 0, r, l, l, l, l, 0, 0, r, r, r, r, r") + (match_operand:SI 2 "nds32_and_operand" " l, r, Izeb, Izeh, Ixls, Ix11, Ibms, Ifex, Izeb, Izeh, Iu15, Ii15, Ic15")))] "" { HOST_WIDE_INT mask = INTVAL (operands[2]); - int zero_position; /* 16-bit andi instructions: andi Rt3,Ra3,0xff -> zeb33 Rt3,Ra3 @@ -520,8 +676,7 @@ case 5: return "x11b33\t%0, %1"; case 6: - operands[2] = GEN_INT (floor_log2 (mask)); - return "bmski33\t%0, %2"; + return "bmski33\t%0, %B2"; case 7: operands[2] = GEN_INT (floor_log2 (mask + 1) - 1); return "fexti33\t%0, %2"; @@ -535,47 +690,35 @@ operands[2] = GEN_INT (~mask); return "bitci\t%0, %1, %2"; case 12: - /* If we reach this alternative, - it must pass the nds32_can_use_bclr_p() test, - so that we can guarantee there is only one 0-bit - within the immediate value. */ - for (zero_position = 31; zero_position >= 0; zero_position--) - { - if ((INTVAL (operands[2]) & (1 << zero_position)) == 0) - { - /* Found the 0-bit position. 
*/ - operands[2] = GEN_INT (zero_position); - break; - } - } - return "bclr\t%0, %1, %2"; + return "bclr\t%0, %1, %b2"; default: gcc_unreachable (); } } - [(set_attr "type" "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu") - (set_attr "length" " 2, 4, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4")]) + [(set_attr "type" "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu") + (set_attr "length" " 2, 4, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4") + (set_attr "feature" "v3m, v1, v1, v1, v1, v1,v3m,v3m, v1, v1, v1, v3,pe1")]) (define_insn "*and_slli" - [(set (match_operand:SI 0 "register_operand" "= r") - (and:SI (ashift:SI (match_operand:SI 1 "register_operand" " r") - (match_operand:SI 2 "immediate_operand" " Iu05")) - (match_operand:SI 3 "register_operand" " r")))] - "TARGET_ISA_V3" + [(set (match_operand:SI 0 "register_operand" "= r") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")) + (match_operand:SI 3 "register_operand" " r")))] + "TARGET_ISA_V3 && optimize_size" "and_slli\t%0, %3, %1, %2" - [(set_attr "type" "alu") - (set_attr "length" "4")]) + [(set_attr "type" "alu_shift") + (set_attr "length" "4")]) (define_insn "*and_srli" - [(set (match_operand:SI 0 "register_operand" "= r") - (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") - (match_operand:SI 2 "immediate_operand" " Iu05")) - (match_operand:SI 3 "register_operand" " r")))] - "TARGET_ISA_V3" + [(set (match_operand:SI 0 "register_operand" "= r") + (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")) + (match_operand:SI 3 "register_operand" " r")))] + "TARGET_ISA_V3 && optimize_size" "and_srli\t%0, %3, %1, %2" - [(set_attr "type" "alu") - (set_attr "length" "4")]) + [(set_attr "type" "alu_shift") + (set_attr "length" "4")]) ;; ---------------------------------------------------------------------------- @@ -584,58 +727,50 @@ ;; For V3/V3M ISA, we have 'or33' instruction. ;; So we can identify 'or Rt3,Rt3,Ra3' case and set its length to be 2. -(define_insn "iorsi3" - [(set (match_operand:SI 0 "register_operand" "= w, r, r, r") - (ior:SI (match_operand:SI 1 "register_operand" " %0, r, r, r") - (match_operand:SI 2 "general_operand" " w, r, Iu15, Ie15")))] + +(define_expand "iorsi3" + [(set (match_operand:SI 0 "register_operand" "") + (ior:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")))] "" { - int one_position; - - switch (which_alternative) - { - case 0: - return "or33\t%0, %2"; - case 1: - return "or\t%0, %1, %2"; - case 2: - return "ori\t%0, %1, %2"; - case 3: - /* If we reach this alternative, - it must pass the nds32_can_use_bset_p() test, - so that we can guarantee there is only one 1-bit - within the immediate value. */ - /* Use exact_log2() to search the 1-bit position. 
*/ - one_position = exact_log2 (INTVAL (operands[2])); - operands[2] = GEN_INT (one_position); - return "bset\t%0, %1, %2"; + if (!nds32_ior_operand (operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); +}) - default: - gcc_unreachable (); - } -} - [(set_attr "type" "alu,alu,alu,alu") - (set_attr "length" " 2, 4, 4, 4")]) +(define_insn "*iorsi3" + [(set (match_operand:SI 0 "register_operand" "=l, r, r, r") + (ior:SI (match_operand:SI 1 "register_operand" "%0, r, r, r") + (match_operand:SI 2 "nds32_ior_operand" " l, r, Iu15, Ie15")))] + "" + "@ + or33\t%0, %2 + or\t%0, %1, %2 + ori\t%0, %1, %2 + bset\t%0, %1, %B2" + [(set_attr "type" "alu,alu,alu,alu") + (set_attr "length" " 2, 4, 4, 4") + (set_attr "feature" "v3m, v1, v1,pe1")]) (define_insn "*or_slli" - [(set (match_operand:SI 0 "register_operand" "= r") - (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" " r") - (match_operand:SI 2 "immediate_operand" " Iu05")) - (match_operand:SI 3 "register_operand" " r")))] - "TARGET_ISA_V3" + [(set (match_operand:SI 0 "register_operand" "= r") + (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")) + (match_operand:SI 3 "register_operand" " r")))] + "TARGET_ISA_V3 && optimize_size" "or_slli\t%0, %3, %1, %2" - [(set_attr "type" "alu") - (set_attr "length" "4")]) + [(set_attr "type" "alu_shift") + (set_attr "length" "4")]) (define_insn "*or_srli" - [(set (match_operand:SI 0 "register_operand" "= r") - (ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") - (match_operand:SI 2 "immediate_operand" " Iu05")) - (match_operand:SI 3 "register_operand" " r")))] - "TARGET_ISA_V3" + [(set (match_operand:SI 0 "register_operand" "= r") + (ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")) + (match_operand:SI 3 "register_operand" " r")))] + "TARGET_ISA_V3 && optimize_size" "or_srli\t%0, %3, %1, %2" - [(set_attr "type" "alu") - (set_attr "length" "4")]) + [(set_attr "type" "alu_shift") + (set_attr "length" "4")]) ;; ---------------------------------------------------------------------------- @@ -644,69 +779,61 @@ ;; For V3/V3M ISA, we have 'xor33' instruction. ;; So we can identify 'xor Rt3,Rt3,Ra3' case and set its length to be 2. -(define_insn "xorsi3" - [(set (match_operand:SI 0 "register_operand" "= w, r, r, r") - (xor:SI (match_operand:SI 1 "register_operand" " %0, r, r, r") - (match_operand:SI 2 "general_operand" " w, r, Iu15, It15")))] + +(define_expand "xorsi3" + [(set (match_operand:SI 0 "register_operand" "") + (xor:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")))] "" { - int one_position; - - switch (which_alternative) - { - case 0: - return "xor33\t%0, %2"; - case 1: - return "xor\t%0, %1, %2"; - case 2: - return "xori\t%0, %1, %2"; - case 3: - /* If we reach this alternative, - it must pass the nds32_can_use_btgl_p() test, - so that we can guarantee there is only one 1-bit - within the immediate value. */ - /* Use exact_log2() to search the 1-bit position. 
*/ - one_position = exact_log2 (INTVAL (operands[2])); - operands[2] = GEN_INT (one_position); - return "btgl\t%0, %1, %2"; + if (!nds32_xor_operand (operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); +}) - default: - gcc_unreachable (); - } -} - [(set_attr "type" "alu,alu,alu,alu") - (set_attr "length" " 2, 4, 4, 4")]) +(define_insn "*xorsi3" + [(set (match_operand:SI 0 "register_operand" "=$l, r, r, r") + (xor:SI (match_operand:SI 1 "register_operand" "% 0, r, r, r") + (match_operand:SI 2 "nds32_xor_operand" " l, r, Iu15, It15")))] + "" + "@ + xor33\t%0, %2 + xor\t%0, %1, %2 + xori\t%0, %1, %2 + btgl\t%0, %1, %B2" + [(set_attr "type" "alu,alu,alu,alu") + (set_attr "length" " 2, 4, 4, 4") + (set_attr "feature" "v3m, v1, v1,pe1")]) (define_insn "*xor_slli" [(set (match_operand:SI 0 "register_operand" "= r") (xor:SI (ashift:SI (match_operand:SI 1 "register_operand" " r") - (match_operand:SI 2 "immediate_operand" " Iu05")) + (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")) (match_operand:SI 3 "register_operand" " r")))] - "TARGET_ISA_V3" + "TARGET_ISA_V3 && optimize_size" "xor_slli\t%0, %3, %1, %2" - [(set_attr "type" "alu") - (set_attr "length" "4")]) + [(set_attr "type" "alu_shift") + (set_attr "length" "4")]) (define_insn "*xor_srli" - [(set (match_operand:SI 0 "register_operand" "= r") - (xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") - (match_operand:SI 2 "immediate_operand" " Iu05")) - (match_operand:SI 3 "register_operand" " r")))] - "TARGET_ISA_V3" + [(set (match_operand:SI 0 "register_operand" "= r") + (xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")) + (match_operand:SI 3 "register_operand" " r")))] + "TARGET_ISA_V3 && optimize_size" "xor_srli\t%0, %3, %1, %2" - [(set_attr "type" "alu") - (set_attr "length" "4")]) + [(set_attr "type" "alu_shift") + (set_attr "length" "4")]) ;; Rotate Right Instructions. -(define_insn "rotrsi3" - [(set (match_operand:SI 0 "register_operand" "= r, r") - (rotatert:SI (match_operand:SI 1 "register_operand" " r, r") - (match_operand:SI 2 "nonmemory_operand" " Iu05, r")))] +(define_insn "*rotrsi3" + [(set (match_operand:SI 0 "register_operand" "= r, r") + (rotatert:SI (match_operand:SI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r")))] "" "@ - rotri\t%0, %1, %2 - rotr\t%0, %1, %2" + rotri\t%0, %1, %2 + rotr\t%0, %1, %2" [(set_attr "type" "alu,alu") (set_attr "length" " 4, 4")]) @@ -720,14 +847,95 @@ ;; And for V2 ISA, there is NO 'neg33' instruction. ;; The only option is to use 'subri A,B,0' (its semantic is 'A = 0 - B'). 
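;; For example, negating $r1 into $r0 is emitted as "subri $r0, $r1, 0"
;; (computing 0 - $r1) in the general case, or as the 2-byte "neg33 $r0, $r1"
;; when both registers are low registers and a V3/V3M baseline is in use.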
(define_insn "negsi2" - [(set (match_operand:SI 0 "register_operand" "=w, r") - (neg:SI (match_operand:SI 1 "register_operand" " w, r")))] + [(set (match_operand:SI 0 "register_operand" "=$l, r") + (neg:SI (match_operand:SI 1 "register_operand" " l, r")))] "" "@ neg33\t%0, %1 subri\t%0, %1, 0" - [(set_attr "type" "alu,alu") - (set_attr "length" " 2, 4")]) + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4") + (set_attr "feature" "v3m, v1")]) + +(define_expand "negsf2" + [(set (match_operand:SF 0 "register_operand" "") + (neg:SF (match_operand:SF 1 "register_operand" "")))] + "" +{ + if (!TARGET_FPU_SINGLE && !TARGET_EXT_PERF) + { + rtx new_dst = simplify_gen_subreg (SImode, operands[0], SFmode, 0); + rtx new_src = simplify_gen_subreg (SImode, operands[1], SFmode, 0); + + emit_insn (gen_xorsi3 (new_dst, + new_src, + gen_int_mode (0x80000000, SImode))); + + DONE; + } +}) + +(define_expand "negdf2" + [(set (match_operand:DF 0 "register_operand" "") + (neg:DF (match_operand:DF 1 "register_operand" "")))] + "" +{ +}) + +(define_insn_and_split "soft_negdf2" + [(set (match_operand:DF 0 "register_operand" "") + (neg:DF (match_operand:DF 1 "register_operand" "")))] + "!TARGET_FPU_DOUBLE" + "#" + "!TARGET_FPU_DOUBLE" + [(const_int 1)] +{ + rtx src = operands[1]; + rtx dst = operands[0]; + rtx ori_dst = operands[0]; + + bool need_extra_move_for_dst_p; + /* FPU register can't change mode to SI directly, so we need create a + tmp register to handle it, and FPU register can't do `xor` or btgl. */ + if (HARD_REGISTER_P (src) + && TEST_HARD_REG_BIT (reg_class_contents[FP_REGS], REGNO (src))) + { + rtx tmp = gen_reg_rtx (DFmode); + emit_move_insn (tmp, src); + src = tmp; + } + + if (HARD_REGISTER_P (dst) + && TEST_HARD_REG_BIT (reg_class_contents[FP_REGS], REGNO (dst))) + { + need_extra_move_for_dst_p = true; + rtx tmp = gen_reg_rtx (DFmode); + dst = tmp; + } + + rtx dst_high_part = simplify_gen_subreg ( + SImode, dst, + DFmode, subreg_highpart_offset (SImode, DFmode)); + rtx dst_low_part = simplify_gen_subreg ( + SImode, dst, + DFmode, subreg_lowpart_offset (SImode, DFmode)); + rtx src_high_part = simplify_gen_subreg ( + SImode, src, + DFmode, subreg_highpart_offset (SImode, DFmode)); + rtx src_low_part = simplify_gen_subreg ( + SImode, src, + DFmode, subreg_lowpart_offset (SImode, DFmode)); + + emit_insn (gen_xorsi3 (dst_high_part, + src_high_part, + gen_int_mode (0x80000000, SImode))); + emit_move_insn (dst_low_part, src_low_part); + + if (need_extra_move_for_dst_p) + emit_move_insn (ori_dst, dst); + + DONE; +}) ;; ---------------------------------------------------------------------------- @@ -737,53 +945,67 @@ ;; For V3/V3M ISA, we have 'not33' instruction. ;; So we can identify 'not Rt3,Ra3' case and set its length to be 2. (define_insn "one_cmplsi2" - [(set (match_operand:SI 0 "register_operand" "=w, r") - (not:SI (match_operand:SI 1 "register_operand" " w, r")))] + [(set (match_operand:SI 0 "register_operand" "=$l, r") + (not:SI (match_operand:SI 1 "register_operand" " l, r")))] "" "@ not33\t%0, %1 nor\t%0, %1, %1" - [(set_attr "type" "alu,alu") - (set_attr "length" " 2, 4")]) + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4") + (set_attr "feature" "v3m, v1")]) ;; ---------------------------------------------------------------------------- ;; Shift instructions. 
-(define_insn "ashlsi3" - [(set (match_operand:SI 0 "register_operand" "= l, r, r") - (ashift:SI (match_operand:SI 1 "register_operand" " l, r, r") - (match_operand:SI 2 "nonmemory_operand" " Iu03, Iu05, r")))] +(define_expand "si3" + [(set (match_operand:SI 0 "register_operand" "") + (shift_rotate:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nds32_rimm5u_operand" "")))] + "" +{ + if (operands[2] == const0_rtx) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } +}) + +(define_insn "*ashlsi3" + [(set (match_operand:SI 0 "register_operand" "=$ l, r, r") + (ashift:SI (match_operand:SI 1 "register_operand" " l, r, r") + (match_operand:SI 2 "nds32_rimm5u_operand" " Iu03, Iu05, r")))] "" "@ - slli333\t%0, %1, %2 - slli\t%0, %1, %2 - sll\t%0, %1, %2" + slli333\t%0, %1, %2 + slli\t%0, %1, %2 + sll\t%0, %1, %2" [(set_attr "type" "alu,alu,alu") (set_attr "length" " 2, 4, 4")]) -(define_insn "ashrsi3" - [(set (match_operand:SI 0 "register_operand" "= d, r, r") - (ashiftrt:SI (match_operand:SI 1 "register_operand" " 0, r, r") - (match_operand:SI 2 "nonmemory_operand" " Iu05, Iu05, r")))] +(define_insn "*ashrsi3" + [(set (match_operand:SI 0 "register_operand" "=$ d, r, r") + (ashiftrt:SI (match_operand:SI 1 "register_operand" " 0, r, r") + (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, Iu05, r")))] "" "@ - srai45\t%0, %2 - srai\t%0, %1, %2 - sra\t%0, %1, %2" + srai45\t%0, %2 + srai\t%0, %1, %2 + sra\t%0, %1, %2" [(set_attr "type" "alu,alu,alu") (set_attr "length" " 2, 4, 4")]) -(define_insn "lshrsi3" - [(set (match_operand:SI 0 "register_operand" "= d, r, r") - (lshiftrt:SI (match_operand:SI 1 "register_operand" " 0, r, r") - (match_operand:SI 2 "nonmemory_operand" " Iu05, Iu05, r")))] +(define_insn "*lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=$ d, r, r") + (lshiftrt:SI (match_operand:SI 1 "register_operand" " 0, r, r") + (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, Iu05, r")))] "" "@ - srli45\t%0, %2 - srli\t%0, %1, %2 - srl\t%0, %1, %2" + srli45\t%0, %2 + srli\t%0, %1, %2 + srl\t%0, %1, %2" [(set_attr "type" "alu,alu,alu") (set_attr "length" " 2, 4, 4")]) @@ -794,149 +1016,65 @@ ;; Conditional Move patterns ;; ---------------------------------------------------------------------------- -(define_expand "movsicc" - [(set (match_operand:SI 0 "register_operand" "") - (if_then_else:SI (match_operand 1 "comparison_operator" "") - (match_operand:SI 2 "register_operand" "") - (match_operand:SI 3 "register_operand" "")))] - "TARGET_CMOV" +(define_expand "movcc" + [(set (match_operand:QIHISI 0 "register_operand" "") + (if_then_else:QIHISI (match_operand 1 "nds32_movecc_comparison_operator" "") + (match_operand:QIHISI 2 "register_operand" "") + (match_operand:QIHISI 3 "register_operand" "")))] + "TARGET_CMOV && !optimize_size" { - if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE) - && GET_MODE (XEXP (operands[1], 0)) == SImode - && XEXP (operands[1], 1) == const0_rtx) - { - /* If the operands[1] rtx is already (eq X 0) or (ne X 0), - we have gcc generate original template rtx. */ - goto create_template; - } - else + enum nds32_expand_result_type result = nds32_expand_movcc (operands); + switch (result) { - /* Since there is only 'slt'(Set when Less Than) instruction for - comparison in Andes ISA, the major strategy we use here is to - convert conditional move into 'LT + EQ' or 'LT + NE' rtx combination. 
- We design constraints properly so that the reload phase will assist - to make one source operand to use same register as result operand. - Then we can use cmovz/cmovn to catch the other source operand - which has different register. */ - enum rtx_code code = GET_CODE (operands[1]); - enum rtx_code new_code = code; - rtx cmp_op0 = XEXP (operands[1], 0); - rtx cmp_op1 = XEXP (operands[1], 1); - rtx tmp; - int reverse = 0; - - /* Main Goal: Use 'LT + EQ' or 'LT + NE' to target "then" part - Strategy : Reverse condition and swap comparison operands - - For example: - - a <= b ? P : Q (LE or LEU) - --> a > b ? Q : P (reverse condition) - --> b < a ? Q : P (swap comparison operands to achieve 'LT/LTU') - - a >= b ? P : Q (GE or GEU) - --> a < b ? Q : P (reverse condition to achieve 'LT/LTU') - - a < b ? P : Q (LT or LTU) - --> (NO NEED TO CHANGE, it is already 'LT/LTU') - - a > b ? P : Q (GT or GTU) - --> b < a ? P : Q (swap comparison operands to achieve 'LT/LTU') */ - switch (code) - { - case NE: - /* (a != b ? P : Q) - can be expressed as - (a == b ? Q : P) - so, fall through to reverse condition */ - case GE: case GEU: case LE: case LEU: - new_code = reverse_condition (code); - reverse = 1; - break; - case EQ: case GT: case GTU: case LT: case LTU: - /* no need to reverse condition */ - break; - default: - FAIL; - } - - /* For '>' comparison operator, we swap operands - so that we can have 'LT/LTU' operator. */ - if (new_code == GT || new_code == GTU) - { - tmp = cmp_op0; - cmp_op0 = cmp_op1; - cmp_op1 = tmp; - - new_code = swap_condition (new_code); - } - - /* Use a temporary register to store slt/slts result. */ - tmp = gen_reg_rtx (SImode); - - /* Split EQ and NE because we don't have direct comparison of EQ and NE. - If we don't split it, the conditional move transformation will fail - when producing (SET A (EQ B C)) or (SET A (NE B C)). */ - if (new_code == EQ) - { - emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1)); - emit_insn (gen_slt_compare (tmp, tmp, GEN_INT (1))); - } - else if (new_code == NE) - { - emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1)); - emit_insn (gen_slt_compare (tmp, GEN_INT (0), tmp)); - } - else - /* This emit_insn will create corresponding 'slt/slts' insturction. */ - emit_insn (gen_rtx_SET (VOIDmode, tmp, - gen_rtx_fmt_ee (new_code, SImode, - cmp_op0, cmp_op1))); - - /* Change comparison semantic into (eq X 0) or (ne X 0) behavior - so that cmovz or cmovn will be matched later. - - For reverse condition cases, we want to create a semantic that: - (eq X 0) --> pick up "else" part - For normal cases, we want to create a semantic that: - (ne X 0) --> pick up "then" part - - Later we will have cmovz/cmovn instruction pattern to - match corresponding behavior and output instruction. */ - operands[1] = gen_rtx_fmt_ee (reverse ? 
EQ : NE, - VOIDmode, tmp, const0_rtx); + case EXPAND_DONE: + DONE; + break; + case EXPAND_FAIL: + FAIL; + break; + case EXPAND_CREATE_TEMPLATE: + break; + default: + gcc_unreachable (); } - -create_template: - do {} while(0); /* dummy line */ }) -(define_insn "cmovz" - [(set (match_operand:SI 0 "register_operand" "=r, r") - (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r") +(define_insn "cmovz" + [(set (match_operand:QIHISI 0 "register_operand" "=r, r") + (if_then_else:QIHISI (eq (match_operand:SI 1 "register_operand" " r, r") (const_int 0)) - (match_operand:SI 2 "register_operand" " r, 0") - (match_operand:SI 3 "register_operand" " 0, r")))] + (match_operand:QIHISI 2 "register_operand" " r, 0") + (match_operand:QIHISI 3 "register_operand" " 0, r")))] "TARGET_CMOV" "@ cmovz\t%0, %2, %1 cmovn\t%0, %3, %1" - [(set_attr "type" "move") + [(set_attr "type" "alu") (set_attr "length" "4")]) -(define_insn "cmovn" - [(set (match_operand:SI 0 "register_operand" "=r, r") - (if_then_else:SI (ne (match_operand:SI 1 "register_operand" " r, r") +(define_insn "cmovn" + [(set (match_operand:QIHISI 0 "register_operand" "=r, r") + (if_then_else:QIHISI (ne (match_operand:SI 1 "register_operand" " r, r") (const_int 0)) - (match_operand:SI 2 "register_operand" " r, 0") - (match_operand:SI 3 "register_operand" " 0, r")))] + (match_operand:QIHISI 2 "register_operand" " r, 0") + (match_operand:QIHISI 3 "register_operand" " 0, r")))] "TARGET_CMOV" "@ cmovn\t%0, %2, %1 cmovz\t%0, %3, %1" - [(set_attr "type" "move") + [(set_attr "type" "alu") (set_attr "length" "4")]) +;; A hotfix to help RTL combiner to merge a cmovn insn and a zero_extend insn. +;; It should be removed once after we change the expansion form of the cmovn. +(define_insn "*cmovn_simplified_" + [(set (match_operand:QIHISI 0 "register_operand" "=r") + (if_then_else:QIHISI (match_operand:SI 1 "register_operand" "r") + (match_operand:QIHISI 2 "register_operand" "r") + (match_operand:QIHISI 3 "register_operand" "0")))] + "" + "cmovn\t%0, %2, %1" + [(set_attr "type" "alu")]) ;; ---------------------------------------------------------------------------- ;; Conditional Branch patterns @@ -951,573 +1089,188 @@ (pc)))] "" { - rtx tmp_reg; - enum rtx_code code; - - code = GET_CODE (operands[0]); - - /* If operands[2] is (const_int 0), - we can use beqz,bnez,bgtz,bgez,bltz,or blez instructions. - So we have gcc generate original template rtx. */ - if (GET_CODE (operands[2]) == CONST_INT) - if (INTVAL (operands[2]) == 0) - if ((code != GTU) - && (code != GEU) - && (code != LTU) - && (code != LEU)) - goto create_template; - - /* For other comparison, NDS32 ISA only has slt (Set-on-Less-Than) - behavior for the comparison, we might need to generate other - rtx patterns to achieve same semantic. */ - switch (code) - { - case GT: - case GTU: - if (GET_CODE (operands[2]) == CONST_INT) - { - /* GT reg_A, const_int => !(LT reg_A, const_int + 1) */ - tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); - - /* We want to plus 1 into the integer value - of operands[2] to create 'slt' instruction. - This caculation is performed on the host machine, - which may be 64-bit integer. - So the meaning of caculation result may be - different from the 32-bit nds32 target. - - For example: - 0x7fffffff + 0x1 -> 0x80000000, - this value is POSITIVE on 64-bit machine, - but the expected value on 32-bit nds32 target - should be NEGATIVE value. - - Hence, instead of using GEN_INT(), we use gen_int_mode() to - explicitly create SImode constant rtx. 
*/ - operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode); - - if (code == GT) - { - /* GT, use slts instruction */ - emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2])); - } - else - { - /* GTU, use slt instruction */ - emit_insn (gen_slt_compare (tmp_reg, operands[1], operands[2])); - } - - PUT_CODE (operands[0], EQ); - operands[1] = tmp_reg; - operands[2] = const0_rtx; - emit_insn (gen_cbranchsi4 (operands[0], operands[1], - operands[2], operands[3])); - - DONE; - } - else - { - /* GT reg_A, reg_B => LT reg_B, reg_A */ - tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); - - if (code == GT) - { - /* GT, use slts instruction */ - emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1])); - } - else - { - /* GTU, use slt instruction */ - emit_insn (gen_slt_compare (tmp_reg, operands[2], operands[1])); - } - - PUT_CODE (operands[0], NE); - operands[1] = tmp_reg; - operands[2] = const0_rtx; - emit_insn (gen_cbranchsi4 (operands[0], operands[1], - operands[2], operands[3])); - - DONE; - } - - case GE: - case GEU: - /* GE reg_A, reg_B => !(LT reg_A, reg_B) */ - /* GE reg_A, const_int => !(LT reg_A, const_int) */ - tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); - - if (code == GE) - { - /* GE, use slts instruction */ - emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2])); - } - else - { - /* GEU, use slt instruction */ - emit_insn (gen_slt_compare (tmp_reg, operands[1], operands[2])); - } - - PUT_CODE (operands[0], EQ); - operands[1] = tmp_reg; - operands[2] = const0_rtx; - emit_insn (gen_cbranchsi4 (operands[0], operands[1], - operands[2], operands[3])); - - DONE; - - case LT: - case LTU: - /* LT reg_A, reg_B => LT reg_A, reg_B */ - /* LT reg_A, const_int => LT reg_A, const_int */ - tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); - - if (code == LT) - { - /* LT, use slts instruction */ - emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2])); - } - else - { - /* LTU, use slt instruction */ - emit_insn (gen_slt_compare (tmp_reg, operands[1], operands[2])); - } - - PUT_CODE (operands[0], NE); - operands[1] = tmp_reg; - operands[2] = const0_rtx; - emit_insn (gen_cbranchsi4 (operands[0], operands[1], - operands[2], operands[3])); - + enum nds32_expand_result_type result = nds32_expand_cbranch (operands); + switch (result) + { + case EXPAND_DONE: DONE; - - case LE: - case LEU: - if (GET_CODE (operands[2]) == CONST_INT) - { - /* LE reg_A, const_int => LT reg_A, const_int + 1 */ - tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); - - /* Note that (le:SI X INT_MAX) is not the same as (lt:SI X INT_MIN). - We better have an assert here in case GCC does not properly - optimize it away. The INT_MAX here is 0x7fffffff for target. 
*/ - gcc_assert (code != LE || INTVAL (operands[2]) != 0x7fffffff); - operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode); - - if (code == LE) - { - /* LE, use slts instruction */ - emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2])); - } - else - { - /* LEU, use slt instruction */ - emit_insn (gen_slt_compare (tmp_reg, operands[1], operands[2])); - } - - PUT_CODE (operands[0], NE); - operands[1] = tmp_reg; - operands[2] = const0_rtx; - emit_insn (gen_cbranchsi4 (operands[0], operands[1], - operands[2], operands[3])); - - DONE; - } - else - { - /* LE reg_A, reg_B => !(LT reg_B, reg_A) */ - tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); - - if (code == LE) - { - /* LE, use slts instruction */ - emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1])); - } - else - { - /* LEU, use slt instruction */ - emit_insn (gen_slt_compare (tmp_reg, operands[2], operands[1])); - } - - PUT_CODE (operands[0], EQ); - operands[1] = tmp_reg; - operands[2] = const0_rtx; - emit_insn (gen_cbranchsi4 (operands[0], operands[1], - operands[2], operands[3])); - - DONE; - } - - case EQ: - case NE: - /* NDS32 ISA has various form for eq/ne behavior no matter - what kind of the operand is. - So just generate original template rtx. */ - goto create_template; - - default: + break; + case EXPAND_FAIL: FAIL; + break; + case EXPAND_CREATE_TEMPLATE: + break; + default: + gcc_unreachable (); } - -create_template: - do {} while(0); /* dummy line */ }) -(define_insn "*cbranchsi4_equality_zero" +(define_insn "cbranchsi4_equality_zero" [(set (pc) (if_then_else (match_operator 0 "nds32_equality_comparison_operator" - [(match_operand:SI 1 "register_operand" "t, l, r") + [(match_operand:SI 1 "register_operand" "$t,$l, r") (const_int 0)]) (label_ref (match_operand 2 "" "")) (pc)))] "" { - enum rtx_code code; - - code = GET_CODE (operands[0]); - - /* This zero-comparison conditional branch has two forms: - 32-bit instruction => beqz/bnez imm16s << 1 - 16-bit instruction => beqzs8/bnezs8/beqz38/bnez38 imm8s << 1 - - For 32-bit case, - we assume it is always reachable. (but check range -65500 ~ 65500) - - For 16-bit case, - it must satisfy { 255 >= (label - pc) >= -256 } condition. - However, since the $pc for nds32 is at the beginning of the instruction, - we should leave some length space for current insn. - So we use range -250 ~ 250. */ - - switch (get_attr_length (insn)) - { - case 2: - if (which_alternative == 0) - { - /* constraint: t */ - return (code == EQ) ? "beqzs8\t%2" : "bnezs8\t%2"; - } - else if (which_alternative == 1) - { - /* constraint: l */ - return (code == EQ) ? "beqz38\t%1, %2" : "bnez38\t%1, %2"; - } - else - { - /* constraint: r */ - /* For which_alternative==2, it should not be here. */ - gcc_unreachable (); - } - case 4: - /* including constraints: t, l, and r */ - return (code == EQ) ? 
"beqz\t%1, %2" : "bnez\t%1, %2"; - case 6: - if (which_alternative == 0) - { - /* constraint: t */ - if (code == EQ) - { - /* beqzs8 .L0 - => - bnezs8 .LCB0 - j .L0 - .LCB0: - */ - return "bnezs8\t.LCB%=\;j\t%2\n.LCB%=:"; - } - else - { - /* bnezs8 .L0 - => - beqzs8 .LCB0 - j .L0 - .LCB0: - */ - return "beqzs8\t.LCB%=\;j\t%2\n.LCB%=:"; - } - } - else if (which_alternative == 1) - { - /* constraint: l */ - if (code == EQ) - { - /* beqz38 $r0, .L0 - => - bnez38 $r0, .LCB0 - j .L0 - .LCB0: - */ - return "bnez38\t%1, .LCB%=\;j\t%2\n.LCB%=:"; - } - else - { - /* bnez38 $r0, .L0 - => - beqz38 $r0, .LCB0 - j .L0 - .LCB0: - */ - return "beqz38\t%1, .LCB%=\;j\t%2\n.LCB%=:"; - } - } - else - { - /* constraint: r */ - /* For which_alternative==2, it should not be here. */ - gcc_unreachable (); - } - case 8: - /* constraint: t, l, r. */ - if (code == EQ) - { - /* beqz $r8, .L0 - => - bnez $r8, .LCB0 - j .L0 - .LCB0: - */ - return "bnez\t%1, .LCB%=\;j\t%2\n.LCB%=:"; - } - else - { - /* bnez $r8, .L0 - => - beqz $r8, .LCB0 - j .L0 - .LCB0: - */ - return "beqz\t%1, .LCB%=\;j\t%2\n.LCB%=:"; - } - default: - gcc_unreachable (); - } -} - [(set_attr "type" "branch") - (set_attr "enabled" "1") - (set_attr_alternative "length" - [ - ;; Alternative 0 - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250)) - (le (minus (match_dup 2) (pc)) (const_int 250))) - (if_then_else (match_test "TARGET_16_BIT") - (const_int 2) - (const_int 4)) - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) - (le (minus (match_dup 2) (pc)) (const_int 65500))) - (const_int 4) - (if_then_else (match_test "TARGET_16_BIT") - (const_int 6) - (const_int 8)))) - ;; Alternative 1 - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250)) - (le (minus (match_dup 2) (pc)) (const_int 250))) - (if_then_else (match_test "TARGET_16_BIT") - (const_int 2) - (const_int 4)) - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) - (le (minus (match_dup 2) (pc)) (const_int 65500))) - (const_int 4) - (if_then_else (match_test "TARGET_16_BIT") - (const_int 6) - (const_int 8)))) - ;; Alternative 2 - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) - (le (minus (match_dup 2) (pc)) (const_int 65500))) - (const_int 4) - (const_int 8)) - ])]) + return nds32_output_cbranchsi4_equality_zero (insn, operands); +} + [(set_attr "type" "branch") + (set_attr_alternative "enabled" + [ + ;; Alternative 0 + (if_then_else (match_test "TARGET_16_BIT") + (const_string "yes") + (const_string "no")) + ;; Alternative 1 + (if_then_else (match_test "TARGET_16_BIT") + (const_string "yes") + (const_string "no")) + ;; Alternative 2 + (const_string "yes") + ]) + (set_attr_alternative "length" + [ + ;; Alternative 0 + (if_then_else (match_test "!find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)") + (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250)) + (le (minus (match_dup 2) (pc)) (const_int 250))) + (if_then_else (match_test "TARGET_16_BIT") + (const_int 2) + (const_int 4)) + (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) + (le (minus (match_dup 2) (pc)) (const_int 65500))) + (const_int 4) + (if_then_else (match_test "TARGET_16_BIT") + (const_int 8) + (const_int 10)))) + (const_int 10)) + ;; Alternative 1 + (if_then_else (match_test "!find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)") + (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250)) + (le (minus (match_dup 2) (pc)) (const_int 250))) + (if_then_else (match_test "TARGET_16_BIT") + (const_int 2) 
+ (const_int 4)) + (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) + (le (minus (match_dup 2) (pc)) (const_int 65500))) + (const_int 4) + (if_then_else (match_test "TARGET_16_BIT") + (const_int 8) + (const_int 10)))) + (const_int 10)) + ;; Alternative 2 + (if_then_else (match_test "!find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)") + (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) + (le (minus (match_dup 2) (pc)) (const_int 65500))) + (const_int 4) + (const_int 10)) + (const_int 10)) + ])]) ;; This pattern is dedicated to V2 ISA, ;; because V2 DOES NOT HAVE beqc/bnec instruction. -(define_insn "*cbranchsi4_equality_reg" +(define_insn "cbranchsi4_equality_reg" [(set (pc) (if_then_else (match_operator 0 "nds32_equality_comparison_operator" - [(match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "nds32_reg_constant_operand" "r")]) + [(match_operand:SI 1 "register_operand" "$v, r") + (match_operand:SI 2 "register_operand" " l, r")]) (label_ref (match_operand 3 "" "")) (pc)))] "TARGET_ISA_V2" { - enum rtx_code code; - - code = GET_CODE (operands[0]); - - /* This register-comparison conditional branch has one form: - 32-bit instruction => beq/bne imm14s << 1 - - For 32-bit case, - we assume it is always reachable. (but check range -16350 ~ 16350). */ - - switch (code) - { - case EQ: - /* r, r */ - switch (get_attr_length (insn)) - { - case 4: - return "beq\t%1, %2, %3"; - case 8: - /* beq $r0, $r1, .L0 - => - bne $r0, $r1, .LCB0 - j .L0 - .LCB0: - */ - return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; - default: - gcc_unreachable (); - } - - case NE: - /* r, r */ - switch (get_attr_length (insn)) - { - case 4: - return "bne\t%1, %2, %3"; - case 8: - /* bne $r0, $r1, .L0 - => - beq $r0, $r1, .LCB0 - j .L0 - .LCB0: - */ - return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; - default: - gcc_unreachable (); - } - - default: - gcc_unreachable (); - } + return nds32_output_cbranchsi4_equality_reg (insn, operands); } [(set_attr "type" "branch") - (set (attr "length") - (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350)) - (le (minus (match_dup 3) (pc)) (const_int 16350))) - (const_int 4) - (const_int 8)))]) + (set_attr_alternative "enabled" + [ + ;; Alternative 0 + (if_then_else (match_test "TARGET_16_BIT") + (const_string "yes") + (const_string "no")) + ;; Alternative 1 + (const_string "yes") + ]) + (set_attr_alternative "length" + [ + ;; Alternative 0 + (if_then_else (match_test "!find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)") + (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 250))) + (const_int 2) + (if_then_else (and (ge (minus (match_dup 3) (pc)) + (const_int -16350)) + (le (minus (match_dup 3) (pc)) + (const_int 16350))) + (const_int 4) + (const_int 8))) + (const_int 8)) + ;; Alternative 1 + (if_then_else (match_test "!find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)") + (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350)) + (le (minus (match_dup 3) (pc)) (const_int 16350))) + (const_int 4) + (const_int 10)) + (const_int 10)) + ])]) ;; This pattern is dedicated to V3/V3M, ;; because V3/V3M DO HAVE beqc/bnec instruction. 
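;; For instance (values are illustrative), a test such as x == 5 can branch
;; with a single "beqc $r0, 5, .L1" because 5 fits the signed 11-bit
;; immediate accepted here, whereas on V2 the constant would first have to be
;; loaded into a register.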
-(define_insn "*cbranchsi4_equality_reg_or_const_int" +(define_insn "cbranchsi4_equality_reg_or_const_int" [(set (pc) (if_then_else (match_operator 0 "nds32_equality_comparison_operator" - [(match_operand:SI 1 "register_operand" "r, r") - (match_operand:SI 2 "nds32_reg_constant_operand" "r, Is11")]) + [(match_operand:SI 1 "register_operand" "$v, r, r") + (match_operand:SI 2 "nds32_rimm11s_operand" " l, r, Is11")]) (label_ref (match_operand 3 "" "")) (pc)))] "TARGET_ISA_V3 || TARGET_ISA_V3M" { - enum rtx_code code; - - code = GET_CODE (operands[0]); - - /* This register-comparison conditional branch has one form: - 32-bit instruction => beq/bne imm14s << 1 - 32-bit instruction => beqc/bnec imm8s << 1 - - For 32-bit case, we assume it is always reachable. - (but check range -16350 ~ 16350 and -250 ~ 250). */ - - switch (code) - { - case EQ: - if (which_alternative == 0) - { - /* r, r */ - switch (get_attr_length (insn)) - { - case 4: - return "beq\t%1, %2, %3"; - case 8: - /* beq $r0, $r1, .L0 - => - bne $r0, $r1, .LCB0 - j .L0 - .LCB0: - */ - return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; - default: - gcc_unreachable (); - } - } - else - { - /* r, Is11 */ - switch (get_attr_length (insn)) - { - case 4: - return "beqc\t%1, %2, %3"; - case 8: - /* beqc $r0, constant, .L0 - => - bnec $r0, constant, .LCB0 - j .L0 - .LCB0: - */ - return "bnec\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; - default: - gcc_unreachable (); - } - } - case NE: - if (which_alternative == 0) - { - /* r, r */ - switch (get_attr_length (insn)) - { - case 4: - return "bne\t%1, %2, %3"; - case 8: - /* bne $r0, $r1, .L0 - => - beq $r0, $r1, .LCB0 - j .L0 - .LCB0: - */ - return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; - default: - gcc_unreachable (); - } - } - else - { - /* r, Is11 */ - switch (get_attr_length (insn)) - { - case 4: - return "bnec\t%1, %2, %3"; - case 8: - /* bnec $r0, constant, .L0 - => - beqc $r0, constant, .LCB0 - j .L0 - .LCB0: - */ - return "beqc\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; - default: - gcc_unreachable (); - } - } - default: - gcc_unreachable (); - } + return nds32_output_cbranchsi4_equality_reg_or_const_int (insn, operands); } [(set_attr "type" "branch") + (set_attr_alternative "enabled" + [ + ;; Alternative 0 + (if_then_else (match_test "TARGET_16_BIT") + (const_string "yes") + (const_string "no")) + ;; Alternative 1 + (const_string "yes") + ;; Alternative 2 + (const_string "yes") + ]) (set_attr_alternative "length" [ ;; Alternative 0 - (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350)) - (le (minus (match_dup 3) (pc)) (const_int 16350))) - (const_int 4) - (const_int 8)) + (if_then_else (match_test "!find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)") + (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 250))) + (const_int 2) + (if_then_else (and (ge (minus (match_dup 3) (pc)) + (const_int -16350)) + (le (minus (match_dup 3) (pc)) + (const_int 16350))) + (const_int 4) + (const_int 8))) + (const_int 8)) ;; Alternative 1 - (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250)) - (le (minus (match_dup 3) (pc)) (const_int 250))) - (const_int 4) - (const_int 8)) + (if_then_else (match_test "!find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)") + (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350)) + (le (minus (match_dup 3) (pc)) (const_int 16350))) + (const_int 4) + (const_int 10)) + (const_int 10)) + ;; Alternative 2 + (if_then_else (match_test "!find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)") + 
(if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 250))) + (const_int 4) + (const_int 10)) + (const_int 10)) ])]) @@ -1530,80 +1283,16 @@ (pc)))] "" { - enum rtx_code code; - - code = GET_CODE (operands[0]); - - /* This zero-greater-less-comparison conditional branch has one form: - 32-bit instruction => bgtz/bgez/bltz/blez imm16s << 1 - - For 32-bit case, we assume it is always reachable. - (but check range -65500 ~ 65500). */ - - if (get_attr_length (insn) == 8) - { - /* The branch target is too far to simply use one - bgtz/bgez/bltz/blez instruction. - We need to reverse condition and use 'j' to jump to the target. */ - switch (code) - { - case GT: - /* bgtz $r8, .L0 - => - blez $r8, .LCB0 - j .L0 - .LCB0: - */ - return "blez\t%1, .LCB%=\;j\t%2\n.LCB%=:"; - case GE: - /* bgez $r8, .L0 - => - bltz $r8, .LCB0 - j .L0 - .LCB0: - */ - return "bltz\t%1, .LCB%=\;j\t%2\n.LCB%=:"; - case LT: - /* bltz $r8, .L0 - => - bgez $r8, .LCB0 - j .L0 - .LCB0: - */ - return "bgez\t%1, .LCB%=\;j\t%2\n.LCB%=:"; - case LE: - /* blez $r8, .L0 - => - bgtz $r8, .LCB0 - j .L0 - .LCB0: - */ - return "bgtz\t%1, .LCB%=\;j\t%2\n.LCB%=:"; - default: - gcc_unreachable (); - } - } - - switch (code) - { - case GT: - return "bgtz\t%1, %2"; - case GE: - return "bgez\t%1, %2"; - case LT: - return "bltz\t%1, %2"; - case LE: - return "blez\t%1, %2"; - default: - gcc_unreachable (); - } + return nds32_output_cbranchsi4_greater_less_zero (insn, operands); } [(set_attr "type" "branch") (set (attr "length") - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) - (le (minus (match_dup 2) (pc)) (const_int 65500))) - (const_int 4) - (const_int 8)))]) + (if_then_else (match_test "!find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)") + (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) + (le (minus (match_dup 2) (pc)) (const_int 65500))) + (const_int 4) + (const_int 10)) + (const_int 10)))]) (define_expand "cstoresi4" @@ -1613,237 +1302,85 @@ (match_operand:SI 3 "nonmemory_operand" "")]))] "" { - rtx tmp_reg; - enum rtx_code code; - - code = GET_CODE (operands[1]); - - switch (code) + enum nds32_expand_result_type result = nds32_expand_cstore (operands); + switch (result) { - case EQ: - if (GET_CODE (operands[3]) == CONST_INT) - { - /* reg_R = (reg_A == const_int_B) - --> addi reg_C, reg_A, -const_int_B - slti reg_R, reg_C, const_int_1 */ - tmp_reg = gen_reg_rtx (SImode); - operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode); - /* If the integer value is not in the range of imm15s, - we need to force register first because our addsi3 pattern - only accept nds32_rimm15s_operand predicate. 
*/ - if (!satisfies_constraint_Is15 (operands[3])) - operands[3] = force_reg (SImode, operands[3]); - emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3])); - emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx)); - - DONE; - } - else - { - /* reg_R = (reg_A == reg_B) - --> xor reg_C, reg_A, reg_B - slti reg_R, reg_C, const_int_1 */ - tmp_reg = gen_reg_rtx (SImode); - emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3])); - emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx)); - - DONE; - } - - case NE: - if (GET_CODE (operands[3]) == CONST_INT) - { - /* reg_R = (reg_A != const_int_B) - --> addi reg_C, reg_A, -const_int_B - slti reg_R, const_int_0, reg_C */ - tmp_reg = gen_reg_rtx (SImode); - operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode); - /* If the integer value is not in the range of imm15s, - we need to force register first because our addsi3 pattern - only accept nds32_rimm15s_operand predicate. */ - if (!satisfies_constraint_Is15 (operands[3])) - operands[3] = force_reg (SImode, operands[3]); - emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3])); - emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg)); - - DONE; - } - else - { - /* reg_R = (reg_A != reg_B) - --> xor reg_C, reg_A, reg_B - slti reg_R, const_int_0, reg_C */ - tmp_reg = gen_reg_rtx (SImode); - emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3])); - emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg)); - - DONE; - } - - case GT: - case GTU: - /* reg_R = (reg_A > reg_B) --> slt reg_R, reg_B, reg_A */ - /* reg_R = (reg_A > const_int_B) --> slt reg_R, const_int_B, reg_A */ - if (code == GT) - { - /* GT, use slts instruction */ - emit_insn (gen_slts_compare (operands[0], operands[3], operands[2])); - } - else - { - /* GTU, use slt instruction */ - emit_insn (gen_slt_compare (operands[0], operands[3], operands[2])); - } - - DONE; - - case GE: - case GEU: - if (GET_CODE (operands[3]) == CONST_INT) - { - /* reg_R = (reg_A >= const_int_B) - --> movi reg_C, const_int_B - 1 - slt reg_R, reg_C, reg_A */ - tmp_reg = gen_reg_rtx (SImode); - - emit_insn (gen_movsi (tmp_reg, - gen_int_mode (INTVAL (operands[3]) - 1, - SImode))); - if (code == GE) - { - /* GE, use slts instruction */ - emit_insn (gen_slts_compare (operands[0], tmp_reg, operands[2])); - } - else - { - /* GEU, use slt instruction */ - emit_insn (gen_slt_compare (operands[0], tmp_reg, operands[2])); - } - - DONE; - } - else - { - /* reg_R = (reg_A >= reg_B) - --> slt reg_R, reg_A, reg_B - xori reg_R, reg_R, const_int_1 */ - if (code == GE) - { - /* GE, use slts instruction */ - emit_insn (gen_slts_compare (operands[0], - operands[2], operands[3])); - } - else - { - /* GEU, use slt instruction */ - emit_insn (gen_slt_compare (operands[0], - operands[2], operands[3])); - } - - /* perform 'not' behavior */ - emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); - - DONE; - } - - case LT: - case LTU: - /* reg_R = (reg_A < reg_B) --> slt reg_R, reg_A, reg_B */ - /* reg_R = (reg_A < const_int_B) --> slt reg_R, reg_A, const_int_B */ - if (code == LT) - { - /* LT, use slts instruction */ - emit_insn (gen_slts_compare (operands[0], operands[2], operands[3])); - } - else - { - /* LTU, use slt instruction */ - emit_insn (gen_slt_compare (operands[0], operands[2], operands[3])); - } - + case EXPAND_DONE: DONE; - - case LE: - case LEU: - if (GET_CODE (operands[3]) == CONST_INT) - { - /* reg_R = (reg_A <= const_int_B) - --> movi reg_C, const_int_B + 1 - slt reg_R, reg_A, reg_C */ - tmp_reg = gen_reg_rtx 
(SImode); - - emit_insn (gen_movsi (tmp_reg, - gen_int_mode (INTVAL (operands[3]) + 1, - SImode))); - if (code == LE) - { - /* LE, use slts instruction */ - emit_insn (gen_slts_compare (operands[0], operands[2], tmp_reg)); - } - else - { - /* LEU, use slt instruction */ - emit_insn (gen_slt_compare (operands[0], operands[2], tmp_reg)); - } - - DONE; - } - else - { - /* reg_R = (reg_A <= reg_B) --> slt reg_R, reg_B, reg_A - xori reg_R, reg_R, const_int_1 */ - if (code == LE) - { - /* LE, use slts instruction */ - emit_insn (gen_slts_compare (operands[0], - operands[3], operands[2])); - } - else - { - /* LEU, use slt instruction */ - emit_insn (gen_slt_compare (operands[0], - operands[3], operands[2])); - } - - /* perform 'not' behavior */ - emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); - - DONE; - } - - + break; + case EXPAND_FAIL: + FAIL; + break; + case EXPAND_CREATE_TEMPLATE: + break; default: gcc_unreachable (); } }) -(define_insn "slts_compare" - [(set (match_operand:SI 0 "register_operand" "=t, t, r, r") - (lt:SI (match_operand:SI 1 "nonmemory_operand" " d, d, r, r") - (match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))] +(define_expand "slts_compare" + [(set (match_operand:SI 0 "register_operand" "") + (lt:SI (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "" +{ + if (!REG_P (operands[1])) + operands[1] = force_reg (SImode, operands[1]); + + if (!REG_P (operands[2]) && !satisfies_constraint_Is15 (operands[2])) + operands[2] = force_reg (SImode, operands[2]); +}) + +(define_insn "slts_compare_impl" + [(set (match_operand:SI 0 "register_operand" "=$t,$ t, r, r") + (lt:SI (match_operand:SI 1 "register_operand" " d, d, r, r") + (match_operand:SI 2 "nds32_rimm15s_operand" " r, Iu05, r, Is15")))] "" "@ slts45\t%1, %2 sltsi45\t%1, %2 slts\t%0, %1, %2 sltsi\t%0, %1, %2" - [(set_attr "type" "compare,compare,compare,compare") - (set_attr "length" " 2, 2, 4, 4")]) + [(set_attr "type" "alu, alu, alu, alu") + (set_attr "length" " 2, 2, 4, 4")]) + +(define_insn "slt_eq0" + [(set (match_operand:SI 0 "register_operand" "=$t, r") + (eq:SI (match_operand:SI 1 "register_operand" " d, r") + (const_int 0)))] + "" + "@ + slti45\t%1, 1 + slti\t%0, %1, 1" + [(set_attr "type" "alu, alu") + (set_attr "length" " 2, 4")]) + +(define_expand "slt_compare" + [(set (match_operand:SI 0 "register_operand" "") + (ltu:SI (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "" +{ + if (!REG_P (operands[1])) + operands[1] = force_reg (SImode, operands[1]); -(define_insn "slt_compare" - [(set (match_operand:SI 0 "register_operand" "=t, t, r, r") - (ltu:SI (match_operand:SI 1 "nonmemory_operand" " d, d, r, r") - (match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))] + if (!REG_P (operands[2]) && !satisfies_constraint_Is15 (operands[2])) + operands[2] = force_reg (SImode, operands[2]); +}) + +(define_insn "slt_compare_impl" + [(set (match_operand:SI 0 "register_operand" "=$t,$ t, r, r") + (ltu:SI (match_operand:SI 1 "register_operand" " d, d, r, r") + (match_operand:SI 2 "nds32_rimm15s_operand" " r, Iu05, r, Is15")))] "" "@ slt45\t%1, %2 slti45\t%1, %2 slt\t%0, %1, %2 slti\t%0, %1, %2" - [(set_attr "type" "compare,compare,compare,compare") - (set_attr "length" " 2, 2, 4, 4")]) - + [(set_attr "type" "alu, alu, alu, alu") + (set_attr "length" " 2, 2, 4, 4")]) ;; ---------------------------------------------------------------------------- @@ -1875,12 +1412,14 @@ } } [(set_attr "type" "branch") - (set_attr 
"enabled" "1") + (set_attr "enabled" "yes") (set (attr "length") - (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250)) - (le (minus (match_dup 0) (pc)) (const_int 250))) - (if_then_else (match_test "TARGET_16_BIT") - (const_int 2) + (if_then_else (match_test "!find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)") + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250)) + (le (minus (match_dup 0) (pc)) (const_int 250))) + (if_then_else (match_test "TARGET_16_BIT") + (const_int 2) + (const_int 4)) (const_int 4)) (const_int 4)))]) @@ -1888,11 +1427,24 @@ [(set (pc) (match_operand:SI 0 "register_operand" "r, r"))] "" "@ - jr5\t%0 - jr\t%0" + jr5\t%0 + jr\t%0" [(set_attr "type" "branch,branch") (set_attr "length" " 2, 4")]) +(define_insn "*cond_indirect_jump" + [(cond_exec (ne (match_operand:SI 0 "register_operand" "r") + (const_int 0)) + (set (pc) (match_operand:SI 1 "register_operand" "0")))] + "" + "jrnez\t%0" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +;; ---------------------------------------------------------------------------- + +;; Normal call patterns. + ;; Subroutine call instruction returning no value. ;; operands[0]: It should be a mem RTX whose address is ;; the the address of the function. @@ -1902,31 +1454,126 @@ (define_expand "call" [(parallel [(call (match_operand 0 "memory_operand" "") (match_operand 1)) - (clobber (reg:SI LP_REGNUM))])] - "" + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))])] "" + "nds32_expand_call_address (&operands[0]);" ) -(define_insn "*call_register" +(define_insn "call_register_align" [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r, r")) (match_operand 1)) - (clobber (reg:SI LP_REGNUM))])] - "" - "@ - jral5\t%0 - jral\t%0" - [(set_attr "type" "branch,branch") - (set_attr "length" " 2, 4")]) + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))])] + "NDS32_ALIGN_P ()" +{ + rtx next_insn = next_active_insn (insn); + bool align_p = !(next_insn && get_attr_length (next_insn) == 2); + switch (which_alternative) + { + case 0: + if (align_p) + return "jral5\t%0\;.align 2"; + else + return "jral5\t%0"; + case 1: + if (align_p) + return "jral\t%0\;.align 2"; + else + return "jral\t%0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "branch,branch") + (set_attr "length" " 2, 4") + (set_attr "relaxable" " no, yes")]) -(define_insn "*call_immediate" - [(parallel [(call (mem (match_operand:SI 0 "immediate_operand" "i")) +(define_insn "call_register" + [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r, r")) (match_operand 1)) - (clobber (reg:SI LP_REGNUM))])] - "" - "jal\t%0" + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))])] + "!NDS32_ALIGN_P ()" + "@ + jral5\t%0 + jral\t%0" + [(set_attr "type" "branch,branch") + (set_attr "length" " 2, 4") + (set_attr "relaxable" " no, yes")]) + +(define_insn "*cond_call_register" + [(cond_exec (ne (match_operand:SI 0 "register_operand" "r") + (const_int 0)) + (parallel [(call (mem (match_operand:SI 1 "register_operand" "0")) + (match_operand 2)) + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))]))] + "TARGET_ISA_V3" + "jralnez\t%0" [(set_attr "type" "branch") (set_attr "length" "4")]) +(define_insn "call_immediate_align" + [(parallel [(call (mem (match_operand:SI 0 "nds32_symbolic_operand" "i")) + (match_operand 1)) + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))])] + "NDS32_ALIGN_P ()" +{ + rtx next_insn = next_active_insn (insn); + bool align_p = next_insn && get_attr_length 
(next_insn) != 2; + + return nds32_output_call (insn, operands, "bal\t%0", "jal\t%0", align_p); +} + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (match_test "flag_pic") + (const_int 16) + (if_then_else (match_test "TARGET_CMODEL_LARGE") + (const_int 12) + (const_int 4))))]) + +(define_insn "call_immediate" + [(parallel [(call (mem (match_operand:SI 0 "nds32_symbolic_operand" "i")) + (match_operand 1)) + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))])] + "!NDS32_ALIGN_P ()" +{ + return nds32_output_call (insn, operands, "bal\t%0", "jal\t%0", false); +} + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (match_test "flag_pic") + (const_int 16) + (if_then_else (match_test "TARGET_CMODEL_LARGE") + (const_int 12) + (const_int 4))))]) + + +(define_insn "*cond_call_immediate" + [(cond_exec (match_operator 0 "nds32_conditional_call_comparison_operator" + [(match_operand:SI 1 "register_operand" "r") + (const_int 0)]) + (parallel [(call (mem (match_operand:SI 2 "nds32_symbolic_operand" "i")) + (match_operand 3)) + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))]))] + "!flag_pic && !TARGET_CMODEL_LARGE" +{ + switch (GET_CODE (operands[0])) + { + case LT: + return "bltzal\t%1, %2"; + case GE: + return "bgezal\t%1, %2"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "branch") + (set_attr "length" "4")]) ;; Subroutine call instruction returning a value. ;; operands[0]: It is the hard regiser in which the value is returned. @@ -1938,58 +1585,319 @@ [(parallel [(set (match_operand 0) (call (match_operand 1 "memory_operand" "") (match_operand 2))) - (clobber (reg:SI LP_REGNUM))])] - "" + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))])] "" + "nds32_expand_call_address (&operands[1]);" ) -(define_insn "*call_value_register" +(define_insn "call_value_register_align" + [(parallel [(set (match_operand 0) + (call (mem (match_operand:SI 1 "register_operand" "r, r")) + (match_operand 2))) + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))])] + "NDS32_ALIGN_P ()" +{ + rtx next_insn = next_active_insn (insn); + bool align_p = !(next_insn && get_attr_length (next_insn) == 2); + switch (which_alternative) + { + case 0: + if (align_p) + return "jral5\t%1\;.align 2"; + else + return "jral5\t%1"; + case 1: + if (align_p) + return "jral\t%1\;.align 2"; + else + return "jral\t%1"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "branch,branch") + (set_attr "length" " 2, 4") + (set_attr "relaxable" " no, yes")]) + +(define_insn "call_value_register" [(parallel [(set (match_operand 0) (call (mem (match_operand:SI 1 "register_operand" "r, r")) (match_operand 2))) - (clobber (reg:SI LP_REGNUM))])] + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))])] + "!NDS32_ALIGN_P ()" + "@ + jral5\t%1 + jral\t%1" + [(set_attr "type" "branch,branch") + (set_attr "length" " 2, 4") + (set_attr "relaxable" " no, yes")]) + +(define_insn "*cond_call_value_register" + [(cond_exec (ne (match_operand:SI 0 "register_operand" "r") + (const_int 0)) + (parallel [(set (match_operand 1) + (call (mem (match_operand:SI 2 "register_operand" "0")) + (match_operand 3))) + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))]))] + "TARGET_ISA_V3" + "jralnez\t%0" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +(define_insn "call_value_immediate_align" + [(parallel [(set (match_operand 0) + (call (mem (match_operand:SI 1 "nds32_symbolic_operand" "i")) + (match_operand 2))) + (clobber (reg:SI LP_REGNUM)) + (clobber 
(reg:SI TA_REGNUM))])] + "NDS32_ALIGN_P ()" +{ + rtx next_insn = next_active_insn (insn); + bool align_p = next_insn && get_attr_length (next_insn) != 2; + + return nds32_output_call (insn, operands, "bal\t%1", "jal\t%1", align_p); +} + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (match_test "flag_pic") + (const_int 16) + (if_then_else (match_test "TARGET_CMODEL_LARGE") + (const_int 12) + (const_int 4))))]) + +(define_insn "call_value_immediate" + [(parallel [(set (match_operand 0) + (call (mem (match_operand:SI 1 "nds32_symbolic_operand" "i")) + (match_operand 2))) + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))])] + "!NDS32_ALIGN_P ()" +{ + return nds32_output_call (insn, operands, "bal\t%1", "jal\t%1", false); +} + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (match_test "flag_pic") + (const_int 16) + (if_then_else (match_test "TARGET_CMODEL_LARGE") + (const_int 12) + (const_int 4))))]) + + +(define_insn "*cond_call_value_immediate" + [(cond_exec (match_operator 0 "nds32_conditional_call_comparison_operator" + [(match_operand:SI 1 "register_operand" "r") + (const_int 0)]) + (parallel [(set (match_operand 2) + (call (mem (match_operand:SI 3 "nds32_symbolic_operand" "i")) + (match_operand 4))) + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))]))] + "!flag_pic && !TARGET_CMODEL_LARGE" +{ + switch (GET_CODE (operands[0])) + { + case LT: + return "bltzal\t%1, %3"; + case GE: + return "bgezal\t%1, %3"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "branch") + (set_attr "length" "4")]) + +;; Call subroutine returning any type. + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "" "") + (const_int 0)) + (match_operand 1 "" "") + (match_operand 2 "" "")])] + "" +{ + int i; + + emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx)); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + DONE; +}) + +;; ---------------------------------------------------------------------------- + +;; The sibcall patterns. 
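
The untyped_call expander just above is the hook GCC uses to implement __builtin_apply: it emits the call with no argument information, copies back every value register listed in operands[2], and then calls gen_blockage so the optimizer cannot move or delete those copies. As a quick orientation, here is a minimal C sketch of the source-level construct that reaches this pattern; the function names and the 64-byte argument-block size are invented for illustration and are not part of the patch.

  /* Sketch only: __builtin_apply forwards the caller's incoming
     arguments to an arbitrary callee.  GCC expands the forwarded call
     through the untyped_call pattern and copies the callee's value
     registers back via the result block.  */

  int add3 (int a, int b, int c)          /* hypothetical callee */
  {
    return a + b + c;
  }

  int forward (int a, ...)                /* hypothetical wrapper */
  {
    void *args = __builtin_apply_args ();
    /* 64 bytes is an assumed upper bound for the argument block.  */
    __builtin_return (__builtin_apply ((void (*) ()) add3, args, 64));
  }

On nds32 the forwarded call itself still comes out as one of the jral/jal forms defined earlier; the blockage is only there so the copy-out of the value registers survives optimization.
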
+ +;; sibcall +;; sibcall_register +;; sibcall_immediate + +(define_expand "sibcall" + [(parallel [(call (match_operand 0 "memory_operand" "") + (const_int 0)) + (clobber (reg:SI TA_REGNUM)) + (return)])] + "" + "nds32_expand_call_address (&operands[0]);" +) + +(define_insn "*sibcall_register" + [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r, r")) + (match_operand 1)) + (clobber (reg:SI TA_REGNUM)) + (return)])] "" "@ - jral5\t%1 - jral\t%1" - [(set_attr "type" "branch,branch") - (set_attr "length" " 2, 4")]) + jr5\t%0 + jr\t%0" + [(set_attr "type" "branch,branch") + (set_attr "length" " 2, 4") + (set_attr "relaxable" " no, yes")]) + +(define_insn "*sibcall_immediate" + [(parallel [(call (mem (match_operand:SI 0 "nds32_symbolic_operand" "i")) + (match_operand 1)) + (clobber (reg:SI TA_REGNUM)) + (return)])] + "" +{ + if (TARGET_CMODEL_LARGE) + return "b\t%0"; + else + return "j\t%0"; +} + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (match_test "flag_pic") + (const_int 16) + (if_then_else (match_test "TARGET_CMODEL_LARGE") + (const_int 12) + (const_int 4))))]) + +;; sibcall_value +;; sibcall_value_register +;; sibcall_value_immediate + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0) + (call (match_operand 1 "memory_operand" "") + (const_int 0))) + (clobber (reg:SI TA_REGNUM)) + (return)])] + "" + "nds32_expand_call_address (&operands[1]);" +) -(define_insn "*call_value_immediate" +(define_insn "*sibcall_value_register" [(parallel [(set (match_operand 0) - (call (mem (match_operand:SI 1 "immediate_operand" "i")) + (call (mem (match_operand:SI 1 "register_operand" "r, r")) (match_operand 2))) - (clobber (reg:SI LP_REGNUM))])] + (clobber (reg:SI TA_REGNUM)) + (return)])] "" - "jal\t%1" + "@ + jr5\t%1 + jr\t%1" + [(set_attr "type" "branch,branch") + (set_attr "length" " 2, 4") + (set_attr "relaxable" " no, yes")]) + +(define_insn "*sibcall_value_immediate" + [(parallel [(set (match_operand 0) + (call (mem (match_operand:SI 1 "nds32_symbolic_operand" "i")) + (match_operand 2))) + (clobber (reg:SI TA_REGNUM)) + (return)])] + "" +{ + if (TARGET_CMODEL_LARGE) + return "b\t%1"; + else + return "j\t%1"; +} [(set_attr "type" "branch") - (set_attr "length" "4")]) + (set (attr "length") + (if_then_else (match_test "flag_pic") + (const_int 16) + (if_then_else (match_test "TARGET_CMODEL_LARGE") + (const_int 12) + (const_int 4))))]) +;; ---------------------------------------------------------------------------- -;; prologue and epilogue. +;; The prologue and epilogue. (define_expand "prologue" [(const_int 0)] "" { - /* Note that only under V3/V3M ISA, we could use v3push prologue. */ - if (TARGET_V3PUSH) + /* Note that only under V3/V3M ISA, we could use v3push prologue. + In addition, we need to check if v3push is indeed available. */ + if (NDS32_V3PUSH_AVAILABLE_P) nds32_expand_prologue_v3push (); else nds32_expand_prologue (); + + /* If cfun->machine->fp_as_gp_p is true, we can generate special + directive to guide linker doing fp-as-gp optimization. + However, for a naked function, which means + it should not have prologue/epilogue, + using fp-as-gp still requires saving $fp by push/pop behavior and + there is no benefit to use fp-as-gp on such small function. + So we need to make sure this function is NOT naked as well. 
*/ + if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p) + emit_insn (gen_omit_fp_begin (gen_rtx_REG (SImode, FP_REGNUM))); + DONE; }) (define_expand "epilogue" [(const_int 0)] "" { - /* Note that only under V3/V3M ISA, we could use v3pop epilogue. */ - if (TARGET_V3PUSH) - nds32_expand_epilogue_v3pop (); + /* If cfun->machine->fp_as_gp_p is true, we can generate special + directive to guide linker doing fp-as-gp optimization. + However, for a naked function, which means + it should not have prologue/epilogue, + using fp-as-gp still requires saving $fp by push/pop behavior and + there is no benefit to use fp-as-gp on such small function. + So we need to make sure this function is NOT naked as well. */ + if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p) + emit_insn (gen_omit_fp_end (gen_rtx_REG (SImode, FP_REGNUM))); + + /* Note that only under V3/V3M ISA, we could use v3pop epilogue. + In addition, we need to check if v3push is indeed available. */ + if (NDS32_V3PUSH_AVAILABLE_P) + nds32_expand_epilogue_v3pop (false); else - nds32_expand_epilogue (); + nds32_expand_epilogue (false); + DONE; }) +(define_expand "sibcall_epilogue" [(const_int 0)] + "" +{ + /* Pass true to indicate that this is sibcall epilogue and + exit from a function without the final branch back to the + calling function. */ + nds32_expand_epilogue (true); + + DONE; +}) ;; nop instruction. @@ -2003,7 +1911,7 @@ return "nop"; } [(set_attr "type" "misc") - (set_attr "enabled" "1") + (set_attr "enabled" "yes") (set (attr "length") (if_then_else (match_test "TARGET_16_BIT") (const_int 2) @@ -2025,12 +1933,13 @@ ])] "" { - return nds32_output_stack_push (); + return nds32_output_stack_push (operands[0]); } - [(set_attr "type" "misc") - (set_attr "enabled" "1") + [(set_attr "type" "store_multiple") + (set_attr "combo" "12") + (set_attr "enabled" "yes") (set (attr "length") - (if_then_else (match_test "TARGET_V3PUSH") + (if_then_else (match_test "NDS32_V3PUSH_AVAILABLE_P") (const_int 2) (const_int 4)))]) @@ -2045,41 +1954,82 @@ ])] "" { - return nds32_output_stack_pop (); + return nds32_output_stack_pop (operands[0]); } - [(set_attr "type" "misc") - (set_attr "enabled" "1") + [(set_attr "type" "load_multiple") + (set_attr "combo" "12") + (set_attr "enabled" "yes") (set (attr "length") - (if_then_else (match_test "TARGET_V3PUSH") + (if_then_else (match_test "NDS32_V3PUSH_AVAILABLE_P") (const_int 2) (const_int 4)))]) ;; ---------------------------------------------------------------------------- -;; unspec operation patterns +;; Return operation patterns ;; ---------------------------------------------------------------------------- -;; In nds32 target, the 'ret5' instuction is actually 'jr5 $lp'. -;; This pattern is designed to distinguish function return -;; from general indirect_jump pattern so that we can directly -;; generate 'ret5' for readability. +;; Use this pattern to expand a return instruction +;; with simple_return rtx if no epilogue is required. +(define_expand "return" + [(parallel [(return) + (clobber (reg:SI FP_REGNUM))])] + "nds32_can_use_return_insn ()" +{ + /* Emit as the simple return. 
*/ + if (!cfun->machine->fp_as_gp_p + && cfun->machine->naked_p + && (cfun->machine->va_args_size == 0)) + { + emit_jump_insn (gen_return_internal ()); + DONE; + } +}) -(define_insn "unspec_volatile_func_return" - [(set (pc) - (unspec_volatile:SI [(reg:SI LP_REGNUM)] UNSPEC_VOLATILE_FUNC_RETURN))] +;; This pattern is expanded only by the shrink-wrapping optimization +;; on paths where the function prologue has not been executed. +;; However, such optimization may reorder the prologue/epilogue blocks +;; together with basic blocks within function body. +;; So we must disable this pattern if we have already decided +;; to perform fp_as_gp optimization, which requires prologue to be +;; first block and epilogue to be last block. +(define_expand "simple_return" + [(simple_return)] + "!cfun->machine->fp_as_gp_p" + "" +) + +(define_insn "*nds32_return" + [(parallel [(return) + (clobber (reg:SI FP_REGNUM))])] + "" +{ + return nds32_output_return (); +} + [(set_attr "type" "branch") + (set_attr "enabled" "yes") + (set_attr "length" "4")]) + +(define_insn "return_internal" + [(simple_return)] "" { + if (nds32_isr_function_critical_p (current_function_decl)) + return "iret"; + if (TARGET_16_BIT) return "ret5"; else return "ret"; } - [(set_attr "type" "misc") - (set_attr "enabled" "1") + [(set_attr "type" "branch") + (set_attr "enabled" "yes") (set (attr "length") - (if_then_else (match_test "TARGET_16_BIT") - (const_int 2) - (const_int 4)))]) + (if_then_else (match_test "nds32_isr_function_critical_p (current_function_decl)") + (const_int 4) + (if_then_else (match_test "TARGET_16_BIT") + (const_int 2) + (const_int 4))))]) ;; ---------------------------------------------------------------------------- @@ -2114,6 +2064,7 @@ { rtx add_tmp; rtx reg, test; + rtx tmp_reg; /* Step A: "k <-- (plus (operands[0]) (-operands[1]))". */ if (operands[1] != const0_rtx) @@ -2122,8 +2073,8 @@ add_tmp = gen_int_mode (-INTVAL (operands[1]), SImode); /* If the integer value is not in the range of imm15s, - we need to force register first because our addsi3 pattern - only accept nds32_rimm15s_operand predicate. */ + we need to force register first because our addsi3 pattern + only accept nds32_rimm15s_operand predicate. */ add_tmp = force_reg (SImode, add_tmp); emit_insn (gen_addsi3 (reg, operands[0], add_tmp)); @@ -2135,11 +2086,14 @@ emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2], operands[4])); - operands[5] = gen_reg_rtx (SImode); - /* Step C, D, E, and F, using another temporary register operands[5]. */ + tmp_reg = gen_reg_rtx (SImode); + /* Step C, D, E, and F, using another temporary register tmp_reg. */ + if (flag_pic) + emit_use (pic_offset_table_rtx); + emit_jump_insn (gen_casesi_internal (operands[0], operands[3], - operands[5])); + tmp_reg)); DONE; }) @@ -2166,7 +2120,7 @@ (const_int 4)) (label_ref (match_operand 1 "" ""))))) (use (label_ref (match_dup 1))) - (clobber (match_operand:SI 2 "register_operand" "")) + (clobber (match_operand:SI 2 "register_operand" "=r")) (clobber (reg:SI TA_REGNUM))])] "" { @@ -2175,17 +2129,34 @@ else return nds32_output_casesi (operands); } - [(set_attr "length" "20") - (set_attr "type" "alu")]) + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (match_test "flag_pic") + (const_int 28) + (const_int 20)))]) ;; ---------------------------------------------------------------------------- ;; Performance Extension +; If -fwrapv option is issued, GCC expects there will be +; signed overflow situation. So the ABS(INT_MIN) is still INT_MIN +; (e.g. 
ABS(0x80000000)=0x80000000). +; However, the hardware ABS instruction of nds32 target +; always performs saturation: abs 0x80000000 -> 0x7fffffff. +; So that we can only enable abssi2 pattern if flag_wrapv is NOT presented. +(define_insn "abssi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (abs:SI (match_operand:SI 1 "register_operand" " r")))] + "TARGET_EXT_PERF && TARGET_HW_ABS && !flag_wrapv" + "abs\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + (define_insn "clzsi2" [(set (match_operand:SI 0 "register_operand" "=r") (clz:SI (match_operand:SI 1 "register_operand" " r")))] - "TARGET_PERF_EXT" + "TARGET_EXT_PERF" "clz\t%0, %1" [(set_attr "type" "alu") (set_attr "length" "4")]) @@ -2194,28 +2165,436 @@ [(set (match_operand:SI 0 "register_operand" "=r") (smax:SI (match_operand:SI 1 "register_operand" " r") (match_operand:SI 2 "register_operand" " r")))] - "TARGET_PERF_EXT" + "TARGET_EXT_PERF" "max\t%0, %1, %2" [(set_attr "type" "alu") (set_attr "length" "4")]) +(define_expand "uminqi3" + [(set (match_operand:QI 0 "register_operand" "") + (umin:QI (match_operand:QI 1 "register_operand" "") + (match_operand:QI 2 "register_operand" "")))] + "TARGET_EXT_PERF" +{ + rtx tmpop[3]; + tmpop[0] = gen_reg_rtx (SImode); + tmpop[1] = gen_reg_rtx (SImode); + tmpop[2] = gen_reg_rtx (SImode); + + emit_insn (gen_zero_extendqisi2 (tmpop[1], operands[1])); + emit_insn (gen_zero_extendqisi2 (tmpop[2], operands[2])); + emit_insn (gen_sminsi3 (tmpop[0], tmpop[1], tmpop[2])); + convert_move (operands[0], tmpop[0], false); + DONE; +}) + +(define_expand "sminqi3" + [(set (match_operand:QI 0 "register_operand" "") + (smin:QI (match_operand:QI 1 "register_operand" "") + (match_operand:QI 2 "register_operand" "")))] + "TARGET_EXT_PERF" +{ + rtx tmpop[3]; + tmpop[0] = gen_reg_rtx (SImode); + tmpop[1] = gen_reg_rtx (SImode); + tmpop[2] = gen_reg_rtx (SImode); + + emit_insn (gen_extendqisi2 (tmpop[1], operands[1])); + emit_insn (gen_extendqisi2 (tmpop[2], operands[2])); + emit_insn (gen_sminsi3 (tmpop[0], tmpop[1], tmpop[2])); + convert_move (operands[0], tmpop[0], false); + DONE; +}) + +(define_expand "uminhi3" + [(set (match_operand:HI 0 "register_operand" "") + (umin:HI (match_operand:HI 1 "register_operand" "") + (match_operand:HI 2 "register_operand" "")))] + "TARGET_EXT_PERF" +{ + rtx tmpop[3]; + tmpop[0] = gen_reg_rtx (SImode); + tmpop[1] = gen_reg_rtx (SImode); + tmpop[2] = gen_reg_rtx (SImode); + + emit_insn (gen_zero_extendhisi2 (tmpop[1], operands[1])); + emit_insn (gen_zero_extendhisi2 (tmpop[2], operands[2])); + emit_insn (gen_sminsi3 (tmpop[0], tmpop[1], tmpop[2])); + convert_move (operands[0], tmpop[0], false); + DONE; +}) + +(define_expand "sminhi3" + [(set (match_operand:HI 0 "register_operand" "") + (smin:HI (match_operand:HI 1 "register_operand" "") + (match_operand:HI 2 "register_operand" "")))] + "TARGET_EXT_PERF" +{ + rtx tmpop[3]; + tmpop[0] = gen_reg_rtx (SImode); + tmpop[1] = gen_reg_rtx (SImode); + tmpop[2] = gen_reg_rtx (SImode); + + emit_insn (gen_extendhisi2 (tmpop[1], operands[1])); + emit_insn (gen_extendhisi2 (tmpop[2], operands[2])); + emit_insn (gen_sminsi3 (tmpop[0], tmpop[1], tmpop[2])); + convert_move (operands[0], tmpop[0], false); + DONE; +}) + (define_insn "sminsi3" [(set (match_operand:SI 0 "register_operand" "=r") (smin:SI (match_operand:SI 1 "register_operand" " r") (match_operand:SI 2 "register_operand" " r")))] - "TARGET_PERF_EXT" + "TARGET_EXT_PERF" "min\t%0, %1, %2" [(set_attr "type" "alu") (set_attr "length" "4")]) (define_insn "*btst" - [(set 
(match_operand:SI 0 "register_operand" "= r") - (zero_extract:SI (match_operand:SI 1 "register_operand" " r") + [(set (match_operand:SI 0 "register_operand" "= r") + (zero_extract:SI (match_operand:SI 1 "register_operand" " r") (const_int 1) - (match_operand:SI 2 "immediate_operand" " Iu05")))] - "TARGET_PERF_EXT" + (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")))] + "TARGET_EXT_PERF" "btst\t%0, %1, %2" [(set_attr "type" "alu") (set_attr "length" "4")]) ;; ---------------------------------------------------------------------------- + +;; Pseudo NOPs + +;; Structural hazards NOP +(define_insn "nop_res_dep" + [(unspec [(match_operand 0 "const_int_operand" "i")] UNSPEC_VOLATILE_RES_DEP)] + "" + "! structural dependency (%0 cycles)" + [(set_attr "length" "0")] +) + +;; Data hazards NOP +(define_insn "nop_data_dep" + [(unspec [(match_operand 0 "const_int_operand" "i")] UNSPEC_VOLATILE_DATA_DEP)] + "" + "! data dependency (%0 cycles)" + [(set_attr "length" "0")] +) + +(define_insn "relax_group" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP)] + "" + ".relax_hint %0" + [(set_attr "length" "0")] +) + +(define_insn "innermost_loop_begin" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_INNERMOST_LOOP_BEGIN)] + "" + ".innermost_loop_begin" + [(set_attr "length" "0")] +) + +(define_insn "innermost_loop_end" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_INNERMOST_LOOP_END)] + "" + ".innermost_loop_end" + [(set_attr "length" "0")] +) + +(define_insn "no_ifc_begin" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_NO_IFC_BEGIN)] + "" + ".no_ifc_begin" + [(set_attr "length" "0")] +) + +(define_insn "no_ifc_end" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_NO_IFC_END)] + "" + ".no_ifc_end" + [(set_attr "length" "0")] +) + +(define_insn "no_ex9_begin" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_NO_EX9_BEGIN)] + "" + ".no_ex9_begin" + [(set_attr "length" "0")] +) + +(define_insn "no_ex9_end" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_NO_EX9_END)] + "" + ".no_ex9_end" + [(set_attr "length" "0")] +) + +;; Output .omit_fp_begin for fp-as-gp optimization. +;; Also we have to set $fp register. +(define_insn "omit_fp_begin" + [(set (match_operand:SI 0 "register_operand" "=x") + (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_OMIT_FP_BEGIN))] + "" + "! -----\;.omit_fp_begin\;la\t$fp,_FP_BASE_\;! -----" + [(set_attr "length" "8")] +) + +;; Output .omit_fp_end for fp-as-gp optimization. +;; Claim that we have to use $fp register. +(define_insn "omit_fp_end" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "x")] UNSPEC_VOLATILE_OMIT_FP_END)] + "" + "! -----\;.omit_fp_end\;! -----" + [(set_attr "length" "0")] +) + +(define_insn "pop25return" + [(return) + (unspec_volatile:SI [(reg:SI LP_REGNUM)] UNSPEC_VOLATILE_POP25_RETURN)] + "" + "! 
return for pop 25" + [(set_attr "length" "0")] +) + +;; Add pc +(define_insn "add_pc" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "0") + (pc)))] + "TARGET_LINUX_ABI || flag_pic" + "add5.pc\t%0" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (bswap:SI (match_operand:SI 1 "register_operand" "r")))] + "" +{ + emit_insn (gen_unspec_wsbh (operands[0], operands[1])); + emit_insn (gen_rotrsi3 (operands[0], operands[0], GEN_INT (16))); + DONE; +}) + +(define_insn "bswaphi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (bswap:HI (match_operand:HI 1 "register_operand" "r")))] + "" + "wsbh\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +;; Hardware loop + +; operand 0 is the loop count pseudo register +; operand 1 is the label to jump to at the top of the loop +(define_expand "doloop_end" + [(parallel [(set (pc) (if_then_else + (ne (match_operand:SI 0 "" "") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) + (plus:SI (match_dup 0) + (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LOOP_END) + (clobber (match_dup 2))])] ; match_scratch + "NDS32_HW_LOOP_P ()" +{ + /* The loop optimizer doesn't check the predicates... */ + if (GET_MODE (operands[0]) != SImode) + FAIL; + operands[2] = gen_rtx_SCRATCH (SImode); +}) + +(define_insn "loop_end" + [(set (pc) + (if_then_else (ne (match_operand:SI 3 "nonimmediate_operand" "0, 0, *r") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 0 "nonimmediate_operand" "=r, m, m") + (plus:SI (match_dup 3) + (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LOOP_END) + (clobber (match_scratch:SI 2 "=X, &r, &r"))] + "NDS32_HW_LOOP_P ()" + "#" + [(set_attr "length" "12, 12, 12")]) + +(define_split + [(set (pc) + (if_then_else (ne (match_operand:SI 3 "nonimmediate_operand" "") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 0 "memory_operand" "") + (plus:SI (match_dup 3) + (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LOOP_END) + (clobber (match_scratch:SI 2 ""))] + "NDS32_HW_LOOP_P ()" + [(set (match_dup 2) (plus:SI (match_dup 3) (const_int -1))) + (set (match_dup 0) (match_dup 2)) + (set (pc) + (if_then_else (ne (match_dup 2) (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] +{ + if (!REG_P (operands[3])) + { + emit_move_insn (operands[2], operands[3]); + operands[3] = operands[2]; + } +}) + +(define_insn "mtlbi_hint" + [(set (reg:SI LB_REGNUM) + (match_operand:SI 0 "nds32_label_operand" "i")) + (unspec [(match_operand 1 "const_int_operand" "i")] UNSPEC_LOOP_END)] + "NDS32_HW_LOOP_P ()" + "mtlbi\t%0" + [(set_attr "length" "4")]) + +(define_insn "mtlbi" + [(set (reg:SI LB_REGNUM) + (match_operand:SI 0 "nds32_label_operand" "i"))] + "NDS32_HW_LOOP_P ()" + "mtlbi\t%0" + [(set_attr "length" "4")]) + +(define_insn "mtlei" + [(set (reg:SI LE_REGNUM) + (match_operand:SI 0 "nds32_label_operand" "i"))] + "NDS32_HW_LOOP_P ()" + "mtlei\t%0" + [(set_attr "length" "4")]) + +(define_insn "init_lc" + [(set (reg:SI LC_REGNUM) + (match_operand:SI 0 "register_operand" "r")) + (unspec [(match_operand 1 "const_int_operand" "i")] UNSPEC_LOOP_END)] + "NDS32_HW_LOOP_P ()" + "mtusr\t%0, LC" + [(set_attr "length" "4")]) + +; After replace hwloop, use this is pattern to get right CFG +(define_insn "hwloop_cfg" + [(set (pc) + (if_then_else (ne (reg:SI LC_REGNUM) + (const_int 1)) + (match_operand:SI 
1 "nds32_label_operand" "i") + (pc))) + (set (reg:SI LC_REGNUM) + (plus:SI (reg:SI LC_REGNUM) + (const_int -1))) + (use (reg:SI LB_REGNUM)) + (use (reg:SI LE_REGNUM)) + (use (reg:SI LC_REGNUM)) + (unspec [(match_operand 0 "const_int_operand" "i")] UNSPEC_LOOP_END)] + "TARGET_HWLOOP" + "" + [(set_attr "length" "0")]) +;; ---------------------------------------------------------------------------- + +;; Patterns for exception handling + +(define_expand "eh_return" + [(use (match_operand 0 "general_operand"))] + "" +{ + emit_insn (gen_nds32_eh_return (operands[0])); + DONE; +}) + +(define_insn_and_split "nds32_eh_return" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_EH_RETURN)] + "" + "#" + "reload_completed" + [(const_int 0)] +{ + rtx place; + rtx addr; + + /* The operands[0] is the handler address. We need to assign it + to return address rtx so that we can jump to exception handler + when returning from current function. */ + + if (cfun->machine->lp_size == 0) + { + /* If $lp is not saved in the stack frame, we can take $lp directly. */ + place = gen_rtx_REG (SImode, LP_REGNUM); + } + else + { + /* Otherwise, we need to locate the stack slot of return address. + The return address is generally saved in [$fp-4] location. + However, DSE (dead store elimination) does not detect an alias + between [$fp-x] and [$sp+y]. This can result in a store to save + $lp introduced by builtin_eh_return() being incorrectly deleted + if it is based on $fp. The solution we take here is to compute + the offset relative to stack pointer and then use $sp to access + location so that the alias can be detected. + FIXME: What if the immediate value "offset" is too large to be + fit in a single addi instruction? */ + HOST_WIDE_INT offset; + + offset = (cfun->machine->fp_size + + cfun->machine->gp_size + + cfun->machine->lp_size + + cfun->machine->callee_saved_gpr_regs_size + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size + + cfun->machine->eh_return_data_regs_size + + cfun->machine->local_size + + cfun->machine->out_args_size); + + addr = plus_constant (Pmode, stack_pointer_rtx, offset - 4); + place = gen_frame_mem (SImode, addr); + } + + emit_move_insn (place, operands[0]); + DONE; +}) + +;; ---------------------------------------------------------------------------- + +;; Patterns for TLS. 
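
Before moving on to the TLS patterns, a note on the nds32_eh_return split above: its comment explains why the saved $lp slot must be addressed relative to $sp rather than $fp (so DSE still sees the alias with the store done by __builtin_eh_return), and the slot address is simply the sum of all frame-component sizes minus 4. The self-contained C sketch below restates that arithmetic; the struct, its field names and the sample frame sizes are invented for illustration and are not the real cfun->machine layout.

  #include <stdio.h>

  /* Hypothetical stand-in for the cfun->machine frame-size fields
     summed by the nds32_eh_return split.  */
  struct frame_sizes
  {
    long fp, gp, lp;
    long callee_gpr, gpr_padding, callee_fpr;
    long eh_regs, locals, out_args;
  };

  /* Distance from $sp up to the saved-$lp slot.  Addressing the slot
     as [$sp + offset - 4] (instead of [$fp - 4]) keeps the alias with
     the store emitted by __builtin_eh_return visible to DSE.  */
  static long
  lp_slot_sp_offset (const struct frame_sizes *f)
  {
    long offset = f->fp + f->gp + f->lp
                  + f->callee_gpr + f->gpr_padding + f->callee_fpr
                  + f->eh_regs + f->locals + f->out_args;
    return offset - 4;
  }

  int
  main (void)
  {
    /* Sample numbers for one imaginary frame; only the formula matters.  */
    struct frame_sizes f = { 4, 0, 4, 8, 0, 0, 16, 32, 0 };
    printf ("saved $lp lives at [$sp + %ld]\n", lp_slot_sp_offset (&f));
    return 0;
  }
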
+ +(define_insn "tls_desc" + [(set (reg:SI 0) + (call (unspec_volatile:SI [(match_operand:SI 0 "nds32_symbolic_operand" "i")] UNSPEC_TLS_DESC) + (const_int 1))) + (use (unspec [(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP)) + (use (reg:SI GP_REGNUM)) + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))] + "" + { + return nds32_output_tls_desc (operands); + } + [(set_attr "length" "20") + (set_attr "type" "branch")] +) + + +(define_insn "tls_ie" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "nds32_symbolic_operand" "i")] UNSPEC_TLS_IE)) + (use (unspec [(match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP)) + (use (reg:SI GP_REGNUM))] + "" + { + return nds32_output_tls_ie (operands); + } + [(set (attr "length") (if_then_else (match_test "flag_pic") + (const_int 12) + (const_int 8))) + (set_attr "type" "misc")] +) + +;; ---------------------------------------------------------------------------- diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-md-auxiliary.c gcc-4.9.4/gcc/config/nds32/nds32-md-auxiliary.c --- gcc-4.9.4.orig/gcc/config/nds32/nds32-md-auxiliary.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-md-auxiliary.c 2016-08-08 20:37:45.506270091 +0200 @@ -0,0 +1,3772 @@ +/* Auxiliary functions for output asm template or expand rtl + pattern of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2015 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* ------------------------------------------------------------------------ */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" /* Required by recog.h. */ +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" /* For DFA state_t. */ +#include "insn-codes.h" /* For CODE_FOR_xxx. */ +#include "reload.h" /* For push_reload(). */ +#include "flags.h" +#include "function.h" +#include "expr.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "df.h" +#include "tm_p.h" +#include "tm-constrs.h" +#include "optabs.h" /* For GEN_FCN. */ +#include "target.h" +#include "target-def.h" +#include "langhooks.h" /* For add_builtin_function(). */ +#include "ggc.h" + +/* ------------------------------------------------------------------------ */ + +/* This file is divided into three parts: + + PART 1: Auxiliary static function definitions. + + PART 2: Auxiliary function for expand RTL pattern. + + PART 3: Auxiliary function for output asm template. */ + +/* ------------------------------------------------------------------------ */ + +/* PART 1: Auxiliary static function definitions. 
*/ + +static int +nds32_regno_to_enable4 (unsigned regno) +{ + switch (regno) + { + case 28: /* $r28/fp */ + return 0x8; + case 29: /* $r29/gp */ + return 0x4; + case 30: /* $r30/lp */ + return 0x2; + case 31: /* $r31/sp */ + return 0x1; + default: + gcc_unreachable (); + } +} + +/* A helper function to return character based on byte size. */ +static char +nds32_byte_to_size (int byte) +{ + switch (byte) + { + case 4: + return 'w'; + case 2: + return 'h'; + case 1: + return 'b'; + default: + /* Normally it should not be here. */ + gcc_unreachable (); + } +} + +static int +nds32_inverse_cond_code (int code) +{ + switch (code) + { + case NE: + return EQ; + case EQ: + return NE; + case GT: + return LE; + case LE: + return GT; + case GE: + return LT; + case LT: + return GE; + default: + gcc_unreachable (); + } +} + +static const char * +nds32_cond_code_str (int code) +{ + switch (code) + { + case NE: + return "ne"; + case EQ: + return "eq"; + case GT: + return "gt"; + case LE: + return "le"; + case GE: + return "ge"; + case LT: + return "lt"; + default: + gcc_unreachable (); + } +} + +static void +output_cond_branch (int code, const char *suffix, bool r5_p, + bool long_jump_p, rtx *operands) +{ + char pattern[256]; + const char *cond_code; + bool align_p = NDS32_ALIGN_P (); + const char *align = align_p ? "\t.align\t2\n" : ""; + + if (r5_p && REGNO (operands[2]) == 5 && TARGET_16_BIT) + { + /* This is special case for beqs38 and bnes38, + second operand 2 can't be $r5 and it's almost meanless, + however it may occur after copy propgation. */ + if (code == EQ) + { + /* $r5 == $r5 always taken! */ + if (long_jump_p) + snprintf (pattern, sizeof (pattern), + "j\t%%3"); + else + snprintf (pattern, sizeof (pattern), + "j8\t%%3"); + } + else + /* Don't output anything since $r5 != $r5 never taken! */ + pattern[0] = '\0'; + } + else if (long_jump_p) + { + int inverse_code = nds32_inverse_cond_code (code); + cond_code = nds32_cond_code_str (inverse_code); + + /* b $r0, $r1, .L0 + => + b $r0, $r1, .LCB0 + j .L0 + .LCB0: + + or + + b $r0, $r1, .L0 + => + b $r0, $r1, .LCB0 + j .L0 + .LCB0: + */ + if (r5_p && TARGET_16_BIT) + { + snprintf (pattern, sizeof (pattern), + "b%ss38\t %%2, .LCB%%=\n\tj\t%%3\n%s.LCB%%=:", + cond_code, align); + } + else + { + snprintf (pattern, sizeof (pattern), + "b%s%s\t%%1, %%2, .LCB%%=\n\tj\t%%3\n%s.LCB%%=:", + cond_code, suffix, align); + } + } + else + { + cond_code = nds32_cond_code_str (code); + if (r5_p && TARGET_16_BIT) + { + /* bs38 $r1, .L0 */ + snprintf (pattern, sizeof (pattern), + "b%ss38\t %%2, %%3", cond_code); + } + else + { + /* b $r0, $r1, .L0 */ + snprintf (pattern, sizeof (pattern), + "b%s%s\t%%1, %%2, %%3", cond_code, suffix); + } + } + + output_asm_insn (pattern, operands); +} + +static void +output_cond_branch_compare_zero (int code, const char *suffix, + bool long_jump_p, rtx *operands, + bool ta_implied_p) +{ + char pattern[256]; + const char *cond_code; + bool align_p = NDS32_ALIGN_P (); + const char *align = align_p ? 
"\t.align\t2\n" : ""; + if (long_jump_p) + { + int inverse_code = nds32_inverse_cond_code (code); + cond_code = nds32_cond_code_str (inverse_code); + + if (ta_implied_p && TARGET_16_BIT) + { + /* bz .L0 + => + bz .LCB0 + j .L0 + .LCB0: + */ + snprintf (pattern, sizeof (pattern), + "b%sz%s\t.LCB%%=\n\tj\t%%2\n%s.LCB%%=:", + cond_code, suffix, align); + } + else + { + /* bz $r0, .L0 + => + bz $r0, .LCB0 + j .L0 + .LCB0: + */ + snprintf (pattern, sizeof (pattern), + "b%sz%s\t%%1, .LCB%%=\n\tj\t%%2\n%s.LCB%%=:", + cond_code, suffix, align); + } + } + else + { + cond_code = nds32_cond_code_str (code); + if (ta_implied_p && TARGET_16_BIT) + { + /* bz .L0 */ + snprintf (pattern, sizeof (pattern), + "b%sz%s\t%%2", cond_code, suffix); + } + else + { + /* bz $r0, .L0 */ + snprintf (pattern, sizeof (pattern), + "b%sz%s\t%%1, %%2", cond_code, suffix); + } + } + + output_asm_insn (pattern, operands); +} + +static void +nds32_split_shiftrtdi3 (rtx dst, rtx src, rtx shiftamount, bool logic_shift_p) +{ + rtx src_high_part; + rtx dst_high_part, dst_low_part; + + dst_high_part = nds32_di_high_part_subreg (dst); + src_high_part = nds32_di_high_part_subreg (src); + dst_low_part = nds32_di_low_part_subreg (dst); + + if (CONST_INT_P (shiftamount)) + { + if (INTVAL (shiftamount) < 32) + { + if (logic_shift_p) + { + emit_insn (gen_uwext (dst_low_part, src, + shiftamount)); + emit_insn (gen_lshrsi3 (dst_high_part, src_high_part, + shiftamount)); + } + else + { + emit_insn (gen_wext (dst_low_part, src, + shiftamount)); + emit_insn (gen_ashrsi3 (dst_high_part, src_high_part, + shiftamount)); + } + } + else + { + rtx new_shift_amout = gen_int_mode(INTVAL (shiftamount) - 32, SImode); + + if (logic_shift_p) + { + emit_insn (gen_lshrsi3 (dst_low_part, src_high_part, + new_shift_amout)); + emit_move_insn (dst_high_part, const0_rtx); + } + else + { + emit_insn (gen_ashrsi3 (dst_low_part, src_high_part, + new_shift_amout)); + emit_insn (gen_ashrsi3 (dst_high_part, src_high_part, + GEN_INT (31))); + } + } + } + else + { + rtx dst_low_part_l32, dst_high_part_l32; + rtx dst_low_part_g32, dst_high_part_g32; + rtx new_shift_amout, select_reg; + dst_low_part_l32 = gen_reg_rtx (SImode); + dst_high_part_l32 = gen_reg_rtx (SImode); + dst_low_part_g32 = gen_reg_rtx (SImode); + dst_high_part_g32 = gen_reg_rtx (SImode); + new_shift_amout = gen_reg_rtx (SImode); + select_reg = gen_reg_rtx (SImode); + + if (logic_shift_p) + { + /* + if (shiftamount < 32) + dst_low_part = wext (src, shiftamount) + dst_high_part = src_high_part >> shiftamount + else + dst_low_part = src_high_part >> (shiftamount & 0x1f) + dst_high_part = 0 + */ + emit_insn (gen_uwext (dst_low_part_l32, src, shiftamount)); + emit_insn (gen_lshrsi3 (dst_high_part_l32, src_high_part, + shiftamount)); + + emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f))); + emit_insn (gen_lshrsi3 (dst_low_part_g32, src_high_part, + new_shift_amout)); + emit_move_insn (dst_high_part_g32, const0_rtx); + } + else + { + /* + if (shiftamount < 32) + dst_low_part = wext (src, shiftamount) + dst_high_part = src_high_part >> shiftamount + else + dst_low_part = src_high_part >> (shiftamount & 0x1f) + # shift 31 for sign extend + dst_high_part = src_high_part >> 31 + */ + emit_insn (gen_wext (dst_low_part_l32, src, shiftamount)); + emit_insn (gen_ashrsi3 (dst_high_part_l32, src_high_part, + shiftamount)); + + emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f))); + emit_insn (gen_ashrsi3 (dst_low_part_g32, src_high_part, + new_shift_amout)); + emit_insn (gen_ashrsi3 
(dst_high_part_g32, src_high_part, + GEN_INT (31))); + } + + emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32))); + + emit_insn (gen_cmovnsi (dst_low_part, select_reg, + dst_low_part_l32, dst_low_part_g32)); + emit_insn (gen_cmovnsi (dst_high_part, select_reg, + dst_high_part_l32, dst_high_part_g32)); + } +} + +/* ------------------------------------------------------------------------ */ + +/* PART 2: Auxiliary function for expand RTL pattern. */ + +enum nds32_expand_result_type +nds32_expand_cbranch (rtx *operands) +{ + rtx tmp_reg; + enum rtx_code code; + + code = GET_CODE (operands[0]); + + /* If operands[2] is (const_int 0), + we can use beqz,bnez,bgtz,bgez,bltz,or blez instructions. + So we have gcc generate original template rtx. */ + if (GET_CODE (operands[2]) == CONST_INT) + if (INTVAL (operands[2]) == 0) + if ((code != GTU) + && (code != GEU) + && (code != LTU) + && (code != LEU)) + return EXPAND_CREATE_TEMPLATE; + + /* For other comparison, NDS32 ISA only has slt (Set-on-Less-Than) + behavior for the comparison, we might need to generate other + rtx patterns to achieve same semantic. */ + switch (code) + { + case GT: + case GTU: + if (GET_CODE (operands[2]) == CONST_INT) + { + /* GT reg_A, const_int => !(LT reg_A, const_int + 1) */ + if (optimize_size || optimize == 0) + tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); + else + tmp_reg = gen_reg_rtx (SImode); + + /* We want to plus 1 into the integer value + of operands[2] to create 'slt' instruction. + This caculation is performed on the host machine, + which may be 64-bit integer. + So the meaning of caculation result may be + different from the 32-bit nds32 target. + + For example: + 0x7fffffff + 0x1 -> 0x80000000, + this value is POSITIVE on 64-bit machine, + but the expected value on 32-bit nds32 target + should be NEGATIVE value. + + Hence, instead of using GEN_INT(), we use gen_int_mode() to + explicitly create SImode constant rtx. 
*/ + enum rtx_code cmp_code; + + rtx plus1 = gen_int_mode (INTVAL (operands[2]) + 1, SImode); + if (satisfies_constraint_Is15 (plus1)) + { + operands[2] = plus1; + cmp_code = EQ; + if (code == GT) + { + /* GT, use slts instruction */ + emit_insn ( + gen_slts_compare (tmp_reg, operands[1], operands[2])); + } + else + { + /* GTU, use slt instruction */ + emit_insn ( + gen_slt_compare (tmp_reg, operands[1], operands[2])); + } + } + else + { + cmp_code = NE; + if (code == GT) + { + /* GT, use slts instruction */ + emit_insn ( + gen_slts_compare (tmp_reg, operands[2], operands[1])); + } + else + { + /* GTU, use slt instruction */ + emit_insn ( + gen_slt_compare (tmp_reg, operands[2], operands[1])); + } + } + + PUT_CODE (operands[0], cmp_code); + operands[1] = tmp_reg; + operands[2] = const0_rtx; + emit_insn (gen_cbranchsi4 (operands[0], operands[1], + operands[2], operands[3])); + + return EXPAND_DONE; + } + else + { + /* GT reg_A, reg_B => LT reg_B, reg_A */ + if (optimize_size || optimize == 0) + tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); + else + tmp_reg = gen_reg_rtx (SImode); + + if (code == GT) + { + /* GT, use slts instruction */ + emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1])); + } + else + { + /* GTU, use slt instruction */ + emit_insn (gen_slt_compare (tmp_reg, operands[2], operands[1])); + } + + PUT_CODE (operands[0], NE); + operands[1] = tmp_reg; + operands[2] = const0_rtx; + emit_insn (gen_cbranchsi4 (operands[0], operands[1], + operands[2], operands[3])); + + return EXPAND_DONE; + } + + case GE: + case GEU: + /* GE reg_A, reg_B => !(LT reg_A, reg_B) */ + /* GE reg_A, const_int => !(LT reg_A, const_int) */ + if (optimize_size || optimize == 0) + tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); + else + tmp_reg = gen_reg_rtx (SImode); + + if (code == GE) + { + /* GE, use slts instruction */ + emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2])); + } + else + { + /* GEU, use slt instruction */ + emit_insn (gen_slt_compare (tmp_reg, operands[1], operands[2])); + } + + PUT_CODE (operands[0], EQ); + operands[1] = tmp_reg; + operands[2] = const0_rtx; + emit_insn (gen_cbranchsi4 (operands[0], operands[1], + operands[2], operands[3])); + + return EXPAND_DONE; + + case LT: + case LTU: + /* LT reg_A, reg_B => LT reg_A, reg_B */ + /* LT reg_A, const_int => LT reg_A, const_int */ + if (optimize_size || optimize == 0) + tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); + else + tmp_reg = gen_reg_rtx (SImode); + + if (code == LT) + { + /* LT, use slts instruction */ + emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2])); + } + else + { + /* LTU, use slt instruction */ + emit_insn (gen_slt_compare (tmp_reg, operands[1], operands[2])); + } + + PUT_CODE (operands[0], NE); + operands[1] = tmp_reg; + operands[2] = const0_rtx; + emit_insn (gen_cbranchsi4 (operands[0], operands[1], + operands[2], operands[3])); + + return EXPAND_DONE; + + case LE: + case LEU: + if (GET_CODE (operands[2]) == CONST_INT) + { + /* LE reg_A, const_int => LT reg_A, const_int + 1 */ + if (optimize_size || optimize == 0) + tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); + else + tmp_reg = gen_reg_rtx (SImode); + + enum rtx_code cmp_code; + /* Note that (le:SI X INT_MAX) is not the same as (lt:SI X INT_MIN). + We better have an assert here in case GCC does not properly + optimize it away. The INT_MAX here is 0x7fffffff for target. 
*/ + rtx plus1 = gen_int_mode (INTVAL (operands[2]) + 1, SImode); + if (satisfies_constraint_Is15 (plus1)) + { + operands[2] = plus1; + cmp_code = NE; + if (code == LE) + { + /* LE, use slts instruction */ + emit_insn ( + gen_slts_compare (tmp_reg, operands[1], operands[2])); + } + else + { + /* LEU, use slt instruction */ + emit_insn ( + gen_slt_compare (tmp_reg, operands[1], operands[2])); + } + } + else + { + cmp_code = EQ; + if (code == LE) + { + /* LE, use slts instruction */ + emit_insn ( + gen_slts_compare (tmp_reg, operands[2], operands[1])); + } + else + { + /* LEU, use slt instruction */ + emit_insn ( + gen_slt_compare (tmp_reg, operands[2], operands[1])); + } + } + + PUT_CODE (operands[0], cmp_code); + operands[1] = tmp_reg; + operands[2] = const0_rtx; + emit_insn (gen_cbranchsi4 (operands[0], operands[1], + operands[2], operands[3])); + + return EXPAND_DONE; + } + else + { + /* LE reg_A, reg_B => !(LT reg_B, reg_A) */ + if (optimize_size || optimize == 0) + tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); + else + tmp_reg = gen_reg_rtx (SImode); + + if (code == LE) + { + /* LE, use slts instruction */ + emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1])); + } + else + { + /* LEU, use slt instruction */ + emit_insn (gen_slt_compare (tmp_reg, operands[2], operands[1])); + } + + PUT_CODE (operands[0], EQ); + operands[1] = tmp_reg; + operands[2] = const0_rtx; + emit_insn (gen_cbranchsi4 (operands[0], operands[1], + operands[2], operands[3])); + + return EXPAND_DONE; + } + + case EQ: + case NE: + /* NDS32 ISA has various form for eq/ne behavior no matter + what kind of the operand is. + So just generate original template rtx. */ + + /* Put operands[2] into register if operands[2] is a large + const_int or ISAv2. */ + if (GET_CODE (operands[2]) == CONST_INT + && (!satisfies_constraint_Is11 (operands[2]) + || TARGET_ISA_V2)) + operands[2] = force_reg (SImode, operands[2]); + + return EXPAND_CREATE_TEMPLATE; + + default: + return EXPAND_FAIL; + } +} + +enum nds32_expand_result_type +nds32_expand_cstore (rtx *operands) +{ + rtx tmp_reg; + enum rtx_code code; + + code = GET_CODE (operands[1]); + + switch (code) + { + case EQ: + if (GET_CODE (operands[3]) == CONST_INT) + { + /* reg_R = (reg_A == const_int_B) + --> addi reg_C, reg_A, -const_int_B + slti reg_R, reg_C, const_int_1 */ + tmp_reg = gen_reg_rtx (SImode); + operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode); + /* If the integer value is not in the range of imm15s, + we need to force register first because our addsi3 pattern + only accept nds32_rimm15s_operand predicate. */ + if (!satisfies_constraint_Is15 (operands[3])) + operands[3] = force_reg (SImode, operands[3]); + emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3])); + emit_insn (gen_slt_eq0 (operands[0], tmp_reg)); + + return EXPAND_DONE; + } + else + { + /* reg_R = (reg_A == reg_B) + --> xor reg_C, reg_A, reg_B + slti reg_R, reg_C, const_int_1 */ + tmp_reg = gen_reg_rtx (SImode); + emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3])); + emit_insn (gen_slt_eq0 (operands[0], tmp_reg)); + + return EXPAND_DONE; + } + + case NE: + if (GET_CODE (operands[3]) == CONST_INT) + { + /* reg_R = (reg_A != const_int_B) + --> addi reg_C, reg_A, -const_int_B + slti reg_R, const_int_0, reg_C */ + tmp_reg = gen_reg_rtx (SImode); + operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode); + /* If the integer value is not in the range of imm15s, + we need to force register first because our addsi3 pattern + only accept nds32_rimm15s_operand predicate. 
*/ + if (!satisfies_constraint_Is15 (operands[3])) + operands[3] = force_reg (SImode, operands[3]); + emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3])); + emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg)); + + return EXPAND_DONE; + } + else + { + /* reg_R = (reg_A != reg_B) + --> xor reg_C, reg_A, reg_B + slti reg_R, const_int_0, reg_C */ + tmp_reg = gen_reg_rtx (SImode); + emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3])); + emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg)); + + return EXPAND_DONE; + } + + case GT: + case GTU: + /* reg_R = (reg_A > reg_B) --> slt reg_R, reg_B, reg_A */ + /* reg_R = (reg_A > const_int_B) --> slt reg_R, const_int_B, reg_A */ + if (code == GT) + { + /* GT, use slts instruction */ + emit_insn (gen_slts_compare (operands[0], operands[3], operands[2])); + } + else + { + /* GTU, use slt instruction */ + emit_insn (gen_slt_compare (operands[0], operands[3], operands[2])); + } + + return EXPAND_DONE; + + case GE: + case GEU: + if (GET_CODE (operands[3]) == CONST_INT) + { + /* reg_R = (reg_A >= const_int_B) + --> movi reg_C, const_int_B - 1 + slt reg_R, reg_C, reg_A */ + tmp_reg = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (tmp_reg, + gen_int_mode (INTVAL (operands[3]) - 1, + SImode))); + if (code == GE) + { + /* GE, use slts instruction */ + emit_insn (gen_slts_compare (operands[0], tmp_reg, operands[2])); + } + else + { + /* GEU, use slt instruction */ + emit_insn (gen_slt_compare (operands[0], tmp_reg, operands[2])); + } + + return EXPAND_DONE; + } + else + { + /* reg_R = (reg_A >= reg_B) + --> slt reg_R, reg_A, reg_B + xori reg_R, reg_R, const_int_1 */ + if (code == GE) + { + /* GE, use slts instruction */ + emit_insn (gen_slts_compare (operands[0], + operands[2], operands[3])); + } + else + { + /* GEU, use slt instruction */ + emit_insn (gen_slt_compare (operands[0], + operands[2], operands[3])); + } + + /* perform 'not' behavior */ + emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); + + return EXPAND_DONE; + } + + case LT: + case LTU: + /* reg_R = (reg_A < reg_B) --> slt reg_R, reg_A, reg_B */ + /* reg_R = (reg_A < const_int_B) --> slt reg_R, reg_A, const_int_B */ + if (code == LT) + { + /* LT, use slts instruction */ + emit_insn (gen_slts_compare (operands[0], operands[2], operands[3])); + } + else + { + /* LTU, use slt instruction */ + emit_insn (gen_slt_compare (operands[0], operands[2], operands[3])); + } + + return EXPAND_DONE; + + case LE: + case LEU: + if (GET_CODE (operands[3]) == CONST_INT) + { + /* reg_R = (reg_A <= const_int_B) + --> movi reg_C, const_int_B + 1 + slt reg_R, reg_A, reg_C */ + tmp_reg = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (tmp_reg, + gen_int_mode (INTVAL (operands[3]) + 1, + SImode))); + if (code == LE) + { + /* LE, use slts instruction */ + emit_insn (gen_slts_compare (operands[0], operands[2], tmp_reg)); + } + else + { + /* LEU, use slt instruction */ + emit_insn (gen_slt_compare (operands[0], operands[2], tmp_reg)); + } + + return EXPAND_DONE; + } + else + { + /* reg_R = (reg_A <= reg_B) --> slt reg_R, reg_B, reg_A + xori reg_R, reg_R, const_int_1 */ + if (code == LE) + { + /* LE, use slts instruction */ + emit_insn (gen_slts_compare (operands[0], + operands[3], operands[2])); + } + else + { + /* LEU, use slt instruction */ + emit_insn (gen_slt_compare (operands[0], + operands[3], operands[2])); + } + + /* perform 'not' behavior */ + emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); + + return EXPAND_DONE; + } + + + default: + gcc_unreachable (); 
+ } +} + +void +nds32_expand_float_cbranch (rtx *operands) +{ + enum rtx_code code = GET_CODE (operands[0]); + enum rtx_code new_code = code; + rtx cmp_op0 = operands[1]; + rtx cmp_op1 = operands[2]; + rtx tmp_reg; + rtx tmp; + + int reverse = 0; + + /* Main Goal: Use compare instruction + branch instruction. + + For example: + GT, GE: swap condition and swap operands and generate + compare instruction(LT, LE) + branch not equal instruction. + + UNORDERED, LT, LE, EQ: no need to change and generate + compare instruction(UNORDERED, LT, LE, EQ) + branch not equal instruction. + + ORDERED, NE: reverse condition and generate + compare instruction(EQ) + branch equal instruction. */ + + switch (code) + { + case GT: + case GE: + tmp = cmp_op0; + cmp_op0 = cmp_op1; + cmp_op1 = tmp; + new_code = swap_condition (new_code); + break; + case UNORDERED: + case LT: + case LE: + case EQ: + break; + case ORDERED: + case NE: + new_code = reverse_condition (new_code); + reverse = 1; + break; + case UNGT: + case UNGE: + new_code = reverse_condition_maybe_unordered (new_code); + reverse = 1; + break; + case UNLT: + case UNLE: + new_code = reverse_condition_maybe_unordered (new_code); + tmp = cmp_op0; + cmp_op0 = cmp_op1; + cmp_op1 = tmp; + new_code = swap_condition (new_code); + reverse = 1; + break; + default: + return; + } + + tmp_reg = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, + gen_rtx_fmt_ee (new_code, SImode, + cmp_op0, cmp_op1))); + + PUT_CODE (operands[0], reverse ? EQ : NE); + emit_insn (gen_cbranchsi4 (operands[0], tmp_reg, + const0_rtx, operands[3])); +} + +void +nds32_expand_float_cstore (rtx *operands) +{ + enum rtx_code code = GET_CODE (operands[1]); + enum rtx_code new_code = code; + enum machine_mode mode = GET_MODE (operands[2]); + + rtx cmp_op0 = operands[2]; + rtx cmp_op1 = operands[3]; + rtx tmp; + + /* Main Goal: Use compare instruction to store value. + + For example: + GT, GE: swap condition and swap operands. + reg_R = (reg_A > reg_B) --> fcmplt reg_R, reg_B, reg_A + reg_R = (reg_A >= reg_B) --> fcmple reg_R, reg_B, reg_A + + LT, LE, EQ: no need to change, it is already LT, LE, EQ. + reg_R = (reg_A < reg_B) --> fcmplt reg_R, reg_A, reg_B + reg_R = (reg_A <= reg_B) --> fcmple reg_R, reg_A, reg_B + reg_R = (reg_A == reg_B) --> fcmpeq reg_R, reg_A, reg_B + + ORDERED: reverse condition and using xor insturction to achieve 'ORDERED'. + reg_R = (reg_A != reg_B) --> fcmpun reg_R, reg_A, reg_B + xor reg_R, reg_R, const1_rtx + + NE: reverse condition and using xor insturction to achieve 'NE'. 
+ reg_R = (reg_A != reg_B) --> fcmpeq reg_R, reg_A, reg_B + xor reg_R, reg_R, const1_rtx */ + switch (code) + { + case GT: + case GE: + tmp = cmp_op0; + cmp_op0 = cmp_op1; + cmp_op1 =tmp; + new_code = swap_condition (new_code); + break; + case UNORDERED: + case LT: + case LE: + case EQ: + break; + case ORDERED: + if (mode == SFmode) + emit_insn (gen_cmpsf_un (operands[0], cmp_op0, cmp_op1)); + else + emit_insn (gen_cmpdf_un (operands[0], cmp_op0, cmp_op1)); + + emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); + return; + case NE: + if (mode == SFmode) + emit_insn (gen_cmpsf_eq (operands[0], cmp_op0, cmp_op1)); + else + emit_insn (gen_cmpdf_eq (operands[0], cmp_op0, cmp_op1)); + + emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); + return; + default: + return; + } + + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_fmt_ee (new_code, SImode, + cmp_op0, cmp_op1))); +} + +enum nds32_expand_result_type +nds32_expand_movcc (rtx *operands) +{ + enum rtx_code code = GET_CODE (operands[1]); + enum rtx_code new_code = code; + enum machine_mode cmp0_mode = GET_MODE (XEXP (operands[1], 0)); + rtx cmp_op0 = XEXP (operands[1], 0); + rtx cmp_op1 = XEXP (operands[1], 1); + rtx tmp; + + if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE) + && XEXP (operands[1], 1) == const0_rtx) + { + /* If the operands[1] rtx is already (eq X 0) or (ne X 0), + we have gcc generate original template rtx. */ + return EXPAND_CREATE_TEMPLATE; + } + else if ((TARGET_FPU_SINGLE && cmp0_mode == SFmode) + || (TARGET_FPU_DOUBLE && cmp0_mode == DFmode)) + { + nds32_expand_float_movcc (operands); + } + else + { + /* Since there is only 'slt'(Set when Less Than) instruction for + comparison in Andes ISA, the major strategy we use here is to + convert conditional move into 'LT + EQ' or 'LT + NE' rtx combination. + We design constraints properly so that the reload phase will assist + to make one source operand to use same register as result operand. + Then we can use cmovz/cmovn to catch the other source operand + which has different register. */ + int reverse = 0; + + /* Main Goal: Use 'LT + EQ' or 'LT + NE' to target "then" part + Strategy : Reverse condition and swap comparison operands + + For example: + + a <= b ? P : Q (LE or LEU) + --> a > b ? Q : P (reverse condition) + --> b < a ? Q : P (swap comparison operands to achieve 'LT/LTU') + + a >= b ? P : Q (GE or GEU) + --> a < b ? Q : P (reverse condition to achieve 'LT/LTU') + + a < b ? P : Q (LT or LTU) + --> (NO NEED TO CHANGE, it is already 'LT/LTU') + + a > b ? P : Q (GT or GTU) + --> b < a ? P : Q (swap comparison operands to achieve 'LT/LTU') */ + switch (code) + { + case GE: case GEU: case LE: case LEU: + new_code = reverse_condition (code); + reverse = 1; + break; + case EQ: + case NE: + /* no need to reverse condition */ + break; + default: + return EXPAND_FAIL; + } + + /* For '>' comparison operator, we swap operands + so that we can have 'LT/LTU' operator. */ + if (new_code == GT || new_code == GTU) + { + tmp = cmp_op0; + cmp_op0 = cmp_op1; + cmp_op1 = tmp; + + new_code = swap_condition (new_code); + } + + /* Use a temporary register to store slt/slts result. */ + tmp = gen_reg_rtx (SImode); + + if (new_code == EQ || new_code == NE) + { + emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1)); + /* tmp == 0 if cmp_op0 == cmp_op1. */ + operands[1] = gen_rtx_fmt_ee (new_code, VOIDmode, tmp, const0_rtx); + } + else + { + /* This emit_insn will create corresponding 'slt/slts' + insturction. 
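+ Only LT or LTU can reach this point: GE/GEU/LE/LEU were reversed above, + and the resulting GT/GTU were turned into LT/LTU by swapping the operands, + so any other code indicates a bug (hence the gcc_unreachable below).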
*/ + if (new_code == LT) + emit_insn (gen_slts_compare (tmp, cmp_op0, cmp_op1)); + else if (new_code == LTU) + emit_insn (gen_slt_compare (tmp, cmp_op0, cmp_op1)); + else + gcc_unreachable (); + + /* Change comparison semantic into (eq X 0) or (ne X 0) behavior + so that cmovz or cmovn will be matched later. + + For reverse condition cases, we want to create a semantic that: + (eq X 0) --> pick up "else" part + For normal cases, we want to create a semantic that: + (ne X 0) --> pick up "then" part + + Later we will have cmovz/cmovn instruction pattern to + match corresponding behavior and output instruction. */ + operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE, + VOIDmode, tmp, const0_rtx); + } + } + return EXPAND_CREATE_TEMPLATE; +} + +void +nds32_expand_float_movcc (rtx *operands) +{ + if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE) + && GET_MODE (XEXP (operands[1], 0)) == SImode + && XEXP (operands[1], 1) == const0_rtx) + { + /* If the operands[1] rtx is already (eq X 0) or (ne X 0), + we have gcc generate original template rtx. */ + return; + } + else + { + enum rtx_code code = GET_CODE (operands[1]); + enum rtx_code new_code = code; + enum machine_mode cmp0_mode = GET_MODE (XEXP (operands[1], 0)); + enum machine_mode cmp1_mode = GET_MODE (XEXP (operands[1], 1)); + rtx cmp_op0 = XEXP (operands[1], 0); + rtx cmp_op1 = XEXP (operands[1], 1); + rtx tmp; + + /* Compare instruction Operations: (cmp_op0 condition cmp_op1) ? 1 : 0, + when result is 1, and 'reverse' be set 1 for fcmovzs instructuin. */ + int reverse = 0; + + /* Main Goal: Use cmpare instruction + conditional move instruction. + Strategy : swap condition and swap comparison operands. + + For example: + a > b ? P : Q (GT) + --> a < b ? Q : P (swap condition) + --> b < a ? Q : P (swap comparison operands to achieve 'GT') + + a >= b ? P : Q (GE) + --> a <= b ? Q : P (swap condition) + --> b <= a ? Q : P (swap comparison operands to achieve 'GE') + + a < b ? P : Q (LT) + --> (NO NEED TO CHANGE, it is already 'LT') + + a >= b ? P : Q (LE) + --> (NO NEED TO CHANGE, it is already 'LE') + + a == b ? P : Q (EQ) + --> (NO NEED TO CHANGE, it is already 'EQ') */ + + switch (code) + { + case GT: + case GE: + tmp = cmp_op0; + cmp_op0 = cmp_op1; + cmp_op1 =tmp; + new_code = swap_condition (new_code); + break; + case UNORDERED: + case LT: + case LE: + case EQ: + break; + case ORDERED: + case NE: + reverse = 1; + new_code = reverse_condition (new_code); + break; + case UNGT: + case UNGE: + new_code = reverse_condition_maybe_unordered (new_code); + reverse = 1; + break; + case UNLT: + case UNLE: + new_code = reverse_condition_maybe_unordered (new_code); + tmp = cmp_op0; + cmp_op0 = cmp_op1; + cmp_op1 = tmp; + new_code = swap_condition (new_code); + reverse = 1; + break; + default: + return; + } + + /* Use a temporary register to store fcmpxxs result. */ + tmp = gen_reg_rtx (SImode); + + /* Create float compare instruction for SFmode and DFmode, + other MODE using cstoresi create compare instruction. 
*/ + if ((cmp0_mode == DFmode || cmp0_mode == SFmode) + && (cmp1_mode == DFmode || cmp1_mode == SFmode)) + { + /* This emit_insn create corresponding float compare instruction */ + emit_insn (gen_rtx_SET (VOIDmode, tmp, + gen_rtx_fmt_ee (new_code, SImode, + cmp_op0, cmp_op1))); + } + else + { + /* This emit_insn using cstoresi create corresponding + compare instruction */ + PUT_CODE (operands[1], new_code); + emit_insn (gen_cstoresi4 (tmp, operands[1], + cmp_op0, cmp_op1)); + } + /* operands[1] crete corresponding condition move instruction + for fcmovzs and fcmovns. */ + operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE, + VOIDmode, tmp, const0_rtx); + } +} + +void +nds32_emit_push_fpr_callee_saved (int base_offset) +{ + rtx fpu_insn; + rtx reg, mem; + unsigned int regno = cfun->machine->callee_saved_first_fpr_regno; + unsigned int last_fpr = cfun->machine->callee_saved_last_fpr_regno; + + while (regno <= last_fpr) + { + /* Handling two registers, using fsdi instruction. */ + reg = gen_rtx_REG (DFmode, regno); + mem = gen_frame_mem (DFmode, plus_constant (Pmode, + stack_pointer_rtx, + base_offset)); + base_offset += 8; + regno += 2; + fpu_insn = emit_move_insn (mem, reg); + RTX_FRAME_RELATED_P (fpu_insn) = 1; + } +} + +void +nds32_emit_pop_fpr_callee_saved (int gpr_padding_size) +{ + rtx fpu_insn; + rtx reg, mem, addr; + rtx dwarf, adjust_sp_rtx; + unsigned int regno = cfun->machine->callee_saved_first_fpr_regno; + unsigned int last_fpr = cfun->machine->callee_saved_last_fpr_regno; + int padding = 0; + + while (regno <= last_fpr) + { + /* Handling two registers, using fldi.bi instruction. */ + if ((regno + 1) >= last_fpr) + padding = gpr_padding_size; + + reg = gen_rtx_REG (DFmode, (regno)); + addr = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + GEN_INT (8 + padding))); + mem = gen_frame_mem (DFmode, addr); + regno += 2; + fpu_insn = emit_move_insn (reg, mem); + + adjust_sp_rtx = + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + 8 + padding)); + + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, NULL_RTX); + /* Tell gcc we adjust SP in this insn. */ + dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, copy_rtx (adjust_sp_rtx), + dwarf); + RTX_FRAME_RELATED_P (fpu_insn) = 1; + REG_NOTES (fpu_insn) = dwarf; + } +} + +void +nds32_emit_v3pop_fpr_callee_saved (int base) +{ + int fpu_base_addr = base; + int regno; + rtx fpu_insn; + rtx reg, mem; + rtx dwarf; + + regno = cfun->machine->callee_saved_first_fpr_regno; + while (regno <= cfun->machine->callee_saved_last_fpr_regno) + { + /* Handling two registers, using fldi instruction. */ + reg = gen_rtx_REG (DFmode, regno); + mem = gen_frame_mem (DFmode, plus_constant (Pmode, + stack_pointer_rtx, + fpu_base_addr)); + fpu_base_addr += 8; + regno += 2; + fpu_insn = emit_move_insn (reg, mem); + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, NULL_RTX); + RTX_FRAME_RELATED_P (fpu_insn) = 1; + REG_NOTES (fpu_insn) = dwarf; + } +} + +/* ------------------------------------------------------------------------ */ + +/* PART 3: Auxiliary function for output asm template. */ + +/* Function to generate PC relative jump table. + Refer to nds32.md for more details. + + The following is the sample for the case that diff value + can be presented in '.short' size. + + addi $r1, $r1, -(case_lower_bound) + slti $ta, $r1, (case_number) + beqz $ta, .L_skip_label + + la $ta, .L35 ! get jump table address + lh $r1, [$ta + $r1 << 1] ! 
load symbol diff from jump table entry + addi $ta, $r1, $ta + jr5 $ta + + ! jump table entry + L35: + .short .L25-.L35 + .short .L26-.L35 + .short .L27-.L35 + .short .L28-.L35 + .short .L29-.L35 + .short .L30-.L35 + .short .L31-.L35 + .short .L32-.L35 + .short .L33-.L35 + .short .L34-.L35 */ +const char * +nds32_output_casesi_pc_relative (rtx *operands) +{ + enum machine_mode mode; + rtx diff_vec; + + diff_vec = PATTERN (NEXT_INSN (operands[1])); + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + /* Step C: "t <-- operands[1]". */ + if (flag_pic) + { + output_asm_insn ("sethi\t$ta, hi20(%l1@GOTOFF)", operands); + output_asm_insn ("ori\t$ta, $ta, lo12(%l1@GOTOFF)", operands); + output_asm_insn ("add\t$ta, $ta, $gp", operands); + } + else + output_asm_insn ("la\t$ta, %l1", operands); + + /* Get the mode of each element in the difference vector. */ + mode = GET_MODE (diff_vec); + + /* Step D: "z <-- (mem (plus (operands[0] << m) t))", + where m is 0, 1, or 2 to load address-diff value from table. */ + switch (mode) + { + case QImode: + output_asm_insn ("lb\t%2, [$ta + %0 << 0]", operands); + break; + case HImode: + output_asm_insn ("lh\t%2, [$ta + %0 << 1]", operands); + break; + case SImode: + output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); + break; + default: + gcc_unreachable (); + } + + /* Step E: "t <-- z + t". + Add table label_ref with address-diff value to + obtain target case address. */ + output_asm_insn ("add\t$ta, %2, $ta", operands); + + /* Step F: jump to target with register t. */ + if (TARGET_16_BIT) + return "jr5\t$ta"; + else + return "jr\t$ta"; +} + +/* Function to generate normal jump table. */ +const char * +nds32_output_casesi (rtx *operands) +{ + /* Step C: "t <-- operands[1]". */ + if (flag_pic) + { + output_asm_insn ("sethi\t$ta, hi20(%l1@GOTOFF)", operands); + output_asm_insn ("ori\t$ta, $ta, lo12(%l1@GOTOFF)", operands); + output_asm_insn ("add\t$ta, $ta, $gp", operands); + } + else + output_asm_insn ("la\t$ta, %l1", operands); + + /* Step D: "z <-- (mem (plus (operands[0] << 2) t))". */ + output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); + + /* No need to perform Step E, which is only used for + pc relative jump table. */ + + /* Step F: jump to target with register z. */ + if (TARGET_16_BIT) + return "jr5\t%2"; + else + return "jr\t%2"; +} + + +/* Function to return memory format. */ +enum nds32_16bit_address_type +nds32_mem_format (rtx op) +{ + enum machine_mode mode_test; + int val; + int regno; + + if (!TARGET_16_BIT) + return ADDRESS_NOT_16BIT_FORMAT; + + mode_test = GET_MODE (op); + + op = XEXP (op, 0); + + /* 45 format. */ + if (GET_CODE (op) == REG + && ((mode_test == SImode) || (mode_test == SFmode))) + return ADDRESS_REG; + + /* 333 format for QI/HImode. */ + if (GET_CODE (op) == REG && (REGNO (op) < R8_REGNUM)) + return ADDRESS_LO_REG_IMM3U; + + /* post_inc 333 format. */ + if ((GET_CODE (op) == POST_INC) + && ((mode_test == SImode) || (mode_test == SFmode))) + { + regno = REGNO(XEXP (op, 0)); + + if (regno < 8) + return ADDRESS_POST_INC_LO_REG_IMM3U; + } + + /* post_inc 333 format. 
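+ (This second block actually matches the post_modify form, + (post_modify (reg) (plus (reg) (const_int))), with a low register and a + small positive immediate; see the checks below.)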
*/ + if ((GET_CODE (op) == POST_MODIFY) + && ((mode_test == SImode) || (mode_test == SFmode)) + && (REG_P (XEXP (XEXP (op, 1), 0))) + && (CONST_INT_P (XEXP (XEXP (op, 1), 1)))) + { + regno = REGNO (XEXP (XEXP (op, 1), 0)); + val = INTVAL (XEXP (XEXP (op, 1), 1)); + if (regno < 8 && val > 0 && val < 32) + return ADDRESS_POST_MODIFY_LO_REG_IMM3U; + } + + if ((GET_CODE (op) == PLUS) + && (GET_CODE (XEXP (op, 0)) == REG) + && (GET_CODE (XEXP (op, 1)) == CONST_INT)) + { + val = INTVAL (XEXP (op, 1)); + + regno = REGNO(XEXP (op, 0)); + + if (regno > 8 + && regno != SP_REGNUM + && regno != FP_REGNUM) + return ADDRESS_NOT_16BIT_FORMAT; + + switch (mode_test) + { + case QImode: + /* 333 format. */ + if (val >= 0 && val < 8 && regno < 8) + return ADDRESS_LO_REG_IMM3U; + break; + + case HImode: + /* 333 format. */ + if (val >= 0 && val < 16 && (val % 2 == 0) && regno < 8) + return ADDRESS_LO_REG_IMM3U; + break; + + case SImode: + case SFmode: + case DFmode: + /* r8 imply fe format. */ + if ((regno == 8) && + (val >= -128 && val <= -4 && (val % 4 == 0))) + return ADDRESS_R8_IMM7U; + /* fp imply 37 format. */ + if ((regno == FP_REGNUM) && + (val >= 0 && val < 512 && (val % 4 == 0))) + return ADDRESS_FP_IMM7U; + /* sp imply 37 format. */ + else if ((regno == SP_REGNUM) && + (val >= 0 && val < 512 && (val % 4 == 0))) + return ADDRESS_SP_IMM7U; + /* 333 format. */ + else if (val >= 0 && val < 32 && (val % 4 == 0) && regno < 8) + return ADDRESS_LO_REG_IMM3U; + break; + + default: + break; + } + } + + return ADDRESS_NOT_16BIT_FORMAT; +} + +/* Output 16-bit store. */ +const char * +nds32_output_16bit_store (rtx *operands, int byte) +{ + char pattern[100]; + char size; + rtx code = XEXP (operands[0], 0); + + size = nds32_byte_to_size (byte); + + switch (nds32_mem_format (operands[0])) + { + case ADDRESS_REG: + operands[0] = code; + output_asm_insn ("swi450\t%1, [%0]", operands); + break; + case ADDRESS_LO_REG_IMM3U: + snprintf (pattern, sizeof (pattern), "s%ci333\t%%1, %%0", size); + output_asm_insn (pattern, operands); + break; + case ADDRESS_POST_INC_LO_REG_IMM3U: + snprintf (pattern, sizeof (pattern), "swi333.bi\t%%1, %%0, 4"); + output_asm_insn (pattern, operands); + break; + case ADDRESS_POST_MODIFY_LO_REG_IMM3U: + snprintf (pattern, sizeof (pattern), "swi333.bi\t%%1, %%0"); + output_asm_insn (pattern, operands); + break; + case ADDRESS_FP_IMM7U: + output_asm_insn ("swi37\t%1, %0", operands); + break; + case ADDRESS_SP_IMM7U: + /* Get immediate value and set back to operands[1]. */ + operands[0] = XEXP (code, 1); + output_asm_insn ("swi37.sp\t%1, [ + (%0)]", operands); + break; + default: + break; + } + + return ""; +} + +/* Output 16-bit load. 
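+ Mirror of nds32_output_16bit_store above: dispatch on nds32_mem_format () + and emit the matching 16-bit load form (lwi450, l?i333, lwi333.bi, + lwi45.fe, lwi37 or lwi37.sp).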
*/ +const char * +nds32_output_16bit_load (rtx *operands, int byte) +{ + char pattern[100]; + unsigned char size; + rtx code = XEXP (operands[1], 0); + + size = nds32_byte_to_size (byte); + + switch (nds32_mem_format (operands[1])) + { + case ADDRESS_REG: + operands[1] = code; + output_asm_insn ("lwi450\t%0, [%1]", operands); + break; + case ADDRESS_LO_REG_IMM3U: + snprintf (pattern, sizeof (pattern), "l%ci333\t%%0, %%1", size); + output_asm_insn (pattern, operands); + break; + case ADDRESS_POST_INC_LO_REG_IMM3U: + snprintf (pattern, sizeof (pattern), "lwi333.bi\t%%0, %%1, 4"); + output_asm_insn (pattern, operands); + break; + case ADDRESS_POST_MODIFY_LO_REG_IMM3U: + snprintf (pattern, sizeof (pattern), "lwi333.bi\t%%0, %%1"); + output_asm_insn (pattern, operands); + break; + case ADDRESS_R8_IMM7U: + output_asm_insn ("lwi45.fe\t%0, %e1", operands); + break; + case ADDRESS_FP_IMM7U: + output_asm_insn ("lwi37\t%0, %1", operands); + break; + case ADDRESS_SP_IMM7U: + /* Get immediate value and set back to operands[0]. */ + operands[1] = XEXP (code, 1); + output_asm_insn ("lwi37.sp\t%0, [ + (%1)]", operands); + break; + default: + break; + } + + return ""; +} + +/* Output 32-bit store. */ +const char * +nds32_output_32bit_store (rtx *operands, int byte) +{ + char pattern[100]; + unsigned char size; + rtx code = XEXP (operands[0], 0); + + size = nds32_byte_to_size (byte); + + switch (GET_CODE (code)) + { + case REG: + /* (mem (reg X)) + => access location by using register, + use "sbi / shi / swi" */ + snprintf (pattern, sizeof (pattern), "s%ci\t%%1, %%0", size); + break; + + case SYMBOL_REF: + case CONST: + /* (mem (symbol_ref X)) + (mem (const (...))) + => access global variables, + use "sbi.gp / shi.gp / swi.gp" */ + operands[0] = XEXP (operands[0], 0); + snprintf (pattern, sizeof (pattern), "s%ci.gp\t%%1, [ + %%0]", size); + break; + + case POST_INC: + /* (mem (post_inc reg)) + => access location by using register which will be post increment, + use "sbi.bi / shi.bi / swi.bi" */ + snprintf (pattern, sizeof (pattern), + "s%ci.bi\t%%1, %%0, %d", size, byte); + break; + + case POST_DEC: + /* (mem (post_dec reg)) + => access location by using register which will be post decrement, + use "sbi.bi / shi.bi / swi.bi" */ + snprintf (pattern, sizeof (pattern), + "s%ci.bi\t%%1, %%0, -%d", size, byte); + break; + + case POST_MODIFY: + switch (GET_CODE (XEXP (XEXP (code, 1), 1))) + { + case REG: + case SUBREG: + /* (mem (post_modify (reg) (plus (reg) (reg)))) + => access location by using register which will be + post modified with reg, + use "sb.bi/ sh.bi / sw.bi" */ + snprintf (pattern, sizeof (pattern), "s%c.bi\t%%1, %%0", size); + break; + case CONST_INT: + /* (mem (post_modify (reg) (plus (reg) (const_int)))) + => access location by using register which will be + post modified with const_int, + use "sbi.bi/ shi.bi / swi.bi" */ + snprintf (pattern, sizeof (pattern), "s%ci.bi\t%%1, %%0", size); + break; + default: + abort (); + } + break; + + case PLUS: + switch (GET_CODE (XEXP (code, 1))) + { + case REG: + case SUBREG: + /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) reg)) + => access location by adding two registers, + use "sb / sh / sw" */ + snprintf (pattern, sizeof (pattern), "s%c\t%%1, %%0", size); + break; + case CONST_INT: + /* (mem (plus reg const_int)) + => access location by adding one register with const_int, + use "sbi / shi / swi" */ + snprintf (pattern, sizeof (pattern), "s%ci\t%%1, %%0", size); + break; + default: + abort (); + } + break; + + case LO_SUM: + operands[2] = 
XEXP (code, 1); + operands[0] = XEXP (code, 0); + snprintf (pattern, sizeof (pattern), + "s%ci\t%%1, [%%0 + lo12(%%2)]", size); + break; + + default: + abort (); + } + + output_asm_insn (pattern, operands); + return ""; +} + +/* Output 32-bit load. */ +const char * +nds32_output_32bit_load (rtx *operands, int byte) +{ + char pattern[100]; + unsigned char size; + rtx code; + + code = XEXP (operands[1], 0); + + size = nds32_byte_to_size (byte); + + switch (GET_CODE (code)) + { + case REG: + /* (mem (reg X)) + => access location by using register, + use "lbi / lhi / lwi" */ + snprintf (pattern, sizeof (pattern), "l%ci\t%%0, %%1", size); + break; + + case SYMBOL_REF: + case CONST: + /* (mem (symbol_ref X)) + (mem (const (...))) + => access global variables, + use "lbi.gp / lhi.gp / lwi.gp" */ + operands[1] = XEXP (operands[1], 0); + snprintf (pattern, sizeof (pattern), "l%ci.gp\t%%0, [ + %%1]", size); + break; + + case POST_INC: + /* (mem (post_inc reg)) + => access location by using register which will be post increment, + use "lbi.bi / lhi.bi / lwi.bi" */ + snprintf (pattern, sizeof (pattern), + "l%ci.bi\t%%0, %%1, %d", size, byte); + break; + + case POST_DEC: + /* (mem (post_dec reg)) + => access location by using register which will be post decrement, + use "lbi.bi / lhi.bi / lwi.bi" */ + snprintf (pattern, sizeof (pattern), + "l%ci.bi\t%%0, %%1, -%d", size, byte); + break; + + case POST_MODIFY: + switch (GET_CODE (XEXP (XEXP (code, 1), 1))) + { + case REG: + case SUBREG: + /* (mem (post_modify (reg) (plus (reg) (reg)))) + => access location by using register which will be + post modified with reg, + use "lb.bi/ lh.bi / lw.bi" */ + snprintf (pattern, sizeof (pattern), "l%c.bi\t%%0, %%1", size); + break; + case CONST_INT: + /* (mem (post_modify (reg) (plus (reg) (const_int)))) + => access location by using register which will be + post modified with const_int, + use "lbi.bi/ lhi.bi / lwi.bi" */ + snprintf (pattern, sizeof (pattern), "l%ci.bi\t%%0, %%1", size); + break; + default: + abort (); + } + break; + + case PLUS: + switch (GET_CODE (XEXP (code, 1))) + { + case REG: + case SUBREG: + /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) reg)) + use "lb / lh / lw" */ + snprintf (pattern, sizeof (pattern), "l%c\t%%0, %%1", size); + break; + case CONST_INT: + /* (mem (plus reg const_int)) + => access location by adding one register with const_int, + use "lbi / lhi / lwi" */ + snprintf (pattern, sizeof (pattern), "l%ci\t%%0, %%1", size); + break; + default: + abort (); + } + break; + + case LO_SUM: + operands[2] = XEXP (code, 1); + operands[1] = XEXP (code, 0); + snprintf (pattern, sizeof (pattern), + "l%ci\t%%0, [%%1 + lo12(%%2)]", size); + break; + + default: + abort (); + } + + output_asm_insn (pattern, operands); + return ""; +} + +/* Output 32-bit load with signed extension. 
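+ As nds32_output_32bit_load above, but only for the sign-extending byte and + halfword loads (the lbsi / lhsi families); a full SImode word needs no + further extension.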
*/ +const char * +nds32_output_32bit_load_se (rtx *operands, int byte) +{ + char pattern[100]; + unsigned char size; + rtx code; + + code = XEXP (operands[1], 0); + + size = nds32_byte_to_size (byte); + + switch (GET_CODE (code)) + { + case REG: + /* (mem (reg X)) + => access location by using register, + use "lbsi / lhsi" */ + snprintf (pattern, sizeof (pattern), "l%csi\t%%0, %%1", size); + break; + + case SYMBOL_REF: + case CONST: + /* (mem (symbol_ref X)) + (mem (const (...))) + => access global variables, + use "lbsi.gp / lhsi.gp" */ + operands[1] = XEXP (operands[1], 0); + snprintf (pattern, sizeof (pattern), "l%csi.gp\t%%0, [ + %%1]", size); + break; + + case POST_INC: + /* (mem (post_inc reg)) + => access location by using register which will be post increment, + use "lbsi.bi / lhsi.bi" */ + snprintf (pattern, sizeof (pattern), + "l%csi.bi\t%%0, %%1, %d", size, byte); + break; + + case POST_DEC: + /* (mem (post_dec reg)) + => access location by using register which will be post decrement, + use "lbsi.bi / lhsi.bi" */ + snprintf (pattern, sizeof (pattern), + "l%csi.bi\t%%0, %%1, -%d", size, byte); + break; + + case POST_MODIFY: + switch (GET_CODE (XEXP (XEXP (code, 1), 1))) + { + case REG: + case SUBREG: + /* (mem (post_modify (reg) (plus (reg) (reg)))) + => access location by using register which will be + post modified with reg, + use "lbs.bi/ lhs.bi" */ + snprintf (pattern, sizeof (pattern), "l%cs.bi\t%%0, %%1", size); + break; + case CONST_INT: + /* (mem (post_modify (reg) (plus (reg) (const_int)))) + => access location by using register which will be + post modified with const_int, + use "lbsi.bi/ lhsi.bi" */ + snprintf (pattern, sizeof (pattern), "l%csi.bi\t%%0, %%1", size); + break; + default: + abort (); + } + break; + + case PLUS: + switch (GET_CODE (XEXP (code, 1))) + { + case REG: + case SUBREG: + /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) reg)) + use "lbs / lhs" */ + snprintf (pattern, sizeof (pattern), "l%cs\t%%0, %%1", size); + break; + case CONST_INT: + /* (mem (plus reg const_int)) + => access location by adding one register with const_int, + use "lbsi / lhsi" */ + snprintf (pattern, sizeof (pattern), "l%csi\t%%0, %%1", size); + break; + default: + abort (); + } + break; + + case LO_SUM: + operands[2] = XEXP (code, 1); + operands[1] = XEXP (code, 0); + snprintf (pattern, sizeof (pattern), + "l%csi\t%%0, [%%1 + lo12(%%2)]", size); + break; + + default: + abort (); + } + + output_asm_insn (pattern, operands); + return ""; +} + +/* Function to output stack push operation. + We need to deal with normal stack push multiple or stack v3push. */ +const char * +nds32_output_stack_push (rtx par_rtx) +{ + /* A string pattern for output_asm_insn(). */ + char pattern[100]; + /* The operands array which will be used in output_asm_insn(). */ + rtx operands[3]; + /* Pick up varargs first regno and last regno for further use. */ + int rb_va_args = cfun->machine->va_args_first_regno; + int re_va_args = cfun->machine->va_args_last_regno; + int last_argument_regno = NDS32_FIRST_GPR_REGNUM + + NDS32_MAX_GPR_REGS_FOR_ARGS + - 1; + /* Pick up first and last eh data regno for further use. */ + int rb_eh_data = cfun->machine->eh_return_data_first_regno; + int re_eh_data = cfun->machine->eh_return_data_last_regno; + int first_eh_data_regno = EH_RETURN_DATA_REGNO (0); + /* Pick up callee-saved first regno and last regno for further use. 
*/ + int rb_callee_saved = cfun->machine->callee_saved_first_gpr_regno; + int re_callee_saved = cfun->machine->callee_saved_last_gpr_regno; + + /* First we need to check if we are pushing argument registers not used + for the named arguments. If so, we have to create 'smw.adm' (push.s) + instruction. */ + if (reg_mentioned_p (gen_rtx_REG (SImode, last_argument_regno), par_rtx)) + { + /* Set operands[0] and operands[1]. */ + operands[0] = gen_rtx_REG (SImode, rb_va_args); + operands[1] = gen_rtx_REG (SImode, re_va_args); + /* Create assembly code pattern: "Rb, Re, { }". */ + snprintf (pattern, sizeof (pattern), "push.s\t%s", "%0, %1, { }"); + /* We use output_asm_insn() to output assembly code by ourself. */ + output_asm_insn (pattern, operands); + return ""; + } + + /* If last_argument_regno is not mentioned in par_rtx, we can confirm that + we do not need to push argument registers for variadic function. + But we still need to check if we need to push exception handling + data registers. */ + if (reg_mentioned_p (gen_rtx_REG (SImode, first_eh_data_regno), par_rtx)) + { + /* Set operands[0] and operands[1]. */ + operands[0] = gen_rtx_REG (SImode, rb_eh_data); + operands[1] = gen_rtx_REG (SImode, re_eh_data); + /* Create assembly code pattern: "Rb, Re, { }". */ + snprintf (pattern, sizeof (pattern), "push.s\t%s", "%0, %1, { }"); + /* We use output_asm_insn() to output assembly code by ourself. */ + output_asm_insn (pattern, operands); + return ""; + } + + /* If we step here, we are going to do v3push or multiple push operation. */ + + /* Refer to nds32.h, where we comment when push25/pop25 are available. */ + if (NDS32_V3PUSH_AVAILABLE_P) + { + /* For stack v3push: + operands[0]: Re + operands[1]: imm8u */ + + /* This variable is to check if 'push25 Re,imm8u' is available. */ + int sp_adjust; + + /* Set operands[0]. */ + operands[0] = gen_rtx_REG (SImode, re_callee_saved); + + /* Check if we can generate 'push25 Re,imm8u', + otherwise, generate 'push25 Re,0'. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) + && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust)) + operands[1] = GEN_INT (sp_adjust); + else + { + /* Allocate callee saved fpr space. */ + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + sp_adjust = cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + operands[1] = GEN_INT (sp_adjust); + } + else + { + operands[1] = GEN_INT (0); + } + } + + /* Create assembly code pattern. */ + snprintf (pattern, sizeof (pattern), "push25\t%%0, %%1"); + } + else + { + /* For normal stack push multiple: + operands[0]: Rb + operands[1]: Re + operands[2]: En4 */ + + /* This variable is used to check if we only need to generate En4 field. + As long as Rb==Re=SP_REGNUM, we set this variable to 1. */ + int push_en4_only_p = 0; + + /* Set operands[0] and operands[1]. */ + operands[0] = gen_rtx_REG (SImode, rb_callee_saved); + operands[1] = gen_rtx_REG (SImode, re_callee_saved); + + /* 'smw.adm $sp,[$sp],$sp,0' means push nothing. */ + if (!cfun->machine->fp_size + && !cfun->machine->gp_size + && !cfun->machine->lp_size + && REGNO (operands[0]) == SP_REGNUM + && REGNO (operands[1]) == SP_REGNUM) + { + /* No need to generate instruction. */ + return ""; + } + else + { + /* If Rb==Re=SP_REGNUM, we only need to generate En4 field. 
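+ For example, when there are no callee-saved GPRs to push but $fp, $gp and + $lp are all needed, this emits 'push.s { $fp $gp $lp }' with no Rb, Re + register range at all.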
*/ + if (REGNO (operands[0]) == SP_REGNUM + && REGNO (operands[1]) == SP_REGNUM) + push_en4_only_p = 1; + + /* Create assembly code pattern. + We need to handle the form: "Rb, Re, { $fp $gp $lp }". */ + snprintf (pattern, sizeof (pattern), + "push.s\t%s{%s%s%s }", + push_en4_only_p ? "" : "%0, %1, ", + cfun->machine->fp_size ? " $fp" : "", + cfun->machine->gp_size ? " $gp" : "", + cfun->machine->lp_size ? " $lp" : ""); + } + } + + /* We use output_asm_insn() to output assembly code by ourself. */ + output_asm_insn (pattern, operands); + return ""; +} + +/* Function to output stack pop operation. + We need to deal with normal stack pop multiple or stack v3pop. */ +const char * +nds32_output_stack_pop (rtx par_rtx ATTRIBUTE_UNUSED) +{ + /* A string pattern for output_asm_insn(). */ + char pattern[100]; + /* The operands array which will be used in output_asm_insn(). */ + rtx operands[3]; + /* Pick up first and last eh data regno for further use. */ + int rb_eh_data = cfun->machine->eh_return_data_first_regno; + int re_eh_data = cfun->machine->eh_return_data_last_regno; + int first_eh_data_regno = EH_RETURN_DATA_REGNO (0); + /* Pick up callee-saved first regno and last regno for further use. */ + int rb_callee_saved = cfun->machine->callee_saved_first_gpr_regno; + int re_callee_saved = cfun->machine->callee_saved_last_gpr_regno; + + /* We need to check if we need to push exception handling + data registers. */ + if (reg_mentioned_p (gen_rtx_REG (SImode, first_eh_data_regno), par_rtx)) + { + /* Set operands[0] and operands[1]. */ + operands[0] = gen_rtx_REG (SImode, rb_eh_data); + operands[1] = gen_rtx_REG (SImode, re_eh_data); + /* Create assembly code pattern: "Rb, Re, { }". */ + snprintf (pattern, sizeof (pattern), "pop.s\t%s", "%0, %1, { }"); + /* We use output_asm_insn() to output assembly code by ourself. */ + output_asm_insn (pattern, operands); + return ""; + } + + /* If we step here, we are going to do v3pop or multiple pop operation. */ + + /* Refer to nds32.h, where we comment when push25/pop25 are available. */ + if (NDS32_V3PUSH_AVAILABLE_P) + { + /* For stack v3pop: + operands[0]: Re + operands[1]: imm8u */ + + /* This variable is to check if 'pop25 Re,imm8u' is available. */ + int sp_adjust; + + /* Set operands[0]. */ + operands[0] = gen_rtx_REG (SImode, re_callee_saved); + + /* Check if we can generate 'pop25 Re,imm8u', + otherwise, generate 'pop25 Re,0'. + We have to consider alloca issue as well. + If the function does call alloca(), the stack pointer is not fixed. + In that case, we cannot use 'pop25 Re,imm8u' directly. + We have to caculate stack pointer from frame pointer + and then use 'pop25 Re,0'. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) + && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust) + && !cfun->calls_alloca) + operands[1] = GEN_INT (sp_adjust); + else + { + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + /* If has fpr need to restore, the $sp on callee saved fpr + position, so we need to consider gpr pading bytes and + callee saved fpr size. */ + sp_adjust = cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + operands[1] = GEN_INT (sp_adjust); + } + else + { + operands[1] = GEN_INT (0); + } + } + + /* Create assembly code pattern. 
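+ E.g. 'pop25 Re, 16' when the whole frame adjustment fits in imm8u, is + double-word aligned and the function does not call alloca; otherwise + 'pop25 Re, 0' and the remaining adjustment is done separately.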
*/ + snprintf (pattern, sizeof (pattern), "pop25\t%%0, %%1"); + } + else + { + /* For normal stack pop multiple: + operands[0]: Rb + operands[1]: Re + operands[2]: En4 */ + + /* This variable is used to check if we only need to generate En4 field. + As long as Rb==Re=SP_REGNUM, we set this variable to 1. */ + int pop_en4_only_p = 0; + + /* Set operands[0] and operands[1]. */ + operands[0] = gen_rtx_REG (SImode, rb_callee_saved); + operands[1] = gen_rtx_REG (SImode, re_callee_saved); + + /* 'lmw.bim $sp,[$sp],$sp,0' means pop nothing. */ + if (!cfun->machine->fp_size + && !cfun->machine->gp_size + && !cfun->machine->lp_size + && REGNO (operands[0]) == SP_REGNUM + && REGNO (operands[1]) == SP_REGNUM) + { + /* No need to generate instruction. */ + return ""; + } + else + { + /* If Rb==Re=SP_REGNUM, we only need to generate En4 field. */ + if (REGNO (operands[0]) == SP_REGNUM + && REGNO (operands[1]) == SP_REGNUM) + pop_en4_only_p = 1; + + /* Create assembly code pattern. + We need to handle the form: "Rb, Re, { $fp $gp $lp }". */ + snprintf (pattern, sizeof (pattern), + "pop.s\t%s{%s%s%s }", + pop_en4_only_p ? "" : "%0, %1, ", + cfun->machine->fp_size ? " $fp" : "", + cfun->machine->gp_size ? " $gp" : "", + cfun->machine->lp_size ? " $lp" : ""); + } + } + + /* We use output_asm_insn() to output assembly code by ourself. */ + output_asm_insn (pattern, operands); + return ""; +} + +/* Function to output return operation. */ +const char * +nds32_output_return (void) +{ + /* A string pattern for output_asm_insn(). */ + char pattern[100]; + /* The operands array which will be used in output_asm_insn(). */ + rtx operands[2]; + /* For stack v3pop: + operands[0]: Re + operands[1]: imm8u */ + int re_callee_saved = cfun->machine->callee_saved_last_gpr_regno; + int sp_adjust; + + /* Set operands[0]. */ + operands[0] = gen_rtx_REG (SImode, re_callee_saved); + + /* Check if we can generate 'pop25 Re,imm8u', + otherwise, generate 'pop25 Re,0'. + We have to consider alloca issue as well. + If the function does call alloca(), the stack pointer is not fixed. + In that case, we cannot use 'pop25 Re,imm8u' directly. + We have to caculate stack pointer from frame pointer + and then use 'pop25 Re,0'. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) + && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust) + && !cfun->calls_alloca) + operands[1] = GEN_INT (sp_adjust); + else + operands[1] = GEN_INT (0); + + /* Create assembly code pattern. */ + snprintf (pattern, sizeof (pattern), "pop25\t%%0, %%1"); + /* We use output_asm_insn() to output assembly code by ourself. 
*/ + output_asm_insn (pattern, operands); + return ""; +} + + +/* output a float load instruction */ +const char * +nds32_output_float_load (rtx *operands) +{ + char buff[100]; + const char *pattern; + rtx addr, addr_op0, addr_op1; + int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8; + addr = XEXP (operands[1], 0); + switch (GET_CODE (addr)) + { + case REG: + pattern = "fl%ci\t%%0, %%1"; + break; + + case PLUS: + addr_op0 = XEXP (addr, 0); + addr_op1 = XEXP (addr, 1); + + if (REG_P (addr_op0) && REG_P (addr_op1)) + pattern = "fl%c\t%%0, %%1"; + else if (REG_P (addr_op0) && CONST_INT_P (addr_op1)) + pattern = "fl%ci\t%%0, %%1"; + else if (GET_CODE (addr_op0) == MULT && REG_P (addr_op1) + && REG_P (XEXP (addr_op0, 0)) + && CONST_INT_P (XEXP (addr_op0, 1))) + pattern = "fl%c\t%%0, %%1"; + else + gcc_unreachable (); + break; + + case POST_MODIFY: + addr_op0 = XEXP (addr, 0); + addr_op1 = XEXP (addr, 1); + + if (REG_P (addr_op0) && GET_CODE (addr_op1) == PLUS + && REG_P (XEXP (addr_op1, 1))) + pattern = "fl%c.bi\t%%0, %%1"; + else if (REG_P (addr_op0) && GET_CODE (addr_op1) == PLUS + && CONST_INT_P (XEXP (addr_op1, 1))) + pattern = "fl%ci.bi\t%%0, %%1"; + else + gcc_unreachable (); + break; + + case POST_INC: + if (REG_P (XEXP (addr, 0))) + { + if (dp) + pattern = "fl%ci.bi\t%%0, %%1, 8"; + else + pattern = "fl%ci.bi\t%%0, %%1, 4"; + } + else + gcc_unreachable (); + break; + + case POST_DEC: + if (REG_P (XEXP (addr, 0))) + { + if (dp) + pattern = "fl%ci.bi\t%%0, %%1, -8"; + else + pattern = "fl%ci.bi\t%%0, %%1, -4"; + } + else + gcc_unreachable (); + break; + + default: + gcc_unreachable (); + } + + sprintf (buff, pattern, dp ? 'd' : 's'); + output_asm_insn (buff, operands); + return ""; +} + +/* output a float store instruction */ +const char * +nds32_output_float_store (rtx *operands) +{ + char buff[100]; + const char *pattern; + rtx addr, addr_op0, addr_op1; + int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8; + addr = XEXP (operands[0], 0); + switch (GET_CODE (addr)) + { + case REG: + pattern = "fs%ci\t%%1, %%0"; + break; + + case PLUS: + addr_op0 = XEXP (addr, 0); + addr_op1 = XEXP (addr, 1); + + if (REG_P (addr_op0) && REG_P (addr_op1)) + pattern = "fs%c\t%%1, %%0"; + else if (REG_P (addr_op0) && CONST_INT_P (addr_op1)) + pattern = "fs%ci\t%%1, %%0"; + else if (GET_CODE (addr_op0) == MULT && REG_P (addr_op1) + && REG_P (XEXP (addr_op0, 0)) + && CONST_INT_P (XEXP (addr_op0, 1))) + pattern = "fs%c\t%%1, %%0"; + else + gcc_unreachable (); + break; + + case POST_MODIFY: + addr_op0 = XEXP (addr, 0); + addr_op1 = XEXP (addr, 1); + + if (REG_P (addr_op0) && GET_CODE (addr_op1) == PLUS + && REG_P (XEXP (addr_op1, 1))) + pattern = "fs%c.bi\t%%1, %%0"; + else if (REG_P (addr_op0) && GET_CODE (addr_op1) == PLUS + && CONST_INT_P (XEXP (addr_op1, 1))) + pattern = "fs%ci.bi\t%%1, %%0"; + else + gcc_unreachable (); + break; + + case POST_INC: + if (REG_P (XEXP (addr, 0))) + { + if (dp) + pattern = "fs%ci.bi\t%%1, %%0, 8"; + else + pattern = "fs%ci.bi\t%%1, %%0, 4"; + } + else + gcc_unreachable (); + break; + + case POST_DEC: + if (REG_P (XEXP (addr, 0))) + { + if (dp) + pattern = "fs%ci.bi\t%%1, %%0, -8"; + else + pattern = "fs%ci.bi\t%%1, %%0, -4"; + } + else + gcc_unreachable (); + break; + + default: + gcc_unreachable (); + } + + sprintf (buff, pattern, dp ? 
'd' : 's'); + output_asm_insn (buff, operands); + return ""; +} + +const char * +nds32_output_smw_single_word (rtx *operands) +{ + char buff[100]; + unsigned regno; + int enable4; + bool update_base_p; + rtx base_addr = operands[0]; + rtx base_reg; + rtx otherops[2]; + + if (REG_P (XEXP (base_addr, 0))) + { + update_base_p = false; + base_reg = XEXP (base_addr, 0); + } + else + { + update_base_p = true; + base_reg = XEXP (XEXP (base_addr, 0), 0); + } + + const char *update_base = update_base_p ? "m" : ""; + + regno = REGNO (operands[1]); + + otherops[0] = base_reg; + otherops[1] = operands[1]; + + if (regno >= 28) + { + enable4 = nds32_regno_to_enable4 (regno); + sprintf (buff, "smw.bi%s\t$sp, [%%0], $sp, %x", update_base, enable4); + } + else + { + sprintf (buff, "smw.bi%s\t%%1, [%%0], %%1", update_base); + } + output_asm_insn (buff, otherops); + return ""; +} + +const char * +nds32_output_lmw_single_word (rtx *operands) +{ + char buff[100]; + unsigned regno; + bool update_base_p; + int enable4; + rtx base_addr = operands[1]; + rtx base_reg; + rtx otherops[2]; + + if (REG_P (XEXP (base_addr, 0))) + { + update_base_p = false; + base_reg = XEXP (base_addr, 0); + } + else + { + update_base_p = true; + base_reg = XEXP (XEXP (base_addr, 0), 0); + } + + const char *update_base = update_base_p ? "m" : ""; + + regno = REGNO (operands[0]); + + otherops[0] = operands[0]; + otherops[1] = base_reg; + + if (regno >= 28) + { + enable4 = nds32_regno_to_enable4 (regno); + sprintf (buff, "lmw.bi%s\t$sp, [%%1], $sp, %x", update_base, enable4); + } + else + { + sprintf (buff, "lmw.bi%s\t%%0, [%%1], %%0", update_base); + } + output_asm_insn (buff, otherops); + return ""; +} + +void +nds32_expand_unaligned_load (rtx *operands, enum machine_mode mode) +{ + /* Initial memory offset. */ + int offset = WORDS_BIG_ENDIAN ? GET_MODE_SIZE (mode) - 1 : 0; + int offset_adj = WORDS_BIG_ENDIAN ? -1 : 1; + /* Initial register shift byte. */ + int shift = 0; + /* The first load byte instruction is not the same. */ + int width = GET_MODE_SIZE (mode) - 1; + rtx mem[2]; + rtx reg[2]; + rtx sub_reg; + rtx temp_reg, temp_sub_reg; + int num_reg; + + /* Generating a series of load byte instructions. + The first load byte instructions and other + load byte instructions are not the same. like: + First: + lbi reg0, [mem] + zeh reg0, reg0 + Second: + lbi temp_reg, [mem + offset] + sll temp_reg, (8 * shift) + ior reg0, temp_reg + + lbi temp_reg, [mem + (offset + 1)] + sll temp_reg, (8 * (shift + 1)) + ior reg0, temp_reg */ + + temp_reg = gen_reg_rtx (SImode); + temp_sub_reg = gen_lowpart (QImode, temp_reg); + + if (mode == DImode) + { + /* Load doubleword, we need two registers to access. */ + reg[0] = simplify_gen_subreg (SImode, operands[0], + GET_MODE (operands[0]), 0); + reg[1] = simplify_gen_subreg (SImode, operands[0], + GET_MODE (operands[0]), 4); + /* A register only store 4 byte. */ + width = GET_MODE_SIZE (SImode) - 1; + } + else + { + reg[0] = operands[0]; + } + + for (num_reg = (mode == DImode) ? 2 : 1; num_reg > 0; num_reg--) + { + sub_reg = gen_lowpart (QImode, reg[0]); + mem[0] = gen_rtx_MEM (QImode, plus_constant (Pmode, operands[1], offset)); + + /* Generating the first part instructions. + lbi reg0, [mem] */ + emit_move_insn (sub_reg, mem[0]); + + while (width > 0) + { + offset = offset + offset_adj; + shift++; + width--; + + mem[1] = gen_rtx_MEM (QImode, plus_constant (Pmode, + operands[1], + offset)); + /* Generating the second part instructions. 
+ lbi temp_reg, [mem + offset] + sll temp_reg, (8 * shift) + ior reg0, temp_reg */ + emit_move_insn (temp_sub_reg, mem[1]); + emit_insn (gen_ashlsi3 (temp_reg, temp_reg, + GEN_INT (shift * 8))); + emit_insn (gen_iorsi3 (reg[0], reg[0], temp_reg)); + } + + if (mode == DImode) + { + /* Using the second register to load memory information. */ + reg[0] = reg[1]; + shift = 0; + width = GET_MODE_SIZE (SImode) - 1; + offset = offset + offset_adj; + } + } +} + +void +nds32_expand_unaligned_store (rtx *operands, enum machine_mode mode) +{ + /* Initial memory offset. */ + int offset = WORDS_BIG_ENDIAN ? GET_MODE_SIZE (mode) - 1 : 0; + int offset_adj = WORDS_BIG_ENDIAN ? -1 : 1; + /* Initial register shift byte. */ + int shift = 0; + /* The first load byte instruction is not the same. */ + int width = GET_MODE_SIZE (mode) - 1; + rtx mem[2]; + rtx reg[2]; + rtx sub_reg; + rtx temp_reg, temp_sub_reg; + int num_reg; + + /* Generating a series of store byte instructions. + The first store byte instructions and other + load byte instructions are not the same. like: + First: + sbi reg0, [mem + 0] + Second: + srli temp_reg, reg0, (8 * shift) + sbi temp_reg, [mem + offset] */ + + temp_reg = gen_reg_rtx (SImode); + temp_sub_reg = gen_lowpart (QImode, temp_reg); + + if (mode == DImode) + { + /* Load doubleword, we need two registers to access. */ + reg[0] = simplify_gen_subreg (SImode, operands[1], + GET_MODE (operands[1]), 0); + reg[1] = simplify_gen_subreg (SImode, operands[1], + GET_MODE (operands[1]), 4); + /* A register only store 4 byte. */ + width = GET_MODE_SIZE (SImode) - 1; + } + else + { + reg[0] = operands[1]; + } + + for (num_reg = (mode == DImode) ? 2 : 1; num_reg > 0; num_reg--) + { + sub_reg = gen_lowpart (QImode, reg[0]); + mem[0] = gen_rtx_MEM (QImode, plus_constant (Pmode, operands[0], offset)); + + /* Generating the first part instructions. + sbi reg0, [mem + 0] */ + emit_move_insn (mem[0], sub_reg); + + while (width > 0) + { + offset = offset + offset_adj; + shift++; + width--; + + mem[1] = gen_rtx_MEM (QImode, plus_constant (Pmode, + operands[0], + offset)); + /* Generating the second part instructions. + srli temp_reg, reg0, (8 * shift) + sbi temp_reg, [mem + offset] */ + emit_insn (gen_lshrsi3 (temp_reg, reg[0], + GEN_INT (shift * 8))); + emit_move_insn (mem[1], temp_sub_reg); + } + + if (mode == DImode) + { + /* Using the second register to load memory information. */ + reg[0] = reg[1]; + shift = 0; + width = GET_MODE_SIZE (SImode) - 1; + offset = offset + offset_adj; + } + } +} + +/* Using multiple load/store instruction to output doubleword instruction. */ +const char * +nds32_output_double (rtx *operands, bool load_p) +{ + char pattern[100]; + int reg = load_p ? 0 : 1; + int mem = load_p ? 1 : 0; + rtx otherops[3]; + rtx addr = XEXP (operands[mem], 0); + + otherops[0] = gen_rtx_REG (SImode, REGNO (operands[reg])); + otherops[1] = gen_rtx_REG (SImode, REGNO (operands[reg]) + 1); + + if (GET_CODE (addr) == POST_INC) + { + /* (mem (post_inc (reg))) */ + otherops[2] = XEXP (addr, 0); + snprintf (pattern, sizeof (pattern), + "%cmw.bim\t%%0, [%%2], %%1, 0", load_p ? 'l' : 's'); + } + else + { + /* (mem (reg)) */ + otherops[2] = addr; + snprintf (pattern, sizeof (pattern), + "%cmw.bi\t%%0, [%%2], %%1, 0", load_p ? 
'l' : 's'); + + } + + output_asm_insn (pattern, otherops); + return ""; +} + +const char * +nds32_output_cbranchsi4_equality_zero (rtx insn, rtx *operands) +{ + enum rtx_code code; + bool long_jump_p = false; + + code = GET_CODE (operands[0]); + + /* This zero-comparison conditional branch has two forms: + 32-bit instruction => beqz/bnez imm16s << 1 + 16-bit instruction => beqzs8/bnezs8/beqz38/bnez38 imm8s << 1 + + For 32-bit case, + we assume it is always reachable. (but check range -65500 ~ 65500) + + For 16-bit case, + it must satisfy { 255 >= (label - pc) >= -256 } condition. + However, since the $pc for nds32 is at the beginning of the instruction, + we should leave some length space for current insn. + So we use range -250 ~ 250. */ + + switch (get_attr_length (insn)) + { + case 8: + long_jump_p = true; + /* fall through */ + case 2: + if (which_alternative == 0) + { + /* constraint: t */ + /* bzs8 .L0 + or + bzs8 .LCB0 + j .L0 + .LCB0: + */ + output_cond_branch_compare_zero (code, "s8", long_jump_p, + operands, true); + return ""; + } + else if (which_alternative == 1) + { + /* constraint: l */ + /* bz38 $r0, .L0 + or + bz38 $r0, .LCB0 + j .L0 + .LCB0: + */ + output_cond_branch_compare_zero (code, "38", long_jump_p, + operands, false); + return ""; + } + else + { + /* constraint: r */ + /* For which_alternative==2, it should not be here. */ + gcc_unreachable (); + } + case 10: + /* including constraints: t, l, and r */ + long_jump_p = true; + /* fall through */ + case 4: + /* including constraints: t, l, and r */ + output_cond_branch_compare_zero (code, "", long_jump_p, operands, false); + return ""; + + default: + gcc_unreachable (); + } +} + +const char * +nds32_output_cbranchsi4_equality_reg (rtx insn, rtx *operands) +{ + enum rtx_code code; + bool long_jump_p, r5_p; + int insn_length; + + insn_length = get_attr_length (insn); + + long_jump_p = (insn_length == 10 || insn_length == 8) ? true : false; + r5_p = (insn_length == 2 || insn_length == 8) ? true : false; + + code = GET_CODE (operands[0]); + + /* This register-comparison conditional branch has one form: + 32-bit instruction => beq/bne imm14s << 1 + + For 32-bit case, + we assume it is always reachable. (but check range -16350 ~ 16350). */ + + switch (code) + { + case EQ: + case NE: + output_cond_branch (code, "", r5_p, long_jump_p, operands); + return ""; + + default: + gcc_unreachable (); + } +} + +const char * +nds32_output_cbranchsi4_equality_reg_or_const_int (rtx insn, rtx *operands) +{ + enum rtx_code code; + bool long_jump_p, r5_p; + int insn_length; + + insn_length = get_attr_length (insn); + + long_jump_p = (insn_length == 10 || insn_length == 8) ? true : false; + r5_p = (insn_length == 2 || insn_length == 8) ? true : false; + + code = GET_CODE (operands[0]); + + /* This register-comparison conditional branch has one form: + 32-bit instruction => beq/bne imm14s << 1 + 32-bit instruction => beqc/bnec imm8s << 1 + + For 32-bit case, we assume it is always reachable. + (but check range -16350 ~ 16350 and -250 ~ 250). 
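+ The wider window corresponds to the beq/bne form (imm14s displacement) + and the tighter one to the beqc/bnec form (imm8s displacement), matching + the two encodings listed above.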
*/ + + switch (code) + { + case EQ: + case NE: + if (which_alternative == 2) + { + /* r, Is11 */ + /* bc */ + output_cond_branch (code, "c", r5_p, long_jump_p, operands); + } + else + { + /* r, r */ + /* v, r */ + output_cond_branch (code, "", r5_p, long_jump_p, operands); + } + return ""; + default: + gcc_unreachable (); + } +} + +const char * +nds32_output_cbranchsi4_greater_less_zero (rtx insn, rtx *operands) +{ + enum rtx_code code; + bool long_jump_p; + int insn_length; + + insn_length = get_attr_length (insn); + + gcc_assert (insn_length == 4 || insn_length == 10); + + long_jump_p = (insn_length == 10) ? true : false; + + code = GET_CODE (operands[0]); + + /* This zero-greater-less-comparison conditional branch has one form: + 32-bit instruction => bgtz/bgez/bltz/blez imm16s << 1 + + For 32-bit case, we assume it is always reachable. + (but check range -65500 ~ 65500). */ + + switch (code) + { + case GT: + case GE: + case LT: + case LE: + output_cond_branch_compare_zero (code, "", long_jump_p, operands, false); + break; + default: + gcc_unreachable (); + } + return ""; +} + +const char * +nds32_output_unpkd8 (rtx output, rtx input, + rtx high_idx_rtx, rtx low_idx_rtx, + bool signed_p) +{ + char pattern[100]; + rtx output_operands[2]; + HOST_WIDE_INT high_idx, low_idx; + high_idx = INTVAL (high_idx_rtx); + low_idx = INTVAL (low_idx_rtx); + + gcc_assert (high_idx >= 0 && high_idx <= 3); + gcc_assert (low_idx >= 0 && low_idx <= 3); + + /* We only have 10, 20, 30 and 31. */ + if ((low_idx != 0 || high_idx == 0) && + !(low_idx == 1 && high_idx == 3)) + return "#"; + + char sign_char = signed_p ? 's' : 'z'; + + sprintf (pattern, + "%cunpkd8" HOST_WIDE_INT_PRINT_DEC HOST_WIDE_INT_PRINT_DEC "\t%%0, %%1", + sign_char, high_idx, low_idx); + output_operands[0] = output; + output_operands[1] = input; + output_asm_insn (pattern, output_operands); + return ""; +} + +const char *nds32_output_call (rtx insn, rtx *operands, + const char *long_call, const char *call, + bool align_p) +{ + char pattern[100]; + bool noreturn_p; + if (TARGET_CMODEL_LARGE) + strcpy (pattern, long_call); + else + strcpy (pattern, call); + + if (align_p) + strcat (pattern, "\n\t.align 2"); + + noreturn_p = find_reg_note (insn, REG_NORETURN, NULL_RTX) != NULL_RTX; + + if (noreturn_p) + { + if (TARGET_16_BIT) + strcat (pattern, "\n\tnop16"); + else + strcat (pattern, "\n\tnop"); + } + + output_asm_insn (pattern, operands); + return ""; +} + +bool +nds32_need_split_sms_p (rtx in0_idx0, rtx in1_idx0, + rtx in0_idx1, rtx in1_idx1) +{ + /* smds or smdrs. */ + if (INTVAL (in0_idx0) == INTVAL (in1_idx0) + && INTVAL (in0_idx1) == INTVAL (in1_idx1) + && INTVAL (in0_idx0) != INTVAL (in0_idx1)) + return false; + + /* smxds. */ + if (INTVAL (in0_idx0) != INTVAL (in0_idx1) + && INTVAL (in1_idx0) != INTVAL (in1_idx1)) + return false; + + return true; +} + +const char * +nds32_output_sms (rtx in0_idx0, rtx in1_idx0, + rtx in0_idx1, rtx in1_idx1) +{ + if (nds32_need_split_sms_p (in0_idx0, in1_idx0, + in0_idx1, in1_idx1)) + return "#"; + /* out = in0[in0_idx0] * in1[in1_idx0] - in0[in0_idx1] * in1[in1_idx1] */ + + /* smds or smdrs. 
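+ In this case both products pair the same halves of the two inputs; + whether that is emitted as smds or smdrs depends on which half is + selected and on the target endianness, as chosen below.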
*/ + if (INTVAL (in0_idx0) == INTVAL (in1_idx0) + && INTVAL (in0_idx1) == INTVAL (in1_idx1) + && INTVAL (in0_idx0) != INTVAL (in0_idx1)) + { + if (INTVAL (in0_idx0) == 0) + { + if (TARGET_BIG_ENDIAN) + return "smds\t%0, %1, %2"; + else + return "smdrs\t%0, %1, %2"; + } + else + { + if (TARGET_BIG_ENDIAN) + return "smdrs\t%0, %1, %2"; + else + return "smds\t%0, %1, %2"; + } + } + + if (INTVAL (in0_idx0) != INTVAL (in0_idx1) + && INTVAL (in1_idx0) != INTVAL (in1_idx1)) + { + if (INTVAL (in0_idx0) == 1) + { + if (TARGET_BIG_ENDIAN) + return "smxds\t%0, %2, %1"; + else + return "smxds\t%0, %1, %2"; + } + else + { + if (TARGET_BIG_ENDIAN) + return "smxds\t%0, %1, %2"; + else + return "smxds\t%0, %2, %1"; + } + } + + gcc_unreachable (); + return ""; +} + +void +nds32_split_sms (rtx out, rtx in0, rtx in1, + rtx in0_idx0, rtx in1_idx0, + rtx in0_idx1, rtx in1_idx1) +{ + rtx result0 = gen_reg_rtx (SImode); + rtx result1 = gen_reg_rtx (SImode); + emit_insn (gen_mulhisi3v (result0, in0, in1, + in0_idx0, in1_idx0)); + emit_insn (gen_mulhisi3v (result1, in0, in1, + in0_idx1, in1_idx1)); + emit_insn (gen_subsi3 (out, result0, result1)); +} + +/* Spilt a doubleword instrucion to two single word instructions. */ +void +nds32_spilt_doubleword (rtx *operands, bool load_p) +{ + int reg = load_p ? 0 : 1; + int mem = load_p ? 1 : 0; + rtx reg_rtx = load_p ? operands[0] : operands[1]; + rtx mem_rtx = load_p ? operands[1] : operands[0]; + rtx low_part[2], high_part[2]; + rtx sub_mem = XEXP (mem_rtx, 0); + + /* Generate low_part and high_part register pattern. + i.e. register pattern like: + (reg:DI) -> (subreg:SI (reg:DI)) + (subreg:SI (reg:DI)) */ + low_part[reg] = simplify_gen_subreg (SImode, reg_rtx, GET_MODE (reg_rtx), 0); + high_part[reg] = simplify_gen_subreg (SImode, reg_rtx, GET_MODE (reg_rtx), 4); + + /* Generate low_part and high_part memory pattern. + Memory format is (post_dec) will generate: + low_part: lwi.bi reg, [mem], 4 + high_part: lwi.bi reg, [mem], -12 */ + if (GET_CODE (sub_mem) == POST_DEC) + { + /* memory format is (post_dec (reg)), + so that extract (reg) from the (post_dec (reg)) pattern. */ + sub_mem = XEXP (sub_mem, 0); + + /* generate low_part and high_part memory format: + low_part: (post_modify ((reg) (plus (reg) (const 4))) + high_part: (post_modify ((reg) (plus (reg) (const -12))) */ + low_part[mem] = gen_frame_mem (SImode, + gen_rtx_POST_MODIFY (Pmode, sub_mem, + gen_rtx_PLUS (Pmode, + sub_mem, + GEN_INT (4)))); + high_part[mem] = gen_frame_mem (SImode, + gen_rtx_POST_MODIFY (Pmode, sub_mem, + gen_rtx_PLUS (Pmode, + sub_mem, + GEN_INT (-12)))); + } + else if (GET_CODE (sub_mem) == POST_MODIFY) + { + /* Memory format is (post_modify (reg) (plus (reg) (const))), + so that extract (reg) from the post_modify pattern. */ + rtx post_mem = XEXP (sub_mem, 0); + + /* Extract (const) from the (post_modify (reg) (plus (reg) (const))) + pattern. */ + + rtx plus_op = XEXP (sub_mem, 1); + rtx post_val = XEXP (plus_op, 1); + + /* Generate low_part and high_part memory format: + low_part: (post_modify ((reg) (plus (reg) (const))) + high_part: ((plus (reg) (const 4))) */ + low_part[mem] = gen_frame_mem (SImode, + gen_rtx_POST_MODIFY (Pmode, post_mem, + gen_rtx_PLUS (Pmode, + post_mem, + post_val))); + high_part[mem] = gen_frame_mem (SImode, plus_constant (Pmode, + post_mem, + 4)); + } + else + { + /* memory format: (symbol_ref), (const), (reg + const_int). 
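+     For these forms the double word is simply accessed as two SImode
+     words at byte offsets 0 and 4 (see adjust_address below); no base
+     register update is required.  (Descriptive note.)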
*/
+      low_part[mem] = adjust_address (mem_rtx, SImode, 0);
+      high_part[mem] = adjust_address (mem_rtx, SImode, 4);
+    }
+
+  /* After reload completes there can be a dependence between the low part
+     register and the high part memory access, i.e. we cannot split a
+     sequence like:
+       load $r0, [%r1]
+     into
+       lw   $r0, [%r0]
+       lwi  $r1, [%r0 + 4]
+     without swapping the order:
+       lwi  $r1, [%r0 + 4]
+       lw   $r0, [%r0]
+     For store instructions we don't have this problem.
+
+     When the memory format is [post_modify], we need to emit the high part
+     instruction before the low part instruction.
+     Example:
+       load $r0, [%r2], post_val
+     splits into
+       load $r1, [%r2 + 4]
+       load $r0, [$r2], post_val.  */
+  if ((load_p && reg_overlap_mentioned_p (low_part[0], high_part[1]))
+      || GET_CODE (sub_mem) == POST_MODIFY)
+    {
+      operands[2] = high_part[0];
+      operands[3] = high_part[1];
+      operands[4] = low_part[0];
+      operands[5] = low_part[1];
+    }
+  else
+    {
+      operands[2] = low_part[0];
+      operands[3] = low_part[1];
+      operands[4] = high_part[0];
+      operands[5] = high_part[1];
+    }
+}
+
+void
+nds32_split_ashiftdi3 (rtx dst, rtx src, rtx shiftamount)
+{
+  rtx src_high_part, src_low_part;
+  rtx dst_high_part, dst_low_part;
+
+  dst_high_part = nds32_di_high_part_subreg (dst);
+  dst_low_part = nds32_di_low_part_subreg (dst);
+
+  src_high_part = nds32_di_high_part_subreg (src);
+  src_low_part = nds32_di_low_part_subreg (src);
+
+  /* We also need to handle shift amounts of 32 bits or more.  */
+  if (CONST_INT_P (shiftamount))
+    {
+      if (INTVAL (shiftamount) < 32)
+        {
+          rtx ext_start;
+          ext_start = gen_int_mode (32 - INTVAL (shiftamount), SImode);
+
+          emit_insn (gen_wext (dst_high_part, src, ext_start));
+          emit_insn (gen_ashlsi3 (dst_low_part, src_low_part, shiftamount));
+        }
+      else
+        {
+          rtx new_shift_amout = gen_int_mode (INTVAL (shiftamount) - 32, SImode);
+
+          emit_insn (gen_ashlsi3 (dst_high_part, src_low_part,
+                                  new_shift_amout));
+
+          emit_move_insn (dst_low_part, GEN_INT (0));
+        }
+    }
+  else
+    {
+      rtx dst_low_part_l32, dst_high_part_l32;
+      rtx dst_low_part_g32, dst_high_part_g32;
+      rtx new_shift_amout, select_reg;
+      dst_low_part_l32 = gen_reg_rtx (SImode);
+      dst_high_part_l32 = gen_reg_rtx (SImode);
+      dst_low_part_g32 = gen_reg_rtx (SImode);
+      dst_high_part_g32 = gen_reg_rtx (SImode);
+      new_shift_amout = gen_reg_rtx (SImode);
+      select_reg = gen_reg_rtx (SImode);
+
+      rtx ext_start;
+      ext_start = gen_reg_rtx (SImode);
+
+      /*
+         if (shiftamount < 32)
+           dst_low_part = src_low_part << shiftamount
+           dst_high_part = wext (src, 32 - shiftamount)
+           # wext cannot handle wext (src, 32) since it only takes rb[0:4]
+           # for the extraction.
+           dst_high_part = shiftamount == 0 ? src_high_part : dst_high_part
+         else
+           dst_low_part = 0
+           dst_high_part = src_low_part << (shiftamount & 0x1f)
+      */
+
+      emit_insn (gen_subsi3 (ext_start,
+                             gen_int_mode (32, SImode),
+                             shiftamount));
+      emit_insn (gen_wext (dst_high_part_l32, src, ext_start));
+
+      /* Handle the case where shiftamount == 0.
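+         With a shift amount of 0, ext_start is 32, but wext only looks at
+         rb[4:0], so the wext above would extract from bit 0 and produce
+         src_low_part; the conditional move below restores src_high_part
+         in that case.  (Explanatory note only.)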
*/ + emit_insn (gen_cmovzsi (dst_high_part_l32, shiftamount, + src_high_part, dst_high_part_l32)); + + emit_insn (gen_ashlsi3 (dst_low_part_l32, src_low_part, shiftamount)); + + emit_move_insn (dst_low_part_g32, const0_rtx); + emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f))); + emit_insn (gen_ashlsi3 (dst_high_part_g32, src_low_part, + new_shift_amout)); + + emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32))); + + emit_insn (gen_cmovnsi (dst_low_part, select_reg, + dst_low_part_l32, dst_low_part_g32)); + emit_insn (gen_cmovnsi (dst_high_part, select_reg, + dst_high_part_l32, dst_high_part_g32)); + } +} + +void +nds32_split_ashiftrtdi3 (rtx dst, rtx src, rtx shiftamount) +{ + nds32_split_shiftrtdi3 (dst, src, shiftamount, false); +} + +void +nds32_split_lshiftrtdi3 (rtx dst, rtx src, rtx shiftamount) +{ + nds32_split_shiftrtdi3 (dst, src, shiftamount, true); +} + +void +nds32_split_rotatertdi3 (rtx dst, rtx src, rtx shiftamount) +{ + rtx dst_low_part_l32, dst_high_part_l32; + rtx dst_low_part_g32, dst_high_part_g32; + rtx select_reg, low5bit, low5bit_inv, minus32sa; + rtx dst_low_part_g32_tmph; + rtx dst_low_part_g32_tmpl; + rtx dst_high_part_l32_tmph; + rtx dst_high_part_l32_tmpl; + + rtx src_low_part, src_high_part; + rtx dst_high_part, dst_low_part; + + shiftamount = force_reg (SImode, shiftamount); + + emit_insn (gen_andsi3 (shiftamount, + shiftamount, + gen_int_mode (0x3f, SImode))); + + dst_high_part = nds32_di_high_part_subreg (dst); + dst_low_part = nds32_di_low_part_subreg (dst); + + src_high_part = nds32_di_high_part_subreg (src); + src_low_part = nds32_di_low_part_subreg (src); + + dst_low_part_l32 = gen_reg_rtx (SImode); + dst_high_part_l32 = gen_reg_rtx (SImode); + dst_low_part_g32 = gen_reg_rtx (SImode); + dst_high_part_g32 = gen_reg_rtx (SImode); + low5bit = gen_reg_rtx (SImode); + low5bit_inv = gen_reg_rtx (SImode); + minus32sa = gen_reg_rtx (SImode); + select_reg = gen_reg_rtx (SImode); + + dst_low_part_g32_tmph = gen_reg_rtx (SImode); + dst_low_part_g32_tmpl = gen_reg_rtx (SImode); + + dst_high_part_l32_tmph = gen_reg_rtx (SImode); + dst_high_part_l32_tmpl = gen_reg_rtx (SImode); + + emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32))); + + /* if shiftamount < 32 + dst_low_part = wext(src, shiftamount) + else + dst_low_part = ((src_high_part >> (shiftamount & 0x1f)) + | (src_low_part << (32 - (shiftamount & 0x1f)))) + */ + emit_insn (gen_andsi3 (low5bit, shiftamount, gen_int_mode (0x1f, SImode))); + emit_insn (gen_subsi3 (low5bit_inv, gen_int_mode (32, SImode), low5bit)); + + emit_insn (gen_wext (dst_low_part_l32, src, shiftamount)); + + emit_insn (gen_lshrsi3 (dst_low_part_g32_tmpl, src_high_part, low5bit)); + emit_insn (gen_ashlsi3 (dst_low_part_g32_tmph, src_low_part, low5bit_inv)); + + emit_insn (gen_iorsi3 (dst_low_part_g32, + dst_low_part_g32_tmpl, + dst_low_part_g32_tmph)); + + emit_insn (gen_cmovnsi (dst_low_part, select_reg, + dst_low_part_l32, dst_low_part_g32)); + + /* if shiftamount < 32 + dst_high_part = ((src_high_part >> shiftamount) + | (src_low_part << (32 - shiftamount))) + dst_high_part = shiftamount == 0 ? 
src_high_part : dst_high_part + else + dst_high_part = wext(src, shiftamount & 0x1f) + */ + + emit_insn (gen_subsi3 (minus32sa, gen_int_mode (32, SImode), shiftamount)); + + emit_insn (gen_lshrsi3 (dst_high_part_l32_tmpl, src_high_part, shiftamount)); + emit_insn (gen_ashlsi3 (dst_high_part_l32_tmph, src_low_part, minus32sa)); + + emit_insn (gen_iorsi3 (dst_high_part_l32, + dst_high_part_l32_tmpl, + dst_high_part_l32_tmph)); + + emit_insn (gen_cmovzsi (dst_high_part_l32, shiftamount, + src_high_part, dst_high_part_l32)); + + emit_insn (gen_wext (dst_high_part_g32, src, low5bit)); + + emit_insn (gen_cmovnsi (dst_high_part, select_reg, + dst_high_part_l32, dst_high_part_g32)); +} + +/* Return true if OP contains a symbol reference. */ +bool +symbolic_reference_mentioned_p (rtx op) +{ + const char *fmt; + int i; + + if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) + return true; + + fmt = GET_RTX_FORMAT (GET_CODE (op)); + for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + int j; + + for (j = XVECLEN (op, i) - 1; j >= 0; j--) + if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) + return true; + } + + else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) + return true; + } + + return false; +} + +/* Expand PIC code for @GOTOFF and @GOT. + + Example for @GOTOFF: + + la $r0, symbol@GOTOFF + -> sethi $ta, hi20(symbol@GOTOFF) + ori $ta, $ta, lo12(symbol@GOTOFF) + add $r0, $ta, $gp + + Example for @GOT: + + la $r0, symbol@GOT + -> sethi $ta, hi20(symbol@GOT) + ori $ta, $ta, lo12(symbol@GOT) + lw $r0, [$ta + $gp] +*/ +void +nds32_expand_pic_move (rtx *operands) +{ + rtx tmp_reg = gen_reg_rtx (SImode); + rtx pat; + + if (GET_CODE (operands[1]) == LABEL_REF + || (GET_CODE (operands[1]) == SYMBOL_REF + && (CONSTANT_POOL_ADDRESS_P (operands[1]) + || SYMBOL_REF_LOCAL_P (operands[1])))) + { + pat = gen_rtx_UNSPEC (SImode, + gen_rtvec (1, operands[1]), UNSPEC_GOTOFF); + pat = gen_rtx_CONST (SImode, pat); + emit_insn (gen_sethi (tmp_reg, pat)); + emit_insn (gen_lo_sum (tmp_reg, tmp_reg, pat)); + emit_insn (gen_addsi3 (operands[0], pic_offset_table_rtx, tmp_reg)); + emit_insn (gen_blockage ()); + } + else if (GET_CODE (operands[1]) == SYMBOL_REF) + { + pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, operands[1]), UNSPEC_GOT); + pat = gen_rtx_CONST (SImode, pat); + emit_insn (gen_sethi (tmp_reg, pat)); + emit_insn (gen_lo_sum (tmp_reg, tmp_reg, pat)); + rtx got_addr = gen_frame_mem (SImode, gen_rtx_PLUS (Pmode, + pic_offset_table_rtx, + tmp_reg)); + emit_move_insn (operands[0], got_addr); + emit_insn (gen_blockage ()); + } + else if (GET_CODE (operands[1]) == CONST) + { + pat = XEXP (operands[1], 0); + gcc_assert (GET_CODE (pat) == PLUS); + + rtx op0 = XEXP (pat, 0); + rtx op1 = XEXP (pat, 1); + + if ((GET_CODE (op0) == LABEL_REF + || (GET_CODE (op0) == SYMBOL_REF + && (CONSTANT_POOL_ADDRESS_P (op0) + || SYMBOL_REF_LOCAL_P (op0)))) + && GET_CODE (op1) == CONST_INT) + { + pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), UNSPEC_GOTOFF); + pat = gen_rtx_PLUS (Pmode, pat, op1); + pat = gen_rtx_CONST (Pmode, pat); + emit_insn (gen_sethi (tmp_reg, pat)); + emit_insn (gen_lo_sum (tmp_reg, tmp_reg, pat)); + emit_insn (gen_addsi3 (operands[0], tmp_reg, pic_offset_table_rtx)); + emit_insn (gen_blockage ()); + } + else if (GET_CODE (op0) == SYMBOL_REF + && GET_CODE (op1) == CONST_INT) + { + /* This is a constant offset from a @GOT symbol reference. 
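+	     For example (illustrative), for "sym + 8" the GOT entry of sym
+	     is loaded first, and the offset 8 is then added directly when
+	     it satisfies the Is15 constraint, or through an extra temporary
+	     register otherwise.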
*/ + pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, op0), UNSPEC_GOT); + pat = gen_rtx_CONST (SImode, pat); + emit_insn (gen_sethi (tmp_reg, pat)); + emit_insn (gen_lo_sum (tmp_reg, tmp_reg, pat)); + rtx got_addr = gen_frame_mem (SImode, + gen_rtx_PLUS (Pmode, + pic_offset_table_rtx, + tmp_reg)); + emit_move_insn (operands[0], got_addr); + if (satisfies_constraint_Is15 (op1)) + emit_insn (gen_addsi3 (operands[0], operands[0], op1)); + else + { + rtx tmp_reg2 = gen_reg_rtx (SImode); + emit_insn (gen_movsi (tmp_reg2, op1)); + emit_insn (gen_addsi3 (operands[0], operands[0], tmp_reg2)); + } + emit_insn (gen_blockage ()); + } + else + { + /* Don't handle this pattern. */ + debug_rtx (operands[1]); + gcc_unreachable (); + } + } +} + +/* Return true if SYMBOL_REF X binds locally. */ + +static bool +nds32_symbol_binds_local_p (const_rtx x) +{ + return (SYMBOL_REF_DECL (x) + ? targetm.binds_local_p (SYMBOL_REF_DECL (x)) + : SYMBOL_REF_LOCAL_P (x)); +} + +/* Expand call address PIC code for @PLT. + + Example for @PLT: + + bal symbol@PLT + -> sethi $rt, hi20(symbol@PLT) + ori $rt, $rt, lo12(symbol@PLT) + add $rt, $rt, $gp + jral $lp, $rt +*/ +void +nds32_expand_call_address (rtx *call_op) +{ + rtx addr; + gcc_assert (MEM_P (*call_op)); + addr = XEXP (*call_op, 0); + + /* If the function is hidden or internal, compiler will emit pseudo + call instruction expanding in assembler. And the pattern will use + GOT_OFFSET_TABLE. TODO: It can be expanded here but it have to be + grouped for relax_hint. */ + if (flag_pic) + emit_use (pic_offset_table_rtx); + + if (flag_pic && CONSTANT_P (addr) && !nds32_symbol_binds_local_p (addr)) + { + rtx tmp_reg = gen_reg_rtx (SImode); + rtx tmp2_reg = gen_reg_rtx (SImode); + rtx pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, addr), UNSPEC_PLT); + pat = gen_rtx_CONST (SImode, pat); + emit_insn (gen_sethi (tmp_reg, pat)); + emit_insn (gen_lo_sum (tmp_reg, tmp_reg, pat)); + emit_insn (gen_addsi3 (tmp2_reg, tmp_reg, pic_offset_table_rtx)); + /* Expand jral instruction. */ + XEXP (*call_op, 0) = tmp2_reg; + } +} + +/* Return true X is a indirect call symbol. */ +bool +nds32_indirect_call_referenced_p (rtx x) +{ + if (GET_CODE (x) == SYMBOL_REF) + { + tree decl = SYMBOL_REF_DECL (x); + + return decl + && (lookup_attribute("indirect_call", + DECL_ATTRIBUTES(decl)) + != NULL); + } + + return false; +} + +/* Return true if X contains a thread-local symbol. */ +bool +nds32_tls_referenced_p (rtx x) +{ + if (!targetm.have_tls) + return false; + + if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) + x = XEXP (XEXP (x, 0), 0); + + if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x)) + return true; + + return false; +} + +/* ADDR contains a thread-local SYMBOL_REF. Generate code to compute + this (thread-local) address. */ +rtx +nds32_legitimize_tls_address (rtx x) +{ + rtx tmp_reg; + rtx tp_reg = gen_rtx_REG (Pmode, TP_REGNUM); + rtx pat, insns, ret; + + if (GET_CODE (x) == SYMBOL_REF) + switch (SYMBOL_REF_TLS_MODEL (x)) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + case TLS_MODEL_LOCAL_DYNAMIC: + /* Emit UNSPEC_TLS_DESC rather than expand rtl directly because spill + may destroy the define-use chain anylysis to insert relax_hint. 
*/ + if (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC) + pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSGD); + else + pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSLD); + + pat = gen_rtx_CONST (SImode, pat); + ret = gen_rtx_REG (Pmode, 0); + /* If we can confirm all clobber reigsters, it doesn't have to use call + instruction. */ + insns = emit_call_insn (gen_tls_desc (pat, GEN_INT (0))); + use_reg (&CALL_INSN_FUNCTION_USAGE (insns), pic_offset_table_rtx); + RTL_CONST_CALL_P (insns) = 1; + x = ret; + break; + + case TLS_MODEL_INITIAL_EXEC: + pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSIE); + tmp_reg = gen_reg_rtx (SImode); + pat = gen_rtx_CONST (SImode, pat); + emit_insn (gen_tls_ie (tmp_reg, pat, GEN_INT (0))); + if (flag_pic) + emit_use (pic_offset_table_rtx); + x = gen_rtx_PLUS (Pmode, tmp_reg, tp_reg); + break; + + case TLS_MODEL_LOCAL_EXEC: + /* Expand symbol_ref@TPOFF': + sethi $ta, hi20(symbol_ref@TPOFF) + ori $ta, $ta, lo12(symbol_ref@TPOFF) + add $r0, $ta, $tp */ + tmp_reg = gen_reg_rtx (SImode); + pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSLE); + pat = gen_rtx_CONST (SImode, pat); + emit_insn (gen_sethi (tmp_reg, pat)); + emit_insn (gen_lo_sum (tmp_reg, tmp_reg, pat)); + x = gen_rtx_PLUS (Pmode, tmp_reg, tp_reg); + break; + + default: + gcc_unreachable (); + } + + return x; +} + +void +nds32_expand_tls_move (rtx *operands) +{ + rtx src = operands[1]; + rtx addend = NULL; + + if (GET_CODE (src) == CONST && GET_CODE (XEXP (src, 0)) == PLUS) + { + addend = XEXP (XEXP (src, 0), 1); + src = XEXP (XEXP (src, 0), 0); + } + + src = nds32_legitimize_tls_address (src); + + if (addend) + { + src = gen_rtx_PLUS (SImode, src, addend); + src = force_operand (src, operands[0]); + } + emit_move_insn (operands[0], src); +} + +void +nds32_expand_constant (enum machine_mode mode, HOST_WIDE_INT val, + rtx target, rtx source) +{ + rtx temp = gen_reg_rtx (mode); + int clear_sign_bit_copies = 0; + int clear_zero_bit_copies = 0; + unsigned HOST_WIDE_INT remainder = val & 0xffffffffUL; + + /* Count number of leading zeros. */ + clear_sign_bit_copies = __builtin_clz (remainder); + /* Count number of trailing zeros. */ + clear_zero_bit_copies = __builtin_ctz (remainder); + + HOST_WIDE_INT sign_shift_mask = ((0xffffffffUL + << (32 - clear_sign_bit_copies)) + & 0xffffffffUL); + HOST_WIDE_INT zero_shift_mask = (1 << clear_zero_bit_copies) - 1; + + if (clear_sign_bit_copies > 0 && clear_sign_bit_copies < 17 + && (remainder | sign_shift_mask) == 0xffffffffUL) + { + /* Transfer AND to two shifts, example: + a = b & 0x7fffffff => (b << 1) >> 1 */ + rtx shift = GEN_INT (clear_sign_bit_copies); + + emit_insn (gen_ashlsi3 (temp, source, shift)); + emit_insn (gen_lshrsi3 (target, temp, shift)); + } + else if (clear_zero_bit_copies > 0 && clear_sign_bit_copies < 17 + && (remainder | zero_shift_mask) == 0xffffffffUL) + { + /* Transfer AND to two shifts, example: + a = b & 0xfff00000 => (b >> 20) << 20 */ + rtx shift = GEN_INT (clear_zero_bit_copies); + + emit_insn (gen_lshrsi3 (temp, source, shift)); + emit_insn (gen_ashlsi3 (target, temp, shift)); + } + else + { + emit_move_insn (temp, GEN_INT (val)); + emit_move_insn (target, gen_rtx_fmt_ee (AND, mode, source, temp)); + } +} + +/* Auxiliary functions for lwm/smw. 
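+   Descriptive note: nds32_valid_smw_lwm_base_p below accepts either a
+   plain register base, e.g. [$rN], or a post-increment of such a
+   register; any other address form is rejected.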
*/ +bool +nds32_valid_smw_lwm_base_p (rtx op) +{ + rtx base_addr; + + if (!MEM_P (op)) + return false; + + base_addr = XEXP (op, 0); + + if (REG_P (base_addr)) + return true; + else + { + if (GET_CODE (base_addr) == POST_INC + && REG_P (XEXP (base_addr, 0))) + return true; + } + + return false; +} + +/* Auxiliary functions for manipulation DI mode. */ +rtx nds32_di_high_part_subreg(rtx reg) +{ + unsigned high_part_offset = subreg_highpart_offset (SImode, DImode); + + return simplify_gen_subreg ( + SImode, reg, + DImode, high_part_offset); +} + +rtx nds32_di_low_part_subreg(rtx reg) +{ + unsigned low_part_offset = subreg_lowpart_offset (SImode, DImode); + + return simplify_gen_subreg ( + SImode, reg, + DImode, low_part_offset); +} + +/* ------------------------------------------------------------------------ */ + +/* Auxiliary function for output TLS patterns. */ + +const char * +nds32_output_tls_desc (rtx *operands) +{ + char pattern[1000]; + + if (TARGET_RELAX_HINT) + snprintf (pattern, sizeof (pattern), + ".relax_hint %%1\n\tsethi $r0, hi20(%%0)\n\t" + ".relax_hint %%1\n\tori $r0, $r0, lo12(%%0)\n\t" + ".relax_hint %%1\n\tlw $r15, [$r0 + $gp]\n\t" + ".relax_hint %%1\n\tadd $r0, $r0, $gp\n\t" + ".relax_hint %%1\n\tjral $r15"); + else + snprintf (pattern, sizeof (pattern), + "sethi $r0, hi20(%%0)\n\t" + "ori $r0, $r0, lo12(%%0)\n\t" + "lw $r15, [$r0 + $gp]\n\t" + "add $r0, $r0, $gp\n\t" + "jral $r15"); + output_asm_insn (pattern, operands); + return ""; +} + +const char * +nds32_output_tls_ie (rtx *operands) +{ + char pattern[1000]; + + if (flag_pic) + { + if (TARGET_RELAX_HINT) + snprintf (pattern, sizeof (pattern), + ".relax_hint %%2\n\tsethi %%0, hi20(%%1)\n\t" + ".relax_hint %%2\n\tori %%0, %%0, lo12(%%1)\n\t" + ".relax_hint %%2\n\tlw %%0, [%%0 + $gp]"); + else + snprintf (pattern, sizeof (pattern), + "sethi %%0, hi20(%%1)\n\t" + "ori %%0, %%0, lo12(%%1)\n\t" + "lw %%0, [%%0 + $gp]"); + } + else + { + if (TARGET_RELAX_HINT) + snprintf (pattern, sizeof (pattern), + ".relax_hint %%2\n\tsethi %%0, hi20(%%1)\n\t" + ".relax_hint %%2\n\tlwi %%0, [%%0 + lo12(%%1)]"); + else + snprintf (pattern, sizeof (pattern), + "sethi %%0, hi20(%%1)\n\t" + "lwi %%0, [%%0 + lo12(%%1)]"); + } + output_asm_insn (pattern, operands); + return ""; +} diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-memory-manipulation.c gcc-4.9.4/gcc/config/nds32/nds32-memory-manipulation.c --- gcc-4.9.4.orig/gcc/config/nds32/nds32-memory-manipulation.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-memory-manipulation.c 2016-08-08 20:37:45.506270091 +0200 @@ -0,0 +1,1152 @@ +/* Auxiliary functions for expand movmem, setmem, cmpmem, load_multiple + and store_multiple pattern of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2015 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + +/* ------------------------------------------------------------------------ */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" /* Required by recog.h. */ +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" /* For DFA state_t. */ +#include "insn-codes.h" /* For CODE_FOR_xxx. */ +#include "reload.h" /* For push_reload(). */ +#include "flags.h" +#include "function.h" +#include "expr.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "df.h" +#include "tm_p.h" +#include "tm-constrs.h" +#include "optabs.h" /* For GEN_FCN. */ +#include "target.h" +#include "target-def.h" +#include "langhooks.h" /* For add_builtin_function(). */ +#include "ggc.h" + +/* ------------------------------------------------------------------------ */ + +/* This file is divided into six parts: + + PART 1: Auxiliary static function definitions. + + PART 2: Auxiliary function for expand movmem pattern. + + PART 3: Auxiliary function for expand setmem pattern. + + PART 4: Auxiliary function for expand movstr pattern. + + PART 5: Auxiliary function for expand strlen pattern. + + PART 6: Auxiliary function for expand load_multiple/store_multiple + pattern. */ + +/* ------------------------------------------------------------------------ */ + +/* PART 1: Auxiliary static function definitions. */ + +static void +nds32_emit_load_store (rtx reg, rtx mem, + enum machine_mode mode, + int offset, bool load_p) +{ + rtx new_mem; + new_mem = adjust_address (mem, mode, offset); + if (load_p) + emit_move_insn (reg, new_mem); + else + emit_move_insn (new_mem, reg); +} + +static void +nds32_emit_post_inc_load_store (rtx reg, rtx base_reg, + enum machine_mode mode, + bool load_p) +{ + gcc_assert (GET_MODE (reg) == mode); + gcc_assert (GET_MODE (base_reg) == Pmode); + + /* Do not gen (set (reg) (mem (post_inc (reg)))) directly here since it may + not recognize by gcc, so let gcc combine it at auto_inc_dec pass. */ + if (load_p) + emit_move_insn (reg, + gen_rtx_MEM (mode, + base_reg)); + else + emit_move_insn (gen_rtx_MEM (mode, + base_reg), + reg); + + emit_move_insn (base_reg, + plus_constant(Pmode, base_reg, GET_MODE_SIZE (mode))); +} + +static void +nds32_emit_mem_move (rtx src, rtx dst, + enum machine_mode mode, + int addr_offset) +{ + gcc_assert (MEM_P (src) && MEM_P (dst)); + rtx tmp_reg = gen_reg_rtx (mode); + nds32_emit_load_store (tmp_reg, src, mode, + addr_offset, /* load_p */ true); + nds32_emit_load_store (tmp_reg, dst, mode, + addr_offset, /* load_p */ false); +} + +static void +nds32_emit_mem_move_block (int base_regno, int count, + rtx *dst_base_reg, rtx *dst_mem, + rtx *src_base_reg, rtx *src_mem, + bool update_base_reg_p) +{ + rtx new_base_reg; + + emit_insn (nds32_expand_load_multiple (base_regno, count, + *src_base_reg, *src_mem, + update_base_reg_p, &new_base_reg)); + if (update_base_reg_p) + { + *src_base_reg = new_base_reg; + *src_mem = gen_rtx_MEM (SImode, *src_base_reg); + } + + emit_insn (nds32_expand_store_multiple (base_regno, count, + *dst_base_reg, *dst_mem, + update_base_reg_p, &new_base_reg)); + + if (update_base_reg_p) + { + *dst_base_reg = new_base_reg; + *dst_mem = gen_rtx_MEM (SImode, *dst_base_reg); + } +} + +/* ------------------------------------------------------------------------ */ + +/* PART 2: Auxiliary function for expand movmem pattern. 
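+
+   Overview (descriptive): small constant-size copies are unrolled into
+   lmw.bim/smw.bim pairs by nds32_expand_movmemsi_unroll; other cases use
+   the loop expansion only at -O3 and above (and not at -Os), otherwise
+   the expander gives up and the generic block move is used.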
*/ + +static bool +nds32_expand_movmemsi_loop_unknown_size (rtx dstmem, rtx srcmem, + rtx size, + rtx alignment) +{ + /* Emit loop version of movmem. + + andi $size_least_3_bit, $size, #~7 + add $dst_end, $dst, $size + move $dst_itr, $dst + move $src_itr, $src + beqz $size_least_3_bit, .Lbyte_mode_entry ! Not large enough. + add $double_word_end, $dst, $size_least_3_bit + + .Ldouble_word_mode_loop: + lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr + smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr + ! move will delete after register allocation + move $src_itr, $src_itr' + move $dst_itr, $dst_itr' + ! Not readch upper bound. Loop. + bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop + + .Lbyte_mode_entry: + beq $dst_itr, $dst_end, .Lend_label + .Lbyte_mode_loop: + lbi.bi $tmp, [$src_itr], #1 + sbi.bi $tmp, [$dst_itr], #1 + ! Not readch upper bound. Loop. + bne $dst_itr, $dst_end, .Lbyte_mode_loop + .Lend_label: + */ + rtx dst_base_reg, src_base_reg; + rtx dst_itr, src_itr; + rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m; + rtx dst_end; + rtx size_least_3_bit; + rtx double_word_end; + rtx double_word_mode_loop, byte_mode_entry, byte_mode_loop, end_label; + rtx tmp; + rtx mask_least_3_bit; + int start_regno; + bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0; + + if (TARGET_ISA_V3M && !align_to_4_bytes) + return 0; + + if (TARGET_REDUCED_REGS) + start_regno = 2; + else + start_regno = 16; + + dst_itr = gen_reg_rtx (Pmode); + src_itr = gen_reg_rtx (Pmode); + dst_end = gen_reg_rtx (Pmode); + tmp = gen_reg_rtx (QImode); + mask_least_3_bit = GEN_INT (~7); + + double_word_mode_loop = gen_label_rtx (); + byte_mode_entry = gen_label_rtx (); + byte_mode_loop = gen_label_rtx (); + end_label = gen_label_rtx (); + + dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0)); + src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0)); + /* andi $size_least_3_bit, $size, #~7 */ + size_least_3_bit = expand_binop (SImode, and_optab, size, mask_least_3_bit, + NULL_RTX, 0, OPTAB_WIDEN); + /* add $dst_end, $dst, $size */ + dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size, + NULL_RTX, 0, OPTAB_WIDEN); + + /* move $dst_itr, $dst + move $src_itr, $src */ + emit_move_insn (dst_itr, dst_base_reg); + emit_move_insn (src_itr, src_base_reg); + + /* beqz $size_least_3_bit, .Lbyte_mode_entry ! Not large enough. */ + emit_cmp_and_jump_insns (size_least_3_bit, const0_rtx, EQ, NULL, + SImode, 1, byte_mode_entry); + /* add $double_word_end, $dst, $size_least_3_bit */ + double_word_end = expand_binop (Pmode, add_optab, + dst_base_reg, size_least_3_bit, + NULL_RTX, 0, OPTAB_WIDEN); + + /* .Ldouble_word_mode_loop: */ + emit_label (double_word_mode_loop); + /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr + smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */ + src_itr_m = src_itr; + dst_itr_m = dst_itr; + srcmem_m = srcmem; + dstmem_m = dstmem; + nds32_emit_mem_move_block (start_regno, 2, + &dst_itr_m, &dstmem_m, + &src_itr_m, &srcmem_m, + true); + /* move $src_itr, $src_itr' + move $dst_itr, $dst_itr' */ + emit_move_insn (dst_itr, dst_itr_m); + emit_move_insn (src_itr, src_itr_m); + + /* ! Not readch upper bound. Loop. 
+ bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop */ + emit_cmp_and_jump_insns (double_word_end, dst_itr, NE, NULL, + Pmode, 1, double_word_mode_loop); + /* .Lbyte_mode_entry: */ + emit_label (byte_mode_entry); + + /* beq $dst_itr, $dst_end, .Lend_label */ + emit_cmp_and_jump_insns (dst_itr, dst_end, EQ, NULL, + Pmode, 1, end_label); + /* .Lbyte_mode_loop: */ + emit_label (byte_mode_loop); + + emit_insn (gen_no_hwloop ()); + /* lbi.bi $tmp, [$src_itr], #1 */ + nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true); + + /* sbi.bi $tmp, [$dst_itr], #1 */ + nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false); + /* ! Not readch upper bound. Loop. + bne $dst_itr, $dst_end, .Lbyte_mode_loop */ + emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL, + SImode, 1, byte_mode_loop); + + /* .Lend_label: */ + emit_label (end_label); + + return true; +} + +static bool +nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem, + rtx size, rtx alignment) +{ + return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem, + size, alignment); +} + +static bool +nds32_expand_movmemsi_loop (rtx dstmem, rtx srcmem, + rtx size, rtx alignment) +{ + if (CONST_INT_P (size)) + return nds32_expand_movmemsi_loop_known_size (dstmem, srcmem, + size, alignment); + else + return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem, + size, alignment); +} + +static bool +nds32_expand_movmemsi_unroll (rtx dstmem, rtx srcmem, + rtx total_bytes, rtx alignment) +{ + rtx dst_base_reg, src_base_reg; + rtx tmp_reg; + int maximum_bytes; + int maximum_bytes_per_inst; + int maximum_regs; + int start_regno; + int i, inst_num; + HOST_WIDE_INT remain_bytes, remain_words; + bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0; + bool align_to_2_bytes = (INTVAL (alignment) & 1) == 0; + + /* Because reduced-set regsiters has few registers + (r0~r5, r6~10, r15, r28~r31, where 'r15' and 'r28~r31' + cannot be used for register allocation), + using 8 registers (32 bytes) for moving memory block + may easily consume all of them. + It makes register allocation/spilling hard to work. + So we only allow maximum=4 registers (16 bytes) for + moving memory block under reduced-set registers. */ + if (TARGET_REDUCED_REGS) + { + maximum_regs = 4; + maximum_bytes = 64; + start_regno = 2; + } + else + { + if (TARGET_LINUX_ABI) + { + /* $r25 is $tp so we use up to 8 registers if using Linux ABI. */ + maximum_regs = 8; + maximum_bytes = 160; + start_regno = 16; + } + else + { + maximum_regs = 10; + maximum_bytes = 160; + start_regno = 16; + } + } + maximum_bytes_per_inst = maximum_regs * UNITS_PER_WORD; + + /* 1. Total_bytes is integer for sure. + 2. Alignment is integer for sure. + 3. Maximum 4 or 10 registers and up to 4 instructions, + 4 * 4 * 4 = 64 bytes, 8 * 4 * 10 = 160 bytes. + 4. The dstmem cannot be volatile memory access. + 5. The srcmem cannot be volatile memory access. + 6. Known shared alignment not align to 4 byte in v3m since lmw/smw *NOT* + support unalign access with v3m configure. */ + if (GET_CODE (total_bytes) != CONST_INT + || GET_CODE (alignment) != CONST_INT + || INTVAL (total_bytes) > maximum_bytes + || MEM_VOLATILE_P (dstmem) + || MEM_VOLATILE_P (srcmem) + || (TARGET_ISA_V3M && !align_to_4_bytes)) + return false; + + dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0)); + src_base_reg = copy_to_mode_reg (SImode, XEXP (srcmem, 0)); + remain_bytes = INTVAL (total_bytes); + + /* Do not update base address for last lmw/smw pair. 
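+     The last pair is emitted by the tail code further below.  As an
+     illustrative example, total_bytes == 100 with maximum_regs == 10
+     (40 bytes per lmw.bim/smw.bim pair) gives inst_num == 2 updating
+     pairs here, leaving 20 bytes (five words) for the final
+     non-updating pair.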
*/ + inst_num = ((INTVAL (total_bytes) + (maximum_bytes_per_inst - 1)) + / maximum_bytes_per_inst) - 1; + + for (i = 0; i < inst_num; i++) + { + nds32_emit_mem_move_block (start_regno, maximum_regs, + &dst_base_reg, &dstmem, + &src_base_reg, &srcmem, + true); + } + remain_bytes -= maximum_bytes_per_inst * inst_num; + + remain_words = remain_bytes / UNITS_PER_WORD; + remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD); + + if (remain_words != 0) + { + if (remain_bytes != 0) + nds32_emit_mem_move_block (start_regno, remain_words, + &dst_base_reg, &dstmem, + &src_base_reg, &srcmem, + true); + else + { + /* Do not update address if no further byte to move. */ + if (remain_words == 1) + { + /* emit move instruction if align to 4 byte and only 1 + word to move. */ + if (align_to_4_bytes) + nds32_emit_mem_move (srcmem, dstmem, SImode, 0); + else + { + tmp_reg = gen_reg_rtx (SImode); + emit_insn ( + gen_unaligned_load_w (tmp_reg, + gen_rtx_MEM (SImode, src_base_reg))); + emit_insn ( + gen_unaligned_store_w (gen_rtx_MEM (SImode, dst_base_reg), + tmp_reg)); + } + } + else + nds32_emit_mem_move_block (start_regno, remain_words, + &dst_base_reg, &dstmem, + &src_base_reg, &srcmem, + false); + } + } + + switch (remain_bytes) + { + case 3: + case 2: + { + if (align_to_2_bytes) + nds32_emit_mem_move (srcmem, dstmem, HImode, 0); + else + { + nds32_emit_mem_move (srcmem, dstmem, QImode, 0); + nds32_emit_mem_move (srcmem, dstmem, QImode, 1); + } + + if (remain_bytes == 3) + nds32_emit_mem_move (srcmem, dstmem, QImode, 2); + break; + } + case 1: + nds32_emit_mem_move (srcmem, dstmem, QImode, 0); + break; + case 0: + break; + default: + gcc_unreachable (); + } + + /* Successfully create patterns, return true. */ + return true; +} + +/* Function to move block memory content by + using load_multiple and store_multiple. + This is auxiliary extern function to help create rtx template. + Check nds32-multiple.md file for the patterns. */ +bool +nds32_expand_movmemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment) +{ + if (nds32_expand_movmemsi_unroll (dstmem, srcmem, total_bytes, alignment)) + return true; + + if (!optimize_size && optimize > 2) + return nds32_expand_movmemsi_loop (dstmem, srcmem, total_bytes, alignment); + + return false; +} + +/* ------------------------------------------------------------------------ */ + +/* PART 3: Auxiliary function for expand setmem pattern. */ + +static rtx +nds32_gen_dup_4_byte_to_word_value (rtx value) +{ + rtx value4word = gen_reg_rtx (SImode); + + gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value)); + + if (CONST_INT_P (value)) + { + unsigned HOST_WIDE_INT val = UINTVAL (value) & GET_MODE_MASK(QImode); + rtx new_val = gen_int_mode (val | (val << 8) + | (val << 16) | (val << 24), SImode); + /* Just calculate at here if it's constant value. */ + emit_move_insn (value4word, new_val); + } + else + { + if (NDS32_EXT_DSP_P ()) + { + /* ! prepare word + andi $tmp, $value, 0xff ! $tmp <- 0x000000ab + insb $tmp, $tmp, 1 ! $tmp <- 0x0000abab + pkbb16 $tmp6, $tmp2, $tmp2 ! $value4word <- 0xabababab */ + + rtx tmp; + tmp = expand_binop (SImode, and_optab, value, + gen_int_mode (0xff, SImode), + NULL_RTX, 0, OPTAB_WIDEN); + + emit_insn ( + gen_insvsi_internal (tmp, gen_int_mode (0x8, SImode), tmp)); + + emit_insn (gen_pkbbsi_1 (value4word, tmp, tmp)); + } + else + { + /* ! prepare word + andi $tmp1, $value, 0xff ! $tmp1 <- 0x000000ab + slli $tmp2, $tmp1, 8 ! $tmp2 <- 0x0000ab00 + or $tmp3, $tmp1, $tmp2 ! $tmp3 <- 0x0000abab + slli $tmp4, $tmp3, 16 ! 
$tmp4 <- 0xabab0000 + or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab */ + + rtx tmp1, tmp2, tmp3, tmp4, final_value; + tmp1 = expand_binop (SImode, and_optab, value, + gen_int_mode (0xff, SImode), + NULL_RTX, 0, OPTAB_WIDEN); + tmp2 = expand_binop (SImode, ashl_optab, tmp1, + gen_int_mode (8, SImode), + NULL_RTX, 0, OPTAB_WIDEN); + tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2, + NULL_RTX, 0, OPTAB_WIDEN); + tmp4 = expand_binop (SImode, ashl_optab, tmp3, + gen_int_mode (16, SImode), + NULL_RTX, 0, OPTAB_WIDEN); + + final_value = expand_binop (SImode, ior_optab, tmp3, tmp4, + NULL_RTX, 0, OPTAB_WIDEN); + emit_move_insn (value4word, final_value); + } + } + + return value4word; +} + +static rtx +emit_setmem_word_loop (rtx itr, rtx size, rtx value) +{ + rtx word_mode_label = gen_label_rtx (); + rtx word_mode_end_label = gen_label_rtx (); + rtx byte_mode_size = gen_reg_rtx (SImode); + rtx byte_mode_size_tmp = gen_reg_rtx (SImode); + rtx word_mode_end = gen_reg_rtx (SImode); + rtx size_for_word = gen_reg_rtx (SImode); + + /* and $size_for_word, $size, #~3 */ + size_for_word = expand_binop (SImode, and_optab, size, + gen_int_mode (~3, SImode), + NULL_RTX, 0, OPTAB_WIDEN); + + emit_move_insn (byte_mode_size, size); + + /* beqz $size_for_word, .Lbyte_mode_entry */ + emit_cmp_and_jump_insns (size_for_word, const0_rtx, EQ, NULL, + SImode, 1, word_mode_end_label); + /* add $word_mode_end, $dst, $size_for_word */ + word_mode_end = expand_binop (Pmode, add_optab, itr, size_for_word, + NULL_RTX, 0, OPTAB_WIDEN); + + /* andi $byte_mode_size, $size, 3 */ + byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (3), + NULL_RTX, 0, OPTAB_WIDEN); + + emit_move_insn (byte_mode_size, byte_mode_size_tmp); + + /* .Lword_mode: */ + emit_label (word_mode_label); + /* ! word-mode set loop + smw.bim $value4word, [$dst_itr], $value4word, 0 + bne $word_mode_end, $dst_itr, .Lword_mode */ + emit_insn (gen_unaligned_store_update_base_w (itr, + itr, + value)); + emit_cmp_and_jump_insns (word_mode_end, itr, NE, NULL, + Pmode, 1, word_mode_label); + + emit_label (word_mode_end_label); + + return byte_mode_size; +} + +static rtx +emit_setmem_byte_loop (rtx itr, rtx size, rtx value, bool need_end) +{ + rtx end = gen_reg_rtx (Pmode); + rtx byte_mode_label = gen_label_rtx (); + rtx end_label = gen_label_rtx (); + + value = force_reg (QImode, value); + + if (need_end) + end = expand_binop (Pmode, add_optab, itr, size, + NULL_RTX, 0, OPTAB_WIDEN); + /* beqz $byte_mode_size, .Lend + add $byte_mode_end, $dst_itr, $byte_mode_size */ + emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL, + SImode, 1, end_label); + + if (!need_end) + end = expand_binop (Pmode, add_optab, itr, size, + NULL_RTX, 0, OPTAB_WIDEN); + + /* .Lbyte_mode: */ + emit_label (byte_mode_label); + + emit_insn (gen_no_hwloop ()); + /* ! byte-mode set loop + sbi.bi $value, [$dst_itr] ,1 + bne $byte_mode_end, $dst_itr, .Lbyte_mode */ + nds32_emit_post_inc_load_store (value, itr, QImode, false); + + emit_cmp_and_jump_insns (end, itr, NE, NULL, + Pmode, 1, byte_mode_label); + /* .Lend: */ + emit_label (end_label); + + if (need_end) + return end; + else + return NULL_RTX; +} + +static bool +nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value) +{ + rtx value4word; + rtx value4byte; + rtx dst; + rtx byte_mode_size; + + /* Emit loop version of setmem. + memset: + ! prepare word + andi $tmp1, $val, 0xff ! $tmp1 <- 0x000000ab + slli $tmp2, $tmp1, 8 ! $tmp2 <- 0x0000ab00 + or $tmp3, $val, $tmp2 ! $tmp3 <- 0x0000abab + slli $tmp4, $tmp3, 16 ! 
$tmp4 <- 0xabab0000 + or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab + + and $size_for_word, $size, #-4 + beqz $size_for_word, .Lword_mode_end + + add $word_mode_end, $dst, $size_for_word + andi $byte_mode_size, $size, 3 + + .Lword_mode: + ! word-mode set loop + smw.bim $value4word, [$dst], $value4word, 0 + bne $word_mode_end, $dst, .Lword_mode + + .Lword_mode_end: + beqz $byte_mode_size, .Lend + add $byte_mode_end, $dst, $byte_mode_size + + .Lbyte_mode: + ! byte-mode set loop + sbi.bi $value4word, [$dst] ,1 + bne $byte_mode_end, $dst, .Lbyte_mode + .Lend: */ + + dst = copy_to_mode_reg (SImode, XEXP (dstmem, 0)); + + /* ! prepare word + andi $tmp1, $value, 0xff ! $tmp1 <- 0x000000ab + slli $tmp2, $tmp1, 8 ! $tmp2 <- 0x0000ab00 + or $tmp3, $tmp1, $tmp2 ! $tmp3 <- 0x0000abab + slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000 + or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab */ + value4word = nds32_gen_dup_4_byte_to_word_value (value); + + /* and $size_for_word, $size, #-4 + beqz $size_for_word, .Lword_mode_end + + add $word_mode_end, $dst, $size_for_word + andi $byte_mode_size, $size, 3 + + .Lword_mode: + ! word-mode set loop + smw.bim $value4word, [$dst], $value4word, 0 + bne $word_mode_end, $dst, .Lword_mode + .Lword_mode_end: */ + byte_mode_size = emit_setmem_word_loop (dst, size, value4word); + + /* beqz $byte_mode_size, .Lend + add $byte_mode_end, $dst, $byte_mode_size + + .Lbyte_mode: + ! byte-mode set loop + sbi.bi $value, [$dst] ,1 + bne $byte_mode_end, $dst, .Lbyte_mode + .Lend: */ + + value4byte = simplify_gen_subreg (QImode, value4word, SImode, + subreg_lowpart_offset (QImode, SImode)); + + emit_setmem_byte_loop (dst, byte_mode_size, value4byte, false); + + return true; +} + +static bool +nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value) +{ + rtx base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0)); + rtx need_align_bytes = gen_reg_rtx (SImode); + rtx last_2_bit = gen_reg_rtx (SImode); + rtx byte_loop_base = gen_reg_rtx (SImode); + rtx byte_loop_size = gen_reg_rtx (SImode); + rtx remain_size = gen_reg_rtx (SImode); + rtx new_base_reg; + rtx value4byte, value4word; + rtx byte_mode_size; + rtx last_byte_loop_label = gen_label_rtx (); + + size = force_reg (SImode, size); + + value4word = nds32_gen_dup_4_byte_to_word_value (value); + value4byte = simplify_gen_subreg (QImode, value4word, SImode, 0); + + emit_move_insn (byte_loop_size, size); + emit_move_insn (byte_loop_base, base_reg); + + /* Jump to last byte loop if size is less than 16. */ + emit_cmp_and_jump_insns (size, gen_int_mode (16, SImode), LE, NULL, + SImode, 1, last_byte_loop_label); + + /* Make sure align to 4 byte first since v3m can't unalign access. */ + emit_insn (gen_andsi3 (last_2_bit, + base_reg, + gen_int_mode (0x3, SImode))); + + emit_insn (gen_subsi3 (need_align_bytes, + gen_int_mode (4, SImode), + last_2_bit)); + + /* Align to 4 byte. */ + new_base_reg = emit_setmem_byte_loop (base_reg, + need_align_bytes, + value4byte, + true); + + /* Calculate remain size. */ + emit_insn (gen_subsi3 (remain_size, size, need_align_bytes)); + + /* Set memory word by word. */ + byte_mode_size = emit_setmem_word_loop (new_base_reg, + remain_size, + value4word); + + emit_move_insn (byte_loop_base, new_base_reg); + emit_move_insn (byte_loop_size, byte_mode_size); + + emit_label (last_byte_loop_label); + + /* And set memory for remain bytes. 
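+     At this label byte_loop_base/byte_loop_size describe either the whole
+     region (when the original size was at most 16 bytes) or only the tail
+     of fewer than four bytes left after the word loop.  (Descriptive
+     note.)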
*/ + emit_setmem_byte_loop (byte_loop_base, byte_loop_size, value4byte, false); + return true; +} + +static bool +nds32_expand_setmem_unroll (rtx dstmem, rtx size, rtx value, + rtx align ATTRIBUTE_UNUSED, + rtx expected_align ATTRIBUTE_UNUSED, + rtx expected_size ATTRIBUTE_UNUSED) +{ + unsigned maximum_regs, maximum_bytes, start_regno, regno; + rtx value4word; + rtx dst_base_reg, new_base_reg; + unsigned HOST_WIDE_INT remain_bytes, remain_words, prepare_regs, fill_per_smw; + unsigned HOST_WIDE_INT real_size; + + if (TARGET_REDUCED_REGS) + { + maximum_regs = 4; + maximum_bytes = 64; + start_regno = 2; + } + else + { + maximum_regs = 8; + maximum_bytes = 128; + start_regno = 16; + } + + real_size = UINTVAL (size) & GET_MODE_MASK(SImode); + + if (!(CONST_INT_P (size) && real_size <= maximum_bytes)) + return false; + + remain_bytes = real_size; + + gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value)); + + value4word = nds32_gen_dup_4_byte_to_word_value (value); + + prepare_regs = remain_bytes / UNITS_PER_WORD; + + dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0)); + + if (prepare_regs > maximum_regs) + prepare_regs = maximum_regs; + + fill_per_smw = prepare_regs * UNITS_PER_WORD; + + regno = start_regno; + switch (prepare_regs) + { + case 2: + default: + { + rtx reg0 = gen_rtx_REG (SImode, regno); + rtx reg1 = gen_rtx_REG (SImode, regno+1); + unsigned last_regno = start_regno + prepare_regs - 1; + + emit_move_insn (reg0, value4word); + emit_move_insn (reg1, value4word); + rtx regd = gen_rtx_REG (DImode, regno); + regno += 2; + + /* Try to utilize movd44! */ + while (regno <= last_regno) + { + if ((regno + 1) <=last_regno) + { + rtx reg = gen_rtx_REG (DImode, regno); + emit_move_insn (reg, regd); + regno += 2; + } + else + { + rtx reg = gen_rtx_REG (SImode, regno); + emit_move_insn (reg, reg0); + regno += 1; + } + } + break; + } + case 1: + { + rtx reg = gen_rtx_REG (SImode, regno++); + emit_move_insn (reg, value4word); + } + break; + case 0: + break; + } + + if (fill_per_smw) + for (;remain_bytes >= fill_per_smw;remain_bytes -= fill_per_smw) + { + emit_insn (nds32_expand_store_multiple (start_regno, prepare_regs, + dst_base_reg, dstmem, + true, &new_base_reg)); + dst_base_reg = new_base_reg; + dstmem = gen_rtx_MEM (SImode, dst_base_reg); + } + + remain_words = remain_bytes / UNITS_PER_WORD; + + if (remain_words) + { + emit_insn (nds32_expand_store_multiple (start_regno, remain_words, + dst_base_reg, dstmem, + true, &new_base_reg)); + dst_base_reg = new_base_reg; + dstmem = gen_rtx_MEM (SImode, dst_base_reg); + } + + remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD); + + if (remain_bytes) + { + value = simplify_gen_subreg (QImode, value4word, SImode, + subreg_lowpart_offset(QImode, SImode)); + int offset = 0; + for (;remain_bytes;--remain_bytes, ++offset) + { + nds32_emit_load_store (value, dstmem, QImode, offset, false); + } + } + + return true; +} + +bool +nds32_expand_setmem (rtx dstmem, rtx size, rtx value, rtx align, + rtx expected_align, + rtx expected_size) +{ + bool align_to_4_bytes = (INTVAL (align) & 3) == 0; + + /* Only expand at O3 */ + if (optimize_size || optimize < 3) + return false; + + if (TARGET_ISA_V3M && !align_to_4_bytes) + return nds32_expand_setmem_loop_v3m (dstmem, size, value); + + if (nds32_expand_setmem_unroll (dstmem, size, value, + align, expected_align, expected_size)) + return true; + + return nds32_expand_setmem_loop (dstmem, size, value); +} + +/* ------------------------------------------------------------------------ */ + +/* 
PART 4: Auxiliary function for expand movstr pattern. */ + +bool +nds32_expand_movstr (rtx dst_end_ptr, + rtx dstmem, + rtx srcmem) +{ + rtx tmp; + rtx dst_base_reg, src_base_reg; + rtx new_dst_base_reg, new_src_base_reg; + rtx last_non_null_char_ptr; + rtx ffbi_result; + rtx loop_label; + + if (optimize_size || optimize < 3) + return false; + + tmp = gen_reg_rtx (SImode); + ffbi_result = gen_reg_rtx (Pmode); + new_dst_base_reg = gen_reg_rtx (Pmode); + new_src_base_reg = gen_reg_rtx (Pmode); + dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0)); + src_base_reg = copy_to_mode_reg (SImode, XEXP (srcmem, 0)); + loop_label = gen_label_rtx (); + + emit_label (loop_label); + emit_insn (gen_lmwzb (new_src_base_reg, src_base_reg, tmp)); + emit_insn (gen_smwzb (new_dst_base_reg, dst_base_reg, tmp)); + emit_insn (gen_unspec_ffb (ffbi_result, tmp, const0_rtx)); + + emit_move_insn (src_base_reg, new_src_base_reg); + emit_move_insn (dst_base_reg, new_dst_base_reg); + + emit_cmp_and_jump_insns (ffbi_result, const0_rtx, EQ, NULL, + SImode, 1, loop_label); + + last_non_null_char_ptr = expand_binop (Pmode, add_optab, dst_base_reg, + ffbi_result, NULL_RTX, 0, OPTAB_WIDEN); + + emit_move_insn (dst_end_ptr, last_non_null_char_ptr); + + return true; +} + +/* ------------------------------------------------------------------------ */ + +/* PART 5: Auxiliary function for expand strlen pattern. */ + +bool +nds32_expand_strlen (rtx result, rtx str, + rtx target_char, rtx align ATTRIBUTE_UNUSED) +{ + rtx base_reg, backup_base_reg; + rtx ffb_result; + rtx target_char_ptr, length; + rtx loop_label, tmp; + + if (optimize_size || optimize < 3) + return false; + + gcc_assert (MEM_P (str)); + gcc_assert (CONST_INT_P (target_char) || REG_P (target_char)); + + base_reg = copy_to_mode_reg (SImode, XEXP (str, 0)); + loop_label = gen_label_rtx (); + + ffb_result = gen_reg_rtx (Pmode); + tmp = gen_reg_rtx (SImode); + backup_base_reg = gen_reg_rtx (SImode); + + /* Emit loop version of strlen. + move $backup_base, $base + .Lloop: + lmw.bim $tmp, [$base], $tmp, 0 + ffb $ffb_result, $tmp, $target_char ! is there $target_char? + beqz $ffb_result, .Lloop + add $last_char_ptr, $base, $ffb_result + sub $length, $last_char_ptr, $backup_base */ + + /* move $backup_base, $base */ + emit_move_insn (backup_base_reg, base_reg); + + /* .Lloop: */ + emit_label (loop_label); + /* lmw.bim $tmp, [$base], $tmp, 0 */ + emit_insn (gen_unaligned_load_update_base_w (base_reg, tmp, base_reg)); + + /* ffb $ffb_result, $tmp, $target_char ! is there $target_char? */ + emit_insn (gen_unspec_ffb (ffb_result, tmp, target_char)); + + /* beqz $ffb_result, .Lloop */ + emit_cmp_and_jump_insns (ffb_result, const0_rtx, EQ, NULL, + SImode, 1, loop_label); + + /* add $target_char_ptr, $base, $ffb_result */ + target_char_ptr = expand_binop (Pmode, add_optab, base_reg, + ffb_result, NULL_RTX, 0, OPTAB_WIDEN); + + /* sub $length, $target_char_ptr, $backup_base */ + length = expand_binop (Pmode, sub_optab, target_char_ptr, + backup_base_reg, NULL_RTX, 0, OPTAB_WIDEN); + + emit_move_insn (result, length); + + return true; +} + +/* ------------------------------------------------------------------------ */ + +/* PART 6: Auxiliary function for expand load_multiple/store_multiple + pattern. */ + +/* Functions to expand load_multiple and store_multiple. + They are auxiliary extern functions to help create rtx template. + Check nds32-multiple.md file for the patterns. 
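+
+   As an illustrative sketch (register numbers hypothetical), a call with
+   count == 2 and base updating builds a PARALLEL of the form
+     (parallel [(set new_base (plus base (const_int 8)))
+                (set (reg:SI r)   (mem:SI base))
+                (set (reg:SI r+1) (mem:SI (plus base (const_int 4))))])
+   which the *lmw_bim_si2 pattern in nds32-multiple.md matches and emits
+   as a single lmw.bim instruction.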
*/ +rtx +nds32_expand_load_multiple (int base_regno, int count, + rtx base_addr, rtx basemem, + bool update_base_reg_p, + rtx *update_base_reg) +{ + int par_index; + int offset; + int start_idx; + rtx result; + rtx new_addr, mem, reg; + + /* Generate a unaligned load to prevent load instruction pull out from + parallel, and then it will generate lwi, and lose unaligned acces */ + if (count == 1) + { + reg = gen_rtx_REG (SImode, base_regno); + if (update_base_reg_p) + { + *update_base_reg = gen_reg_rtx (SImode); + return gen_unaligned_load_update_base_w (*update_base_reg, reg, base_addr); + } + else + return gen_unaligned_load_w (reg, gen_rtx_MEM (SImode, base_addr)); + } + + /* Create the pattern that is presented in nds32-multiple.md. */ + if (update_base_reg_p) + { + result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1)); + start_idx = 1; + } + else + { + result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); + start_idx = 0; + } + + if (update_base_reg_p) + { + offset = count * 4; + new_addr = plus_constant (Pmode, base_addr, offset); + *update_base_reg = gen_reg_rtx (SImode); + + XVECEXP (result, 0, 0) = gen_rtx_SET (VOIDmode, + *update_base_reg, new_addr); + } + + for (par_index = 0; par_index < count; par_index++) + { + offset = par_index * 4; + /* 4-byte for loading data to each register. */ + new_addr = plus_constant (Pmode, base_addr, offset); + mem = adjust_automodify_address_nv (basemem, SImode, + new_addr, offset); + reg = gen_rtx_REG (SImode, base_regno + par_index); + + XVECEXP (result, 0, (par_index + start_idx)) = gen_rtx_SET (VOIDmode, reg, mem); + } + + return result; +} + +rtx +nds32_expand_store_multiple (int base_regno, int count, + rtx base_addr, rtx basemem, + bool update_base_reg_p, + rtx *update_base_reg) +{ + int par_index; + int offset; + int start_idx; + rtx result; + rtx new_addr, mem, reg; + + if (count == 1) + { + reg = gen_rtx_REG (SImode, base_regno); + if (update_base_reg_p) + { + *update_base_reg = gen_reg_rtx (SImode); + return gen_unaligned_store_update_base_w (*update_base_reg, base_addr, reg); + } + else + return gen_unaligned_store_w (gen_rtx_MEM (SImode, base_addr), reg); + } + + /* Create the pattern that is presented in nds32-multiple.md. */ + + if (update_base_reg_p) + { + result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1)); + start_idx = 1; + } + else + { + result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); + start_idx = 0; + } + + if (update_base_reg_p) + { + offset = count * 4; + new_addr = plus_constant (Pmode, base_addr, offset); + *update_base_reg = gen_reg_rtx (SImode); + + XVECEXP (result, 0, 0) = gen_rtx_SET (VOIDmode, + *update_base_reg, new_addr); + } + + for (par_index = 0; par_index < count; par_index++) + { + offset = par_index * 4; + /* 4-byte for storing data to memory. 
*/ + new_addr = plus_constant (Pmode, base_addr, offset); + mem = adjust_automodify_address_nv (basemem, SImode, + new_addr, offset); + reg = gen_rtx_REG (SImode, base_regno + par_index); + + XVECEXP (result, 0, par_index + start_idx) = gen_rtx_SET (VOIDmode, mem, reg); + } + + + return result; +} + +/* ------------------------------------------------------------------------ */ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-modes.def gcc-4.9.4/gcc/config/nds32/nds32-modes.def --- gcc-4.9.4.orig/gcc/config/nds32/nds32-modes.def 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-modes.def 2016-08-08 20:37:45.506270091 +0200 @@ -1,5 +1,5 @@ /* Extra machine modes of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -18,4 +18,6 @@ along with GCC; see the file COPYING3. If not see . */ -/* So far, there is no need to define any modes for nds32 target. */ +/* Vector modes. */ +VECTOR_MODES (INT, 4); /* V4QI V2HI */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-multiple.md gcc-4.9.4/gcc/config/nds32/nds32-multiple.md --- gcc-4.9.4.orig/gcc/config/nds32/nds32-multiple.md 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-multiple.md 2016-08-08 20:37:45.510270246 +0200 @@ -1,5 +1,5 @@ ;; Load/Store Multiple patterns description of Andes NDS32 cpu for GNU compiler -;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; Copyright (C) 2012-2015 Free Software Foundation, Inc. ;; Contributed by Andes Technology Corporation.for NDS32. ;; ;; This file is part of GCC. @@ -49,17 +49,19 @@ otherwise we have to FAIL this rtx generation: 1. The number of consecutive registers must be integer. 2. Maximum 4 or 8 registers for lmw.bi instruction - (based on this nds32-multiple.md design). + (based on this nds32-multiple.md design). 3. Minimum 2 registers for lmw.bi instruction - (based on this nds32-multiple.md design). + (based on this nds32-multiple.md design). 4. operands[0] must be register for sure. 5. operands[1] must be memory for sure. - 6. Do not cross $r15 register because it is not allocatable. */ + 6. operands[1] is not volatile memory access. + 7. Do not cross $r15 register because it is not allocatable. */ if (GET_CODE (operands[2]) != CONST_INT || INTVAL (operands[2]) > maximum || INTVAL (operands[2]) < 2 || GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != MEM + || MEM_VOLATILE_P (operands[1]) || REGNO (operands[0]) + INTVAL (operands[2]) > TA_REGNUM) FAIL; @@ -69,11 +71,294 @@ INTVAL (operands[2]), force_reg (SImode, XEXP (operands[1], 0)), - operands[1]); + operands[1], + false, NULL); }) ;; Ordinary Load Multiple. 
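+;; The lmw_bim_siN patterns that follow match the PARALLEL built by
+;; nds32_expand_load_multiple: one base-register update plus N consecutive
+;; word loads, all emitted as a single lmw.bim instruction.  Illustrative
+;; output for the two-register case would be something like
+;;   lmw.bim $r16, [$r1], $r17, 0x0
+;; with $r1 post-incremented by 8 (register numbers hypothetical).
+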
+(define_insn "*lmw_bim_si10" + [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" + [(set (match_operand:SI 1 "register_operand" "=r") + (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 40))) + (set (match_operand:SI 3 "register_operand" "") + (mem:SI (match_dup 2))) + (set (match_operand:SI 4 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 4)))) + (set (match_operand:SI 5 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 8)))) + (set (match_operand:SI 6 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 12)))) + (set (match_operand:SI 7 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 16)))) + (set (match_operand:SI 8 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 20)))) + (set (match_operand:SI 9 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 24)))) + (set (match_operand:SI 10 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 28)))) + (set (match_operand:SI 11 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 32)))) + (set (match_operand:SI 12 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 36))))])] + "(XVECLEN (operands[0], 0) == 11)" + "lmw.bim\t%3, [%1], %12, 0x0" + [(set_attr "type" "load_multiple") + (set_attr "combo" "10") + (set_attr "length" "4")] +) + +(define_insn "*lmw_bim_si9" + [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" + [(set (match_operand:SI 1 "register_operand" "=r") + (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 36))) + (set (match_operand:SI 3 "register_operand" "") + (mem:SI (match_dup 2))) + (set (match_operand:SI 4 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 4)))) + (set (match_operand:SI 5 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 8)))) + (set (match_operand:SI 6 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 12)))) + (set (match_operand:SI 7 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 16)))) + (set (match_operand:SI 8 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 20)))) + (set (match_operand:SI 9 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 24)))) + (set (match_operand:SI 10 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 28)))) + (set (match_operand:SI 11 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 32))))])] + "(XVECLEN (operands[0], 0) == 10)" + "lmw.bim\t%3, [%1], %11, 0x0" + [(set_attr "type" "load_multiple") + (set_attr "combo" "9") + (set_attr "length" "4")] +) + +(define_insn "*lmw_bim_si8" + [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" + [(set (match_operand:SI 1 "register_operand" "=r") + (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 32))) + (set (match_operand:SI 3 "register_operand" "") + (mem:SI (match_dup 2))) + (set (match_operand:SI 4 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 4)))) + (set (match_operand:SI 5 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 8)))) + (set (match_operand:SI 6 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 12)))) + (set (match_operand:SI 7 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 16)))) + (set (match_operand:SI 8 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 20)))) + (set (match_operand:SI 9 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 
24)))) + (set (match_operand:SI 10 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 28))))])] + "(XVECLEN (operands[0], 0) == 9)" + "lmw.bim\t%3, [%1], %10, 0x0" + [(set_attr "type" "load_multiple") + (set_attr "combo" "8") + (set_attr "length" "4")] +) + +(define_insn "*lmw_bim_si7" + [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" + [(set (match_operand:SI 1 "register_operand" "=r") + (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 28))) + (set (match_operand:SI 3 "register_operand" "") + (mem:SI (match_dup 2))) + (set (match_operand:SI 4 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 4)))) + (set (match_operand:SI 5 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 8)))) + (set (match_operand:SI 6 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 12)))) + (set (match_operand:SI 7 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 16)))) + (set (match_operand:SI 8 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 20)))) + (set (match_operand:SI 9 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 24))))])] + "(XVECLEN (operands[0], 0) == 8)" + "lmw.bim\t%3, [%1], %9, 0x0" + [(set_attr "type" "load_multiple") + (set_attr "combo" "7") + (set_attr "length" "4")] +) + +(define_insn "*lmw_bim_si6" + [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" + [(set (match_operand:SI 1 "register_operand" "=r") + (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 24))) + (set (match_operand:SI 3 "register_operand" "") + (mem:SI (match_dup 2))) + (set (match_operand:SI 4 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 4)))) + (set (match_operand:SI 5 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 8)))) + (set (match_operand:SI 6 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 12)))) + (set (match_operand:SI 7 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 16)))) + (set (match_operand:SI 8 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 20))))])] + "(XVECLEN (operands[0], 0) == 7)" + "lmw.bim\t%3, [%1], %8, 0x0" + [(set_attr "type" "load_multiple") + (set_attr "combo" "6") + (set_attr "length" "4")] +) + +(define_insn "*lmw_bim_si5" + [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" + [(set (match_operand:SI 1 "register_operand" "=r") + (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 20))) + (set (match_operand:SI 3 "register_operand" "") + (mem:SI (match_dup 2))) + (set (match_operand:SI 4 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 4)))) + (set (match_operand:SI 5 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 8)))) + (set (match_operand:SI 6 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 12)))) + (set (match_operand:SI 7 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 16))))])] + "(XVECLEN (operands[0], 0) == 6)" + "lmw.bim\t%3, [%1], %7, 0x0" + [(set_attr "type" "load_multiple") + (set_attr "combo" "5") + (set_attr "length" "4")] +) + +(define_insn "*lmw_bim_si4" + [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" + [(set (match_operand:SI 1 "register_operand" "=r") + (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 16))) + (set (match_operand:SI 3 "register_operand" "") + (mem:SI (match_dup 2))) + (set (match_operand:SI 4 "register_operand" "") + (mem:SI (plus:SI 
(match_dup 2) (const_int 4)))) + (set (match_operand:SI 5 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 8)))) + (set (match_operand:SI 6 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 12))))])] + "(XVECLEN (operands[0], 0) == 5)" + "lmw.bim\t%3, [%1], %6, 0x0" + [(set_attr "type" "load_multiple") + (set_attr "combo" "4") + (set_attr "length" "4")] +) + +(define_insn "*lmw_bim_si3" + [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" + [(set (match_operand:SI 1 "register_operand" "=r") + (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 12))) + (set (match_operand:SI 3 "register_operand" "") + (mem:SI (match_dup 2))) + (set (match_operand:SI 4 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 4)))) + (set (match_operand:SI 5 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 8))))])] + "(XVECLEN (operands[0], 0) == 4)" + "lmw.bim\t%3, [%1], %5, 0x0" + [(set_attr "type" "load_multiple") + (set_attr "combo" "3") + (set_attr "length" "4")] +) + +(define_insn "*lmw_bim_si2" + [(match_parallel 0 "nds32_load_multiple_and_update_address_operation" + [(set (match_operand:SI 1 "register_operand" "=r") + (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 8))) + (set (match_operand:SI 3 "register_operand" "") + (mem:SI (match_dup 2))) + (set (match_operand:SI 4 "register_operand" "") + (mem:SI (plus:SI (match_dup 2) (const_int 4))))])] + "(XVECLEN (operands[0], 0) == 3)" + "lmw.bim\t%3, [%1], %4, 0x0" + [(set_attr "type" "load_multiple") + (set_attr "combo" "2") + (set_attr "length" "4")] +) + +(define_expand "unaligned_load_update_base_w" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 2 "register_operand" "") (const_int 4))) + (set (match_operand:SI 1 "register_operand" "") + (unspec:SI [(mem:SI (match_dup 2))] UNSPEC_UALOAD_W))])] + "" +{ + /* DO NOT emit unaligned_load_w_m immediately since web pass don't + recognize post_inc, try it again after GCC 5.0. 
+ REF: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63156 */ + emit_insn (gen_unaligned_load_w (operands[1], gen_rtx_MEM (SImode, operands[2]))); + emit_insn (gen_addsi3 (operands[0], operands[2], gen_int_mode (4, Pmode))); + DONE; +} + [(set_attr "type" "load_multiple") + (set_attr "combo" "1") + (set_attr "length" "4")] +) + +(define_insn "*lmwsi10" + [(match_parallel 0 "nds32_load_multiple_operation" + [(set (match_operand:SI 2 "register_operand" "") + (mem:SI (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:SI 3 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 4)))) + (set (match_operand:SI 4 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 8)))) + (set (match_operand:SI 5 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 12)))) + (set (match_operand:SI 6 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 16)))) + (set (match_operand:SI 7 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 20)))) + (set (match_operand:SI 8 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 24)))) + (set (match_operand:SI 9 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 28)))) + (set (match_operand:SI 10 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 32)))) + (set (match_operand:SI 11 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 36))))])] + "(XVECLEN (operands[0], 0) == 10)" + "lmw.bi\t%2, [%1], %11, 0x0" + [(set_attr "type" "load_multiple") + (set_attr "combo" "10") + (set_attr "length" "4")] +) + +(define_insn "*lmwsi9" + [(match_parallel 0 "nds32_load_multiple_operation" + [(set (match_operand:SI 2 "register_operand" "") + (mem:SI (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:SI 3 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 4)))) + (set (match_operand:SI 4 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 8)))) + (set (match_operand:SI 5 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 12)))) + (set (match_operand:SI 6 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 16)))) + (set (match_operand:SI 7 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 20)))) + (set (match_operand:SI 8 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 24)))) + (set (match_operand:SI 9 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 28)))) + (set (match_operand:SI 10 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 32))))])] + "(XVECLEN (operands[0], 0) == 9)" + "lmw.bi\t%2, [%1], %10, 0x0" + [(set_attr "type" "load_multiple") + (set_attr "combo" "9") + (set_attr "length" "4")] +) + (define_insn "*lmwsi8" [(match_parallel 0 "nds32_load_multiple_operation" [(set (match_operand:SI 2 "register_operand" "") @@ -94,8 +379,9 @@ (mem:SI (plus:SI (match_dup 1) (const_int 28))))])] "(XVECLEN (operands[0], 0) == 8)" "lmw.bi\t%2, [%1], %9, 0x0" - [(set_attr "type" "load") - (set_attr "length" "4")] + [(set_attr "type" "load_multiple") + (set_attr "combo" "8") + (set_attr "length" "4")] ) (define_insn "*lmwsi7" @@ -116,8 +402,9 @@ (mem:SI (plus:SI (match_dup 1) (const_int 24))))])] "(XVECLEN (operands[0], 0) == 7)" "lmw.bi\t%2, [%1], %8, 0x0" - [(set_attr "type" "load") - (set_attr "length" "4")] + [(set_attr "type" "load_multiple") + (set_attr "combo" "7") + (set_attr "length" "4")] ) (define_insn "*lmwsi6" @@ -136,8 +423,9 @@ (mem:SI (plus:SI (match_dup 1) (const_int 20))))])] "(XVECLEN 
(operands[0], 0) == 6)" "lmw.bi\t%2, [%1], %7, 0x0" - [(set_attr "type" "load") - (set_attr "length" "4")] + [(set_attr "type" "load_multiple") + (set_attr "combo" "6") + (set_attr "length" "4")] ) (define_insn "*lmwsi5" @@ -154,8 +442,9 @@ (mem:SI (plus:SI (match_dup 1) (const_int 16))))])] "(XVECLEN (operands[0], 0) == 5)" "lmw.bi\t%2, [%1], %6, 0x0" - [(set_attr "type" "load") - (set_attr "length" "4")] + [(set_attr "type" "load_multiple") + (set_attr "combo" "5") + (set_attr "length" "4")] ) (define_insn "*lmwsi4" @@ -170,8 +459,9 @@ (mem:SI (plus:SI (match_dup 1) (const_int 12))))])] "(XVECLEN (operands[0], 0) == 4)" "lmw.bi\t%2, [%1], %5, 0x0" - [(set_attr "type" "load") - (set_attr "length" "4")] + [(set_attr "type" "load_multiple") + (set_attr "combo" "4") + (set_attr "length" "4")] ) (define_insn "*lmwsi3" @@ -184,8 +474,9 @@ (mem:SI (plus:SI (match_dup 1) (const_int 8))))])] "(XVECLEN (operands[0], 0) == 3)" "lmw.bi\t%2, [%1], %4, 0x0" - [(set_attr "type" "load") - (set_attr "length" "4")] + [(set_attr "type" "load_multiple") + (set_attr "combo" "3") + (set_attr "length" "4")] ) (define_insn "*lmwsi2" @@ -196,15 +487,15 @@ (mem:SI (plus:SI (match_dup 1) (const_int 4))))])] "(XVECLEN (operands[0], 0) == 2)" "lmw.bi\t%2, [%1], %3, 0x0" - [(set_attr "type" "load") - (set_attr "length" "4")] + [(set_attr "type" "load_multiple") + (set_attr "combo" "2") + (set_attr "length" "4")] ) - ;; Store Multiple Insns. ;; ;; operands[0] is the first memory location. -;; opernads[1] is the first of the consecutive registers. +;; operands[1] is the first of the consecutive registers. ;; operands[2] is the number of consecutive registers. (define_expand "store_multiple" @@ -231,17 +522,19 @@ otherwise we have to FAIL this rtx generation: 1. The number of consecutive registers must be integer. 2. Maximum 4 or 8 registers for smw.bi instruction - (based on this nds32-multiple.md design). + (based on this nds32-multiple.md design). 3. Minimum 2 registers for smw.bi instruction - (based on this nds32-multiple.md design). + (based on this nds32-multiple.md design). 4. operands[0] must be memory for sure. 5. operands[1] must be register for sure. - 6. Do not cross $r15 register because it is not allocatable. */ + 6. operands[0] is not volatile memory access. + 7. Do not cross $r15 register because it is not allocatable. */ if (GET_CODE (operands[2]) != CONST_INT || INTVAL (operands[2]) > maximum || INTVAL (operands[2]) < 2 || GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != REG + || MEM_VOLATILE_P (operands[0]) || REGNO (operands[1]) + INTVAL (operands[2]) > TA_REGNUM) FAIL; @@ -251,11 +544,295 @@ INTVAL (operands[2]), force_reg (SImode, XEXP (operands[0], 0)), - operands[0]); + operands[0], + false, NULL); }) ;; Ordinary Store Multiple. 
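+;; The *stm_bim_siN patterns below describe the base-updating form of the
+;; store-multiple instruction (smw.bim): the first element of the PARALLEL
+;; advances the base register by 4*N bytes, and the remaining N elements
+;; store consecutive registers at increasing word offsets.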
+(define_insn "*stm_bim_si10" + [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" + [(set (match_operand:SI 1 "register_operand" "=r") + (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 40))) + (set (mem:SI (match_dup 2)) + (match_operand:SI 3 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) + (match_operand:SI 4 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) + (match_operand:SI 5 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) + (match_operand:SI 6 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) + (match_operand:SI 7 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) + (match_operand:SI 8 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) + (match_operand:SI 9 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 28))) + (match_operand:SI 10 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 32))) + (match_operand:SI 11 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 36))) + (match_operand:SI 12 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 11)" + "smw.bim\t%3, [%1], %12, 0x0" + [(set_attr "type" "store_multiple") + (set_attr "combo" "10") + (set_attr "length" "4")] +) + +(define_insn "*stm_bim_si9" + [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" + [(set (match_operand:SI 1 "register_operand" "=r") + (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 36))) + (set (mem:SI (match_dup 2)) + (match_operand:SI 3 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) + (match_operand:SI 4 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) + (match_operand:SI 5 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) + (match_operand:SI 6 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) + (match_operand:SI 7 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) + (match_operand:SI 8 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) + (match_operand:SI 9 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 28))) + (match_operand:SI 10 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 32))) + (match_operand:SI 11 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 10)" + "smw.bim\t%3, [%1], %11, 0x0" + [(set_attr "type" "store_multiple") + (set_attr "combo" "9") + (set_attr "length" "4")] +) + + +(define_insn "*stm_bim_si8" + [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" + [(set (match_operand:SI 1 "register_operand" "=r") + (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 32))) + (set (mem:SI (match_dup 2)) + (match_operand:SI 3 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) + (match_operand:SI 4 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) + (match_operand:SI 5 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) + (match_operand:SI 6 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) + (match_operand:SI 7 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) + (match_operand:SI 8 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) + (match_operand:SI 9 
"register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 28))) + (match_operand:SI 10 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 9)" + "smw.bim\t%3, [%1], %10, 0x0" + [(set_attr "type" "store_multiple") + (set_attr "combo" "8") + (set_attr "length" "4")] +) + +(define_insn "*stm_bim_si7" + [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" + [(set (match_operand:SI 1 "register_operand" "=r") + (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 28))) + (set (mem:SI (match_dup 2)) + (match_operand:SI 3 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) + (match_operand:SI 4 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) + (match_operand:SI 5 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) + (match_operand:SI 6 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) + (match_operand:SI 7 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) + (match_operand:SI 8 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 24))) + (match_operand:SI 9 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 8)" + "smw.bim\t%3, [%1], %9, 0x0" + [(set_attr "type" "store_multiple") + (set_attr "combo" "7") + (set_attr "length" "4")] +) + +(define_insn "*stm_bim_si6" + [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" + [(set (match_operand:SI 1 "register_operand" "=r") + (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 24))) + (set (mem:SI (match_dup 2)) + (match_operand:SI 3 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) + (match_operand:SI 4 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) + (match_operand:SI 5 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) + (match_operand:SI 6 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) + (match_operand:SI 7 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 20))) + (match_operand:SI 8 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 7)" + "smw.bim\t%3, [%1], %8, 0x0" + [(set_attr "type" "store_multiple") + (set_attr "combo" "6") + (set_attr "length" "4")] +) + +(define_insn "*stm_bim_si5" + [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" + [(set (match_operand:SI 1 "register_operand" "=r") + (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 20))) + (set (mem:SI (match_dup 2)) + (match_operand:SI 3 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) + (match_operand:SI 4 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) + (match_operand:SI 5 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) + (match_operand:SI 6 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 16))) + (match_operand:SI 7 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 6)" + "smw.bim\t%3, [%1], %7, 0x0" + [(set_attr "type" "store_multiple") + (set_attr "combo" "5") + (set_attr "length" "4")] +) + +(define_insn "*stm_bim_si4" + [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" + [(set (match_operand:SI 1 "register_operand" "=r") + (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 16))) + (set (mem:SI (match_dup 2)) + (match_operand:SI 3 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) 
(const_int 4))) + (match_operand:SI 4 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) + (match_operand:SI 5 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 12))) + (match_operand:SI 6 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 5)" + "smw.bim\t%3, [%1], %6, 0x0" + [(set_attr "type" "store_multiple") + (set_attr "combo" "4") + (set_attr "length" "4")] +) + +(define_insn "*stm_bim_si3" + [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" + [(set (match_operand:SI 1 "register_operand" "=r") + (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 12))) + (set (mem:SI (match_dup 2)) + (match_operand:SI 3 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) + (match_operand:SI 4 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) + (match_operand:SI 5 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 4)" + "smw.bim\t%3, [%1], %5, 0x0" + [(set_attr "type" "store_multiple") + (set_attr "combo" "3") + (set_attr "length" "4")] +) + +(define_insn "*stm_bim_si2" + [(match_parallel 0 "nds32_store_multiple_and_update_address_operation" + [(set (match_operand:SI 1 "register_operand" "=r") + (plus:SI (match_operand:SI 2 "register_operand" "1") (const_int 8))) + (set (mem:SI (match_dup 2)) + (match_operand:SI 3 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) + (match_operand:SI 4 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 3)" + "smw.bim\t%3, [%1], %4, 0x0" + [(set_attr "type" "store_multiple") + (set_attr "combo" "2") + (set_attr "length" "4")] +) + +(define_expand "unaligned_store_update_base_w" + [(parallel [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "0") (const_int 4))) + (set (mem:SI (match_dup 1)) + (unspec:SI [(match_operand:SI 2 "register_operand" "r")] UNSPEC_UASTORE_W))])] + "" +{ + /* DO NOT emit unaligned_store_w_m immediately since web pass don't + recognize post_inc, try it again after GCC 5.0. 
+ REF: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63156 */ + emit_insn (gen_unaligned_store_w (gen_rtx_MEM (SImode, operands[1]), operands[2])); + emit_insn (gen_addsi3 (operands[0], operands[1], gen_int_mode (4, Pmode))); + DONE; +} + [(set_attr "type" "store_multiple") + (set_attr "combo" "1") + (set_attr "length" "4")] +) + +(define_insn "*stmsi10" + [(match_parallel 0 "nds32_store_multiple_operation" + [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) + (match_operand:SI 3 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) + (match_operand:SI 4 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) + (match_operand:SI 5 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) + (match_operand:SI 6 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) + (match_operand:SI 7 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) + (match_operand:SI 8 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 28))) + (match_operand:SI 9 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 32))) + (match_operand:SI 10 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 36))) + (match_operand:SI 11 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 10)" + "smw.bi\t%2, [%1], %11, 0x0" + [(set_attr "type" "store_multiple") + (set_attr "combo" "10") + (set_attr "length" "4")] +) + +(define_insn "*stmsi9" + [(match_parallel 0 "nds32_store_multiple_operation" + [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) + (match_operand:SI 3 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) + (match_operand:SI 4 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) + (match_operand:SI 5 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) + (match_operand:SI 6 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) + (match_operand:SI 7 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) + (match_operand:SI 8 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 28))) + (match_operand:SI 9 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 32))) + (match_operand:SI 10 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 9)" + "smw.bi\t%2, [%1], %10, 0x0" + [(set_attr "type" "store_multiple") + (set_attr "combo" "9") + (set_attr "length" "4")] +) + (define_insn "*stmsi8" [(match_parallel 0 "nds32_store_multiple_operation" [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) @@ -276,8 +853,9 @@ (match_operand:SI 9 "register_operand" ""))])] "(XVECLEN (operands[0], 0) == 8)" "smw.bi\t%2, [%1], %9, 0x0" - [(set_attr "type" "store") - (set_attr "length" "4")] + [(set_attr "type" "store_multiple") + (set_attr "combo" "8") + (set_attr "length" "4")] ) (define_insn "*stmsi7" @@ -298,8 +876,9 @@ (match_operand:SI 8 "register_operand" ""))])] "(XVECLEN (operands[0], 0) == 7)" "smw.bi\t%2, [%1], %8, 0x0" - [(set_attr "type" "store") - (set_attr "length" "4")] + [(set_attr "type" "store_multiple") + (set_attr "combo" "7") + (set_attr "length" "4")] ) (define_insn "*stmsi6" @@ -318,8 +897,9 @@ (match_operand:SI 7 "register_operand" ""))])] 
"(XVECLEN (operands[0], 0) == 6)" "smw.bi\t%2, [%1], %7, 0x0" - [(set_attr "type" "store") - (set_attr "length" "4")] + [(set_attr "type" "store_multiple") + (set_attr "combo" "6") + (set_attr "length" "4")] ) (define_insn "*stmsi5" @@ -336,8 +916,9 @@ (match_operand:SI 6 "register_operand" ""))])] "(XVECLEN (operands[0], 0) == 5)" "smw.bi\t%2, [%1], %6, 0x0" - [(set_attr "type" "store") - (set_attr "length" "4")] + [(set_attr "type" "store_multiple") + (set_attr "combo" "5") + (set_attr "length" "4")] ) (define_insn "*stmsi4" @@ -352,8 +933,9 @@ (match_operand:SI 5 "register_operand" ""))])] "(XVECLEN (operands[0], 0) == 4)" "smw.bi\t%2, [%1], %5, 0x0" - [(set_attr "type" "store") - (set_attr "length" "4")] + [(set_attr "type" "store_multiple") + (set_attr "combo" "4") + (set_attr "length" "4")] ) (define_insn "*stmsi3" @@ -366,8 +948,9 @@ (match_operand:SI 4 "register_operand" ""))])] "(XVECLEN (operands[0], 0) == 3)" "smw.bi\t%2, [%1], %4, 0x0" - [(set_attr "type" "store") - (set_attr "length" "4")] + [(set_attr "type" "store_multiple") + (set_attr "combo" "3") + (set_attr "length" "4")] ) (define_insn "*stmsi2" @@ -378,8 +961,9 @@ (match_operand:SI 3 "register_operand" ""))])] "(XVECLEN (operands[0], 0) == 2)" "smw.bi\t%2, [%1], %3, 0x0" - [(set_attr "type" "store") - (set_attr "length" "4")] + [(set_attr "type" "store_multiple") + (set_attr "combo" "2") + (set_attr "length" "4")] ) ;; Move a block of memory if it is word aligned and MORE than 2 words long. @@ -391,14 +975,14 @@ ;; operands[2] is the number of bytes to move. ;; operands[3] is the known shared alignment. -(define_expand "movmemqi" +(define_expand "movmemsi" [(match_operand:BLK 0 "general_operand" "") (match_operand:BLK 1 "general_operand" "") - (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 2 "nds32_reg_constant_operand" "") (match_operand:SI 3 "const_int_operand" "")] "" { - if (nds32_expand_movmemqi (operands[0], + if (nds32_expand_movmemsi (operands[0], operands[1], operands[2], operands[3])) @@ -408,3 +992,75 @@ }) ;; ------------------------------------------------------------------------ + +(define_insn "lmwzb" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "0") (const_int 4))) + (set (match_operand:SI 2 "register_operand" "=r") + (unspec:SI [(mem:SI (match_dup 1))] UNSPEC_LMWZB))] + "" + "lmwzb.bm\t%2, [%1], %2, 0x0" + [(set_attr "type" "load_multiple") + (set_attr "combo" "1") + (set_attr "length" "4")] +) + +(define_insn "smwzb" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "0") (const_int 4))) + (set (mem:SI (match_dup 1)) + (unspec:SI [(match_operand:SI 2 "register_operand" "r")] UNSPEC_SMWZB))] + "" + "smwzb.bm\t%2, [%1], %2, 0x0" + [(set_attr "type" "store_multiple") + (set_attr "combo" "1") + (set_attr "length" "4")] +) + +(define_expand "movstr" + [(match_operand:SI 0 "register_operand" "") + (match_operand:BLK 1 "memory_operand" "") + (match_operand:BLK 2 "memory_operand" "")] + "TARGET_EXT_STRING && TARGET_INLINE_STRCPY" +{ + if (nds32_expand_movstr (operands[0], + operands[1], + operands[2])) + DONE; + + FAIL; +}) + +(define_expand "strlensi" + [(match_operand:SI 0 "register_operand") + (match_operand:BLK 1 "memory_operand") + (match_operand:QI 2 "nds32_reg_constant_operand") + (match_operand 3 "const_int_operand")] + "TARGET_EXT_STRING" +{ + if (nds32_expand_strlen (operands[0], operands[1], operands[2], operands[3])) + DONE; + + FAIL; +}) + +(define_expand "setmemsi" + 
[(use (match_operand:BLK 0 "memory_operand")) + (use (match_operand:SI 1 "nds32_reg_constant_operand")) + (use (match_operand:QI 2 "nonmemory_operand")) + (use (match_operand 3 "const_int_operand")) + (use (match_operand:SI 4 "const_int_operand")) + (use (match_operand:SI 5 "const_int_operand"))] + "" +{ + if (nds32_expand_setmem (operands[0], operands[1], + operands[2], operands[3], + operands[4], operands[5])) + DONE; + + FAIL; +}) + + + +;; ------------------------------------------------------------------------ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-n13.md gcc-4.9.4/gcc/config/nds32/nds32-n13.md --- gcc-4.9.4.orig/gcc/config/nds32/nds32-n13.md 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-n13.md 2016-08-08 20:37:45.510270246 +0200 @@ -0,0 +1,306 @@ +;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2015 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +;; ------------------------------------------------------------------------ +;; Define N13 pipeline settings. +;; ------------------------------------------------------------------------ + +(define_automaton "nds32_n13_machine") + +(define_cpu_unit "n13_i1" "nds32_n13_machine") +(define_cpu_unit "n13_i2" "nds32_n13_machine") +(define_cpu_unit "n13_e1" "nds32_n13_machine") +(define_cpu_unit "n13_e2" "nds32_n13_machine") +(define_cpu_unit "n13_e3" "nds32_n13_machine") +(define_cpu_unit "n13_e4" "nds32_n13_machine") + +(define_insn_reservation "nds_n13_unknown" 1 + (and (eq_attr "type" "unknown") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_misc" 1 + (and (eq_attr "type" "misc") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_mmu" 1 + (and (eq_attr "type" "mmu") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_alu" 1 + (and (eq_attr "type" "alu") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_alu_shift" 1 + (and (eq_attr "type" "alu_shift") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_pbsad" 1 + (and (eq_attr "type" "pbsad") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2*2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_pbsada" 1 + (and (eq_attr "type" "pbsada") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2*3, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_load" 1 + (and (match_test "nds32_load_single_p (insn)") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + 
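+;; A single-word store, like the single-word load above, occupies each of the
+;; six N13 stages (I1, I2, E1-E4) for one cycle; the load/store-multiple
+;; reservations that follow overlap consecutive words, so each additional
+;; word adds one more cycle of pipeline occupancy.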
+(define_insn_reservation "nds_n13_store" 1 + (and (match_test "nds32_store_single_p (insn)") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_1" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "1")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_2" 1 + (and (ior (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "2")) + (match_test "nds32_load_double_p (insn)")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_3" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "3")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2+n13_i2, n13_i1+n13_i2+n13_e1, n13_i2+n13_e1+n13_e2, n13_e1+n13_e2+n13_e3, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_4" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "4")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i2+n13_e1+n13_e2+n13_e3, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_5" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "5")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_6" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "6")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_7" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "7")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*2, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_8" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "8")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_12" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "12")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*7, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_1" 1 + (and (and (eq_attr "type" 
"store_multiple") + (eq_attr "combo" "1")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_2" 1 + (and (ior (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "2")) + (match_test "nds32_store_double_p (insn)")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_3" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "3")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2+n13_i2, n13_i1+n13_i2+n13_e1, n13_i2+n13_e1+n13_e2, n13_e1+n13_e2+n13_e3, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_4" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "4")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i2+n13_e1+n13_e2+n13_e3, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_5" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "5")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_6" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "6")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_7" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "7")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*2, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_8" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "8")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_12" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "12")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*7, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_mul" 1 + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1*2, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_mac" 1 + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1*2, n13_e2, n13_e3, n13_e4") + +;; The 
cycles consumed in E2 stage is 32 - CLZ(abs(Ra)) + 2, +;; so the worst case is 34. +(define_insn_reservation "nds_n13_div" 1 + (and (eq_attr "type" "div") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2*34, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_branch" 1 + (and (eq_attr "type" "branch") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +;; LD -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN +(define_bypass 3 + "nds_n13_load" + "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\ + nds_n13_mul, nds_n13_mac, nds_n13_div,\ + nds_n13_mmu,\ + nds_n13_load, nds_n13_store,\ + nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds32_n13_load_to_e1_p" +) + +;; LD -> ALU, ALU_SHIFT_Rb, PBSADA_Rt, BR, ST, SMW(N, 1) +(define_bypass 2 + "nds_n13_load" + "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsada, nds_n13_branch, nds_n13_store,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds32_n13_load_to_e2_p" +) + +;; LMW(N, N) -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN +(define_bypass 3 + "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12" + "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\ + nds_n13_mul, nds_n13_mac, nds_n13_div,\ + nds_n13_mmu,\ + nds_n13_load, nds_n13_store,\ + nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds32_n13_last_load_to_e1_p") + +;; LMW(N, N) -> ALU, ALU_SHIFT_Rb, PBSADA_Rt, BR, ST, SMW(N, 1) +(define_bypass 2 + "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12" + "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsada, nds_n13_branch, nds_n13_store,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds32_n13_last_load_to_e2_p" +) + +;; LMW(N, N - 1) -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN +(define_bypass 2 + "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + 
nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12" + "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\ + nds_n13_mul, nds_n13_mac, nds_n13_div,\ + nds_n13_mmu,\ + nds_n13_load, nds_n13_store,\ + nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds32_n13_last_two_load_to_e1_p") + +;; ALU, ALU_SHIFT, SIMD, BR, MUL, MAC, DIV, ADDR_OUT +;; -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN +(define_bypass 2 + "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsad, nds_n13_pbsada, nds_n13_branch,\ + nds_n13_mul, nds_n13_mac, nds_n13_div,\ + nds_n13_load, nds_n13_store,\ + nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\ + nds_n13_mul, nds_n13_mac, nds_n13_div,\ + nds_n13_mmu,\ + nds_n13_load, nds_n13_store,\ + nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds32_n13_e2_to_e1_p") diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-n7.md gcc-4.9.4/gcc/config/nds32/nds32-n7.md --- gcc-4.9.4.orig/gcc/config/nds32/nds32-n7.md 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-n7.md 2016-08-08 20:37:45.510270246 +0200 @@ -0,0 +1,237 @@ +;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2015 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +;; ------------------------------------------------------------------------ +;; Define N8 pipeline settings. 
+;; ------------------------------------------------------------------------ + +(define_automaton "nds32_n7_machine") + +(define_cpu_unit "n7_ii" "nds32_n7_machine") + +(define_insn_reservation "nds_n7_unknown" 1 + (and (eq_attr "type" "unknown") + (eq_attr "pipeline_model" "n7")) + "n7_ii") + +(define_insn_reservation "nds_n7_misc" 1 + (and (eq_attr "type" "misc") + (eq_attr "pipeline_model" "n7")) + "n7_ii") + +(define_insn_reservation "nds_n7_alu" 1 + (and (eq_attr "type" "alu") + (eq_attr "pipeline_model" "n7")) + "n7_ii") + +(define_insn_reservation "nds_n7_load" 1 + (and (match_test "nds32_load_single_p (insn)") + (eq_attr "pipeline_model" "n7")) + "n7_ii") + +(define_insn_reservation "nds_n7_store" 1 + (and (match_test "nds32_store_single_p (insn)") + (eq_attr "pipeline_model" "n7")) + "n7_ii") + +(define_insn_reservation "nds_n7_load_multiple_1" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "1")) + (eq_attr "pipeline_model" "n7")) + "n7_ii") + +(define_insn_reservation "nds_n7_load_multiple_2" 1 + (and (ior (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "2")) + (match_test "nds32_load_double_p (insn)")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*2") + +(define_insn_reservation "nds_n7_load_multiple_3" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "3")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*3") + +(define_insn_reservation "nds_n7_load_multiple_4" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "4")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*4") + +(define_insn_reservation "nds_n7_load_multiple_5" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "5")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*5") + +(define_insn_reservation "nds_n7_load_multiple_6" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "6")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*6") + +(define_insn_reservation "nds_n7_load_multiple_7" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "7")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*7") + +(define_insn_reservation "nds_n7_load_multiple_8" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "8")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*8") + +(define_insn_reservation "nds_n7_load_multiple_12" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "12")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*12") + +(define_insn_reservation "nds_n7_store_multiple_1" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "1")) + (eq_attr "pipeline_model" "n7")) + "n7_ii") + +(define_insn_reservation "nds_n7_store_multiple_2" 1 + (and (ior (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "2")) + (match_test "nds32_store_double_p (insn)")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*2") + +(define_insn_reservation "nds_n7_store_multiple_3" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "3")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*3") + +(define_insn_reservation "nds_n7_store_multiple_4" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "4")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*4") + +(define_insn_reservation "nds_n7_store_multiple_5" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "5")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*5") + +(define_insn_reservation "nds_n7_store_multiple_6" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "6")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*6") + +(define_insn_reservation 
"nds_n7_store_multiple_7" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "7")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*7") + +(define_insn_reservation "nds_n7_store_multiple_8" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "8")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*8") + +(define_insn_reservation "nds_n7_store_multiple_12" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "12")) + (eq_attr "pipeline_model" "n7")) + "n7_ii*12") + +(define_insn_reservation "nds_n7_mul_fast" 1 + (and (match_test "nds32_mul_config != MUL_TYPE_SLOW") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n7"))) + "n7_ii") + +(define_insn_reservation "nds_n7_mul_slow" 1 + (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n7"))) + "n7_ii*17") + +(define_insn_reservation "nds_n7_mac_fast" 1 + (and (match_test "nds32_mul_config != MUL_TYPE_SLOW") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n7"))) + "n7_ii*2") + +(define_insn_reservation "nds_n7_mac_slow" 1 + (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n7"))) + "n7_ii*18") + +(define_insn_reservation "nds_n7_div" 1 + (and (eq_attr "type" "div") + (eq_attr "pipeline_model" "n7")) + "n7_ii*37") + +(define_insn_reservation "nds_n7_branch" 1 + (and (eq_attr "type" "branch") + (eq_attr "pipeline_model" "n7")) + "n7_ii") + +;; LD_!bi +;; -> ALU, MOVD44_E, MUL, MAC_RaRb, DIV, BR, ADDR_IN_MOP(1), ST_bi, ST_!bi_RI, SMW(N, 1) +(define_bypass 2 + "nds_n7_load" + "nds_n7_alu,\ + nds_n7_mul_fast, nds_n7_mul_slow,\ + nds_n7_mac_fast, nds_n7_mac_slow,\ + nds_n7_div,\ + nds_n7_branch,\ + nds_n7_load, nds_n7_store,\ + nds_n7_load_multiple_1,nds_n7_load_multiple_2, nds_n7_load_multiple_3,\ + nds_n7_load_multiple_4,nds_n7_load_multiple_5, nds_n7_load_multiple_6,\ + nds_n7_load_multiple_7,nds_n7_load_multiple_8, nds_n7_load_multiple_12,\ + nds_n7_store_multiple_1,nds_n7_store_multiple_2, nds_n7_store_multiple_3,\ + nds_n7_store_multiple_4,nds_n7_store_multiple_5, nds_n7_store_multiple_6,\ + nds_n7_store_multiple_7,nds_n7_store_multiple_8, nds_n7_store_multiple_12" + "nds32_n7_load_to_ii_p" +) + +;; LMW(N, N) +;; -> ALU, MOVD44_E, MUL, MAC_RaRb, DIV, BR, AADR_IN_MOP(1), ST_bi, ST_!bi_RI, SMW(N, 1) +(define_bypass 2 + "nds_n7_load_multiple_1,nds_n7_load_multiple_2, nds_n7_load_multiple_3,\ + nds_n7_load_multiple_4,nds_n7_load_multiple_5, nds_n7_load_multiple_6,\ + nds_n7_load_multiple_7,nds_n7_load_multiple_8, nds_n7_load_multiple_12" + "nds_n7_alu,\ + nds_n7_mul_fast, nds_n7_mul_slow,\ + nds_n7_mac_fast, nds_n7_mac_slow,\ + nds_n7_div,\ + nds_n7_branch,\ + nds_n7_load, nds_n7_store,\ + nds_n7_load_multiple_1,nds_n7_load_multiple_2, nds_n7_load_multiple_3,\ + nds_n7_load_multiple_4,nds_n7_load_multiple_5, nds_n7_load_multiple_6,\ + nds_n7_load_multiple_7,nds_n7_load_multiple_8, nds_n7_load_multiple_12,\ + nds_n7_store_multiple_1,nds_n7_store_multiple_2, nds_n7_store_multiple_3,\ + nds_n7_store_multiple_4,nds_n7_store_multiple_5, nds_n7_store_multiple_6,\ + nds_n7_store_multiple_7,nds_n7_store_multiple_8, nds_n7_store_multiple_12" + "nds32_n7_last_load_to_ii_p" +) diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-n8.md gcc-4.9.4/gcc/config/nds32/nds32-n8.md --- gcc-4.9.4.orig/gcc/config/nds32/nds32-n8.md 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-n8.md 2016-08-08 20:37:45.510270246 +0200 @@ -0,0 +1,314 @@ +;; Pipeline descriptions of Andes NDS32 
cpu for GNU compiler +;; Copyright (C) 2012-2015 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +;; ------------------------------------------------------------------------ +;; Define N8 pipeline settings. +;; ------------------------------------------------------------------------ + +(define_automaton "nds32_n8_machine") + +(define_cpu_unit "n8_ii" "nds32_n8_machine") +(define_cpu_unit "n8_ex" "nds32_n8_machine") + +(define_insn_reservation "nds_n8_unknown" 1 + (and (eq_attr "type" "unknown") + (eq_attr "pipeline_model" "n8")) + "n8_ii, n8_ex") + +(define_insn_reservation "nds_n8_misc" 1 + (and (eq_attr "type" "misc") + (eq_attr "pipeline_model" "n8")) + "n8_ii, n8_ex") + +(define_insn_reservation "nds_n8_alu" 1 + (and (eq_attr "type" "alu") + (eq_attr "pipeline_model" "n8")) + "n8_ii, n8_ex") + +(define_insn_reservation "nds_n8_load" 1 + (and (match_test "nds32_load_single_p (insn)") + (eq_attr "pipeline_model" "n8")) + "n8_ii, n8_ex") + +(define_insn_reservation "nds_n8_store" 1 + (and (match_test "nds32_store_single_p (insn)") + (eq_attr "pipeline_model" "n8")) + "n8_ii, n8_ex") + +(define_insn_reservation "nds_n8_load_multiple_1" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "1")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, n8_ex") + +(define_insn_reservation "nds_n8_load_multiple_2" 1 + (and (ior (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "2")) + (match_test "nds32_load_double_p (insn)")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, n8_ii+n8_ex, n8_ex") + +(define_insn_reservation "nds_n8_load_multiple_3" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "3")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*2, n8_ex") + +(define_insn_reservation "nds_n8_load_multiple_4" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "4")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*3, n8_ex") + +(define_insn_reservation "nds_n8_load_multiple_5" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "5")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*4, n8_ex") + +(define_insn_reservation "nds_n8_load_multiple_6" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "6")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*5, n8_ex") + +(define_insn_reservation "nds_n8_load_multiple_7" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "7")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*6, n8_ex") + +(define_insn_reservation "nds_n8_load_multiple_8" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "8")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*7, n8_ex") + +(define_insn_reservation "nds_n8_load_multiple_12" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "12")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, 
(n8_ii+n8_ex)*11, n8_ex") + +(define_insn_reservation "nds_n8_store_multiple_1" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "1")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, n8_ex") + +(define_insn_reservation "nds_n8_store_multiple_2" 1 + (and (ior (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "2")) + (match_test "nds32_store_double_p (insn)")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, n8_ii+n8_ex, n8_ex") + +(define_insn_reservation "nds_n8_store_multiple_3" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "3")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*2, n8_ex") + +(define_insn_reservation "nds_n8_store_multiple_4" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "4")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*3, n8_ex") + +(define_insn_reservation "nds_n8_store_multiple_5" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "5")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*4, n8_ex") + +(define_insn_reservation "nds_n8_store_multiple_6" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "6")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*5, n8_ex") + +(define_insn_reservation "nds_n8_store_multiple_7" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "7")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*6, n8_ex") + +(define_insn_reservation "nds_n8_store_multiple_8" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "8")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*7, n8_ex") + +(define_insn_reservation "nds_n8_store_multiple_12" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "12")) + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*11, n8_ex") + +(define_insn_reservation "nds_n8_mul_fast" 1 + (and (match_test "nds32_mul_config != MUL_TYPE_SLOW") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n8"))) + "n8_ii, n8_ex") + +(define_insn_reservation "nds_n8_mul_slow" 1 + (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n8"))) + "n8_ii, n8_ex*16") + +(define_insn_reservation "nds_n8_mac_fast" 1 + (and (match_test "nds32_mul_config != MUL_TYPE_SLOW") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n8"))) + "n8_ii, n8_ii+n8_ex, n8_ex") + +(define_insn_reservation "nds_n8_mac_slow" 1 + (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n8"))) + "n8_ii, (n8_ii+n8_ex)*16, n8_ex") + +(define_insn_reservation "nds_n8_div" 1 + (and (eq_attr "type" "div") + (eq_attr "pipeline_model" "n8")) + "n8_ii, (n8_ii+n8_ex)*36, n8_ex") + +(define_insn_reservation "nds_n8_branch" 1 + (and (eq_attr "type" "branch") + (eq_attr "pipeline_model" "n8")) + "n8_ii, n8_ex") + +;; LD_!bi -> ADDR_IN_MOP(1) +(define_bypass 3 + "nds_n8_load" + "nds_n8_branch,\ + nds_n8_load, nds_n8_store,\ + nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ + nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ + nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\ + nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ + nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ + nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" + "nds32_n8_load_to_ii_p" +) + +;; LMW(N, N) -> ADDR_IN_MOP(1) +(define_bypass 3 + 
"nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ + nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ + nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12" + "nds_n8_branch,\ + nds_n8_load, nds_n8_store,\ + nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ + nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ + nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\ + nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ + nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ + nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" + "nds32_n8_last_load_to_ii_p" +) + +;; LMW(N, N - 1) -> ADDR_IN_MOP(1) +(define_bypass 2 + "nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ + nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ + nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12" + "nds_n8_branch,\ + nds_n8_load, nds_n8_store,\ + nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ + nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ + nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\ + nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ + nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ + nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" + "nds32_n8_last_load_two_to_ii_p" +) + +;; LD_bi -> ADDR_IN_MOP(1) +(define_bypass 2 + "nds_n8_load" + "nds_n8_branch,\ + nds_n8_load, nds_n8_store,\ + nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ + nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ + nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\ + nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ + nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ + nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" + "nds32_n8_load_bi_to_ii_p" +) + +;; LD_!bi -> ALU, MOVD44_E, MUL, MAC, DIV, BR_COND, ST, SMW(N, 1) +(define_bypass 2 + "nds_n8_load" + "nds_n8_alu, + nds_n8_mul_fast, nds_n8_mul_slow,\ + nds_n8_mac_fast, nds_n8_mac_slow,\ + nds_n8_div,\ + nds_n8_branch,\ + nds_n8_store,\ + nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ + nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ + nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" + "nds32_n8_load_to_ex_p" +) + +;; ALU, MOVD44_O, MUL, MAC, DIV_Rs, LD_bi, ADDR_OUT -> ADDR_IN_MOP(1) +(define_bypass 2 + "nds_n8_alu, + nds_n8_mul_fast, nds_n8_mul_slow,\ + nds_n8_mac_fast, nds_n8_mac_slow,\ + nds_n8_div,\ + nds_n8_load, nds_n8_store,\ + nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ + nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ + nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\ + nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ + nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ + nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" + "nds_n8_branch,\ + nds_n8_load, nds_n8_store,\ + nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ + nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ 
+ nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12,\ + nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ + nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ + nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" + "nds32_n8_ex_to_ii_p" +) + +;; LMW(N, N) -> ALU, MOVD44_E, MUL, MAC, DIV, BR_COND, ST, SMW(N, 1) +(define_bypass 2 + "nds_n8_load_multiple_1,nds_n8_load_multiple_2, nds_n8_load_multiple_3,\ + nds_n8_load_multiple_4,nds_n8_load_multiple_5, nds_n8_load_multiple_6,\ + nds_n8_load_multiple_7,nds_n8_load_multiple_8, nds_n8_load_multiple_12" + "nds_n8_alu, + nds_n8_mul_fast, nds_n8_mul_slow,\ + nds_n8_mac_fast, nds_n8_mac_slow,\ + nds_n8_div,\ + nds_n8_branch,\ + nds_n8_store,\ + nds_n8_store_multiple_1,nds_n8_store_multiple_2, nds_n8_store_multiple_3,\ + nds_n8_store_multiple_4,nds_n8_store_multiple_5, nds_n8_store_multiple_6,\ + nds_n8_store_multiple_7,nds_n8_store_multiple_8, nds_n8_store_multiple_12" + "nds32_n8_last_load_to_ex_p" +) diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-n9-2r1w.md gcc-4.9.4/gcc/config/nds32/nds32-n9-2r1w.md --- gcc-4.9.4.orig/gcc/config/nds32/nds32-n9-2r1w.md 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-n9-2r1w.md 2016-08-08 20:37:45.510270246 +0200 @@ -0,0 +1,295 @@ +;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2015 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +;; ------------------------------------------------------------------------ +;; Define N9 2R1W pipeline settings. 
+;; ------------------------------------------------------------------------ + +(define_automaton "nds32_n9_2r1w_machine") + +(define_cpu_unit "n9_2r1w_ii" "nds32_n9_2r1w_machine") +(define_cpu_unit "n9_2r1w_ex" "nds32_n9_2r1w_machine") +(define_cpu_unit "n9_2r1w_mm" "nds32_n9_2r1w_machine") +(define_cpu_unit "n9_2r1w_wb" "nds32_n9_2r1w_machine") + +(define_insn_reservation "nds_n9_2r1w_unknown" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "type" "unknown") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_misc" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "type" "misc") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_mmu" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "type" "mmu") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_alu" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "type" "alu") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_alu_shift" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "type" "alu_shift") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_pbsad" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "type" "pbsad") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex*3, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_pbsada" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "type" "pbsada") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex*3, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_load" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (match_test "nds32_load_single_p (insn)") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_store" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (match_test "nds32_store_single_p (insn)") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_load_multiple_1" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "1")))) + "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_load_multiple_2" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (ior (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "2")) + (match_test "nds32_load_double_p (insn)")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_load_multiple_3" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "3")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, 
n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_load_multiple_4" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "4")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_load_multiple_5" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "5")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*2, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_load_multiple_6" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "6")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*3, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_load_multiple_7" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "7")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*4, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_load_multiple_8" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "8")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*5, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_load_multiple_12" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "12")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*9, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_store_multiple_1" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "1")))) + "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_store_multiple_2" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (ior (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "2")) + (match_test "nds32_store_double_p (insn)")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_store_multiple_3" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "3")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, 
n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_store_multiple_4" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "4")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_store_multiple_5" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "5")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*2, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_store_multiple_6" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "6")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*3, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_store_multiple_7" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "7")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*4, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_store_multiple_8" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "8")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*5, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_store_multiple_12" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "12")))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm, (n9_2r1w_ii+n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb)*9, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_mul_fast" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W && nds32_mul_config != MUL_TYPE_SLOW") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_mul_slow" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W && nds32_mul_config == MUL_TYPE_SLOW") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex*17, n9_2r1w_mm, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_mac_fast" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W && nds32_mul_config != MUL_TYPE_SLOW") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ii+n9_2r1w_ex, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + 
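;; The reservations in this file all walk the same four units in order
;; (n9_2r1w_ii, then n9_2r1w_ex, then n9_2r1w_mm, then n9_2r1w_wb).  As a
;; minimal sketch of how that recurring pattern could be factored through a
;; named reservation (hypothetical names, assuming the standard GCC DFA
;; description syntax; not taken from this patch):
;;
;;   (define_reservation "n9_2r1w_simple"
;;     "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb")
;;
;;   (define_insn_reservation "nds_n9_2r1w_alu_sketch" 1
;;     (and (match_test "nds32_register_ports_config == REG_PORT_2R1W")
;;          (and (eq_attr "type" "alu")
;;               (eq_attr "pipeline_model" "n9")))
;;     "n9_2r1w_simple")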
+(define_insn_reservation "nds_n9_2r1w_mac_slow" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W && nds32_mul_config == MUL_TYPE_SLOW") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, (n9_2r1w_ii+n9_2r1w_ex)*17, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_ex+n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_div" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "type" "div") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, (n9_2r1w_ii+n9_2r1w_ex)*34, n9_2r1w_ex+n9_2r1w_mm, n9_2r1w_mm+n9_2r1w_wb, n9_2r1w_wb") + +(define_insn_reservation "nds_n9_2r1w_branch" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_2R1W") + (and (eq_attr "type" "branch") + (eq_attr "pipeline_model" "n9"))) + "n9_2r1w_ii, n9_2r1w_ex, n9_2r1w_mm, n9_2r1w_wb") + +;; LD_!bi, MUL, MAC +;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44_E, MUL, MAC_RaRb, M2R, DIV, ADDR_IN_!bi, ADDR_IN_bi_Ra, ST_bi, ST_!bi_RI, BR, MMU +(define_bypass 2 + "nds_n9_2r1w_load,\ + nds_n9_2r1w_mul_fast, nds_n9_2r1w_mul_slow,\ + nds_n9_2r1w_mac_fast, nds_n9_2r1w_mac_slow" + "nds_n9_2r1w_alu, nds_n9_2r1w_alu_shift,\ + nds_n9_2r1w_pbsad, nds_n9_2r1w_pbsada,\ + nds_n9_2r1w_mul_fast, nds_n9_2r1w_mul_slow,\ + nds_n9_2r1w_mac_fast, nds_n9_2r1w_mac_slow,\ + nds_n9_2r1w_branch,\ + nds_n9_2r1w_div,\ + nds_n9_2r1w_load,nds_n9_2r1w_store,\ + nds_n9_2r1w_load_multiple_1,nds_n9_2r1w_load_multiple_2, nds_n9_2r1w_load_multiple_3,\ + nds_n9_2r1w_load_multiple_4,nds_n9_2r1w_load_multiple_5, nds_n9_2r1w_load_multiple_6,\ + nds_n9_2r1w_load_multiple_7,nds_n9_2r1w_load_multiple_8, nds_n9_2r1w_load_multiple_12,\ + nds_n9_2r1w_store_multiple_1,nds_n9_2r1w_store_multiple_2, nds_n9_2r1w_store_multiple_3,\ + nds_n9_2r1w_store_multiple_4,nds_n9_2r1w_store_multiple_5, nds_n9_2r1w_store_multiple_6,\ + nds_n9_2r1w_store_multiple_7,nds_n9_2r1w_store_multiple_8, nds_n9_2r1w_store_multiple_12,\ + nds_n9_2r1w_mmu" + "nds32_n9_2r1w_mm_to_ex_p" +) + +;; LMW(N, N) +;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU +(define_bypass 2 + "nds_n9_2r1w_load_multiple_1,nds_n9_2r1w_load_multiple_2, nds_n9_2r1w_load_multiple_3,\ + nds_n9_2r1w_load_multiple_4,nds_n9_2r1w_load_multiple_5, nds_n9_2r1w_load_multiple_6,\ + nds_n9_2r1w_load_multiple_7,nds_n9_2r1w_load_multiple_8, nds_n9_2r1w_load_multiple_12" + "nds_n9_2r1w_alu, nds_n9_2r1w_alu_shift,\ + nds_n9_2r1w_pbsad, nds_n9_2r1w_pbsada,\ + nds_n9_2r1w_mul_fast, nds_n9_2r1w_mul_slow,\ + nds_n9_2r1w_mac_fast, nds_n9_2r1w_mac_slow,\ + nds_n9_2r1w_branch,\ + nds_n9_2r1w_div,\ + nds_n9_2r1w_load,nds_n9_2r1w_store,\ + nds_n9_2r1w_load_multiple_1,nds_n9_2r1w_load_multiple_2, nds_n9_2r1w_load_multiple_3,\ + nds_n9_2r1w_load_multiple_4,nds_n9_2r1w_load_multiple_5, nds_n9_2r1w_load_multiple_6,\ + nds_n9_2r1w_load_multiple_7,nds_n9_2r1w_load_multiple_8, nds_n9_2r1w_load_multiple_12,\ + nds_n9_2r1w_store_multiple_1,nds_n9_2r1w_store_multiple_2, nds_n9_2r1w_store_multiple_3,\ + nds_n9_2r1w_store_multiple_4,nds_n9_2r1w_store_multiple_5, nds_n9_2r1w_store_multiple_6,\ + nds_n9_2r1w_store_multiple_7,nds_n9_2r1w_store_multiple_8, nds_n9_2r1w_store_multiple_12,\ + nds_n9_2r1w_mmu" + "nds32_n9_last_load_to_ex_p" +) diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-n9-3r2w.md gcc-4.9.4/gcc/config/nds32/nds32-n9-3r2w.md --- gcc-4.9.4.orig/gcc/config/nds32/nds32-n9-3r2w.md 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-n9-3r2w.md 2016-08-08 20:37:45.510270246 +0200 @@ -0,0 +1,308 @@ 
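;; A note on reading the reservation strings below: a comma advances one
;; cycle, "a+b" claims both units in the same cycle, and "(...)*n" repeats
;; the bracketed pattern n times (standard GCC DFA regexp syntax).  As a
;; worked reading, the combo-4 load-multiple reservation defined below
;; occupies the units cycle by cycle as:
;;
;;   cycle 1: n9_3r2w_ii
;;   cycle 2: n9_3r2w_ii + n9_3r2w_ex
;;   cycle 3: n9_3r2w_ii + n9_3r2w_ex + n9_3r2w_mm
;;   cycle 4: n9_3r2w_ii + n9_3r2w_ex + n9_3r2w_mm + n9_3r2w_wb
;;   cycle 5: n9_3r2w_ex + n9_3r2w_mm + n9_3r2w_wb
;;   cycle 6: n9_3r2w_mm + n9_3r2w_wb
;;   cycle 7: n9_3r2w_wb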
+;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2015 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +;; ------------------------------------------------------------------------ +;; Define N9 3R2W pipeline settings. +;; ------------------------------------------------------------------------ + +(define_automaton "nds32_n9_3r2w_machine") + +(define_cpu_unit "n9_3r2w_ii" "nds32_n9_3r2w_machine") +(define_cpu_unit "n9_3r2w_ex" "nds32_n9_3r2w_machine") +(define_cpu_unit "n9_3r2w_mm" "nds32_n9_3r2w_machine") +(define_cpu_unit "n9_3r2w_wb" "nds32_n9_3r2w_machine") + +(define_insn_reservation "nds_n9_3r2w_unknown" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "type" "unknown") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_misc" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "type" "misc") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_mmu" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "type" "mmu") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_alu" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "type" "alu") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_alu_shift" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "type" "alu_shift") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_pbsad" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "type" "pbsad") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex*3, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_pbsada" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "type" "pbsada") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex*3, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_load" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (match_test "nds32_load_single_p (insn)") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_store" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (match_test "nds32_store_single_p (insn)") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation 
"nds_n9_3r2w_load_multiple_1" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "1")))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_load_multiple_2" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (ior (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "2")) + (match_test "nds32_load_double_p (insn)")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_load_multiple_3" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "3")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_load_multiple_4" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "4")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_load_multiple_5" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "5")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*2, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_load_multiple_6" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "6")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*3, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_load_multiple_7" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "7")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*4, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_load_multiple_8" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "8")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*5, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_load_multiple_12" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "12")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*9, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") 
+ +(define_insn_reservation "nds_n9_3r2w_store_multiple_1" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "1")))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_store_multiple_2" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (ior (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "2")) + (match_test "nds32_store_double_p (insn)")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_store_multiple_3" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "3")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_store_multiple_4" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "4")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_store_multiple_5" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "5")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*2, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_store_multiple_6" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "6")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*3, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_store_multiple_7" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "7")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*4, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_store_multiple_8" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "8")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*5, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_store_multiple_12" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "pipeline_model" "n9") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "12")))) + "n9_3r2w_ii, n9_3r2w_ii+n9_3r2w_ex, n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm, (n9_3r2w_ii+n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb)*9, 
n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_mm+n9_3r2w_wb, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_mul_fast1" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_FAST_1") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_mul_fast2" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_FAST_2") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex*2, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_mul_slow" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_SLOW") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex*17, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_mac_fast1" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_FAST_1") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_mac_fast2" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_FAST_2") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex*2, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_mac_slow" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W && nds32_mul_config == MUL_TYPE_SLOW") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex*17, n9_3r2w_ex+n9_3r2w_mm, n9_3r2w_ex+n9_3r2w_mm+n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_div" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "type" "div") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex*34, n9_3r2w_mm, n9_3r2w_wb") + +(define_insn_reservation "nds_n9_3r2w_branch" 1 + (and (match_test "nds32_register_ports_config == REG_PORT_3R2W") + (and (eq_attr "type" "branch") + (eq_attr "pipeline_model" "n9"))) + "n9_3r2w_ii, n9_3r2w_ex, n9_3r2w_mm, n9_3r2w_wb") + +;; LD, MUL, MAC, DIV +;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU +(define_bypass 2 + "nds_n9_3r2w_load,\ + nds_n9_3r2w_mul_fast1, nds_n9_3r2w_mul_fast2, nds_n9_3r2w_mul_slow,\ + nds_n9_3r2w_mac_fast1, nds_n9_3r2w_mac_fast2, nds_n9_3r2w_mac_slow,\ + nds_n9_3r2w_div" + "nds_n9_3r2w_alu, nds_n9_3r2w_alu_shift,\ + nds_n9_3r2w_pbsad, nds_n9_3r2w_pbsada,\ + nds_n9_3r2w_mul_fast1, nds_n9_3r2w_mul_fast2, nds_n9_3r2w_mul_slow,\ + nds_n9_3r2w_mac_fast1, nds_n9_3r2w_mac_fast2, nds_n9_3r2w_mac_slow,\ + nds_n9_3r2w_branch,\ + nds_n9_3r2w_div,\ + nds_n9_3r2w_load,nds_n9_3r2w_store,\ + nds_n9_3r2w_load_multiple_1,nds_n9_3r2w_load_multiple_2, nds_n9_3r2w_load_multiple_3,\ + nds_n9_3r2w_load_multiple_4,nds_n9_3r2w_load_multiple_5, nds_n9_3r2w_load_multiple_6,\ + nds_n9_3r2w_load_multiple_7,nds_n9_3r2w_load_multiple_8, nds_n9_3r2w_load_multiple_12,\ + nds_n9_3r2w_store_multiple_1,nds_n9_3r2w_store_multiple_2, nds_n9_3r2w_store_multiple_3,\ + nds_n9_3r2w_store_multiple_4,nds_n9_3r2w_store_multiple_5, nds_n9_3r2w_store_multiple_6,\ + nds_n9_3r2w_store_multiple_7,nds_n9_3r2w_store_multiple_8, nds_n9_3r2w_store_multiple_12,\ + nds_n9_3r2w_mmu" + "nds32_n9_3r2w_mm_to_ex_p" +) + +;; LMW(N, N) +;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, 
MAC_RaRb, DIV, ADDR_IN, BR, MMU +(define_bypass 2 + "nds_n9_3r2w_load_multiple_1,nds_n9_3r2w_load_multiple_2, nds_n9_3r2w_load_multiple_3,\ + nds_n9_3r2w_load_multiple_4,nds_n9_3r2w_load_multiple_5, nds_n9_3r2w_load_multiple_6,\ + nds_n9_3r2w_load_multiple_7,nds_n9_3r2w_load_multiple_8, nds_n9_3r2w_load_multiple_12" + "nds_n9_3r2w_alu, nds_n9_3r2w_alu_shift,\ + nds_n9_3r2w_pbsad, nds_n9_3r2w_pbsada,\ + nds_n9_3r2w_mul_fast1, nds_n9_3r2w_mul_fast2, nds_n9_3r2w_mul_slow,\ + nds_n9_3r2w_mac_fast1, nds_n9_3r2w_mac_fast2, nds_n9_3r2w_mac_slow,\ + nds_n9_3r2w_branch,\ + nds_n9_3r2w_div,\ + nds_n9_3r2w_load,nds_n9_3r2w_store,\ + nds_n9_3r2w_load_multiple_1,nds_n9_3r2w_load_multiple_2, nds_n9_3r2w_load_multiple_3,\ + nds_n9_3r2w_load_multiple_4,nds_n9_3r2w_load_multiple_5, nds_n9_3r2w_load_multiple_6,\ + nds_n9_3r2w_load_multiple_7,nds_n9_3r2w_load_multiple_8, nds_n9_3r2w_load_multiple_12,\ + nds_n9_3r2w_store_multiple_1,nds_n9_3r2w_store_multiple_2, nds_n9_3r2w_store_multiple_3,\ + nds_n9_3r2w_store_multiple_4,nds_n9_3r2w_store_multiple_5, nds_n9_3r2w_store_multiple_6,\ + nds_n9_3r2w_store_multiple_7,nds_n9_3r2w_store_multiple_8, nds_n9_3r2w_store_multiple_12,\ + nds_n9_3r2w_mmu" + "nds32_n9_last_load_to_ex_p" +) diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32.opt gcc-4.9.4/gcc/config/nds32/nds32.opt --- gcc-4.9.4.orig/gcc/config/nds32/nds32.opt 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32.opt 2016-08-08 20:37:45.590273343 +0200 @@ -1,5 +1,5 @@ ; Options of Andes NDS32 cpu for GNU compiler -; Copyright (C) 2012-2014 Free Software Foundation, Inc. +; Copyright (C) 2012-2015 Free Software Foundation, Inc. ; Contributed by Andes Technology Corporation. ; ; This file is part of GCC. @@ -21,14 +21,31 @@ HeaderInclude config/nds32/nds32-opts.h -mbig-endian -Target Report RejectNegative Negative(mlittle-endian) Mask(BIG_ENDIAN) +; --------------------------------------------------------------- +; The following options are designed for aliasing and compatibility options. + +EB +Target RejectNegative Alias(mbig-endian) Generate code in big-endian mode. -mlittle-endian -Target Report RejectNegative Negative(mbig-endian) InverseMask(BIG_ENDIAN) +EL +Target RejectNegative Alias(mlittle-endian) Generate code in little-endian mode. +mfp-as-gp +Target RejectNegative Alias(mforce-fp-as-gp) +Force performing fp-as-gp optimization. + +mno-fp-as-gp +Target RejectNegative Alias(mforbid-fp-as-gp) +Forbid performing fp-as-gp optimization. + +m16bit +Target Undocumented Alias(m16-bit) +Generate 16-bit instructions. + +; --------------------------------------------------------------- + mreduced-regs Target Report RejectNegative Negative(mfull-regs) Mask(REDUCED_REGS) Use reduced-set registers for register allocation. @@ -37,14 +54,78 @@ Target Report RejectNegative Negative(mreduced-regs) InverseMask(REDUCED_REGS) Use full-set registers for register allocation. +; --------------------------------------------------------------- + +malways-align +Target Mask(ALWAYS_ALIGN) +Always align function entry, jump target and return address. + +malign-functions +Target Mask(ALIGN_FUNCTION) +Align function entry to 4 byte. + +mbig-endian +Target Undocumented RejectNegative Negative(mlittle-endian) Mask(BIG_ENDIAN) +Generate code in big-endian mode. + +mlittle-endian +Target Undocumented RejectNegative Negative(mbig-endian) InverseMask(BIG_ENDIAN) +Generate code in little-endian mode. 
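; A sketch of how the Mask() records above are consumed elsewhere in the
; backend (assuming the standard GCC option machinery; the fragment is
; illustrative and not taken from this patch): a record such as
; "malways-align / Target Mask(ALWAYS_ALIGN)" creates the pair
; -malways-align / -mno-always-align together with a MASK_ALWAYS_ALIGN bit,
; so backend code can simply test
;
;   if (TARGET_ALWAYS_ALIGN)
;     ... /* align function entry, jump target and return address */
;
; while the Negative() pair mbig-endian / mlittle-endian makes the two
; options set and clear the same BIG_ENDIAN mask bit.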
+ +mforce-fp-as-gp +Target Undocumented Mask(FORCE_FP_AS_GP) +Prevent $fp being allocated during register allocation so that compiler is able to force performing fp-as-gp optimization. + +mforbid-fp-as-gp +Target Undocumented Mask(FORBID_FP_AS_GP) +Forbid using $fp to access static and global variables. This option strictly forbids fp-as-gp optimization regardless of '-mforce-fp-as-gp'. + +minline-strcpy +Target Undocumented Mask(INLINE_STRCPY) +Inlining strcpy function. + +mload-store-opt +Target Mask(LOAD_STORE_OPT) +Enable load store optimization. + +mregrename +Target Mask(REGRENAME_OPT) +Enable target dependent register rename optimization. + +mgcse +Target Mask(GCSE_OPT) +Enable target dependent global CSE optimization. + +msoft-fp-arith-comm +Target Mask(SOFT_FP_ARITH_COMM) +Enable operand commutative for soft floating point arithmetic optimization. + +; --------------------------------------------------------------- + mcmov Target Report Mask(CMOV) Generate conditional move instructions. -mperf-ext -Target Report Mask(PERF_EXT) +mhw-abs +Target Report Mask(HW_ABS) +Generate hardware abs instructions. + +mext-perf +Target Report Mask(EXT_PERF) Generate performance extension instructions. +mext-perf2 +Target Report Mask(EXT_PERF2) +Generate performance extension version 2 instructions. + +mext-string +Target Report Mask(EXT_STRING) +Generate string extension instructions. + +mext-dsp +Target Report Mask(EXT_DSP) +Generate DSP extension instructions. + mv3push Target Report Mask(V3PUSH) Generate v3 push25/pop25 instructions. @@ -53,12 +134,16 @@ Target Report Mask(16_BIT) Generate 16-bit instructions. -mgp-direct -Target Report Mask(GP_DIRECT) -Generate GP base instructions directly. +mrelax-hint +Target Report Mask(RELAX_HINT) +Insert relax hint for linker to do relaxation. + +mvh +Target Report Mask(VH) Condition(!TARGET_LINUX_ABI) +Enable Virtual Hosting support. misr-vector-size= -Target RejectNegative Joined UInteger Var(nds32_isr_vector_size) Init(NDS32_DEFAULT_ISR_VECTOR_SIZE) +Target RejectNegative Joined UInteger Var(nds32_isr_vector_size) Init(NDS32_DEFAULT_ISR_VECTOR_SIZE) Condition(!TARGET_LINUX_ABI) Specify the size of each interrupt vector, which must be 4 or 16. mcache-block-size= @@ -71,32 +156,348 @@ Enum Name(nds32_arch_type) Type(enum nds32_arch_type) +Known arch types (for use with the -march= option): EnumValue Enum(nds32_arch_type) String(v2) Value(ARCH_V2) EnumValue +Enum(nds32_arch_type) String(v2j) Value(ARCH_V2J) + +EnumValue Enum(nds32_arch_type) String(v3) Value(ARCH_V3) EnumValue +Enum(nds32_arch_type) String(v3j) Value(ARCH_V3J) + +EnumValue Enum(nds32_arch_type) String(v3m) Value(ARCH_V3M) -mforce-fp-as-gp -Target Report Mask(FORCE_FP_AS_GP) -Prevent $fp being allocated during register allocation so that compiler is able to force performing fp-as-gp optimization. +EnumValue +Enum(nds32_arch_type) String(v3f) Value(ARCH_V3F) -mforbid-fp-as-gp -Target Report Mask(FORBID_FP_AS_GP) -Forbid using $fp to access static and global variables. This option strictly forbids fp-as-gp optimization regardless of '-mforce-fp-as-gp'. +EnumValue +Enum(nds32_arch_type) String(v3s) Value(ARCH_V3S) + +mcpu= +Target RejectNegative Joined Enum(nds32_cpu_type) Var(nds32_cpu_option) Init(CPU_N9) +Specify the cpu for pipeline model. 
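; The Enum/EnumValue records that follow map every accepted -mcpu= spelling
; onto one of the nds32_cpu_type values used to pick a pipeline model; a
; usage sketch (hypothetical command lines, illustrative only):
;
;   gcc -mcpu=n968a -Os foo.c        ->  nds32_cpu_option == CPU_N9
;   gcc -mcpu=n1033-fpu -O2 foo.c    ->  nds32_cpu_option == CPU_N10
;
; Spellings outside this table are rejected by the Enum(nds32_cpu_type)
; declaration on the option.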
+ +Enum +Name(nds32_cpu_type) Type(enum nds32_cpu_type) +Known cpu types (for use with the -mcpu= option): + +EnumValue +Enum(nds32_cpu_type) String(n7) Value(CPU_N7) + +EnumValue +Enum(nds32_cpu_type) String(n705) Value(CPU_N7) + +EnumValue +Enum(nds32_cpu_type) String(n8) Value(CPU_N8) + +EnumValue +Enum(nds32_cpu_type) String(n801) Value(CPU_N8) + +EnumValue +Enum(nds32_cpu_type) String(sn8) Value(CPU_N8) + +EnumValue +Enum(nds32_cpu_type) String(sn801) Value(CPU_N8) + +EnumValue +Enum(nds32_cpu_type) String(s8) Value(CPU_N8) + +EnumValue +Enum(nds32_cpu_type) String(s801) Value(CPU_N8) + +EnumValue +Enum(nds32_cpu_type) String(e8) Value(CPU_E8) + +EnumValue +Enum(nds32_cpu_type) String(e801) Value(CPU_E8) + +EnumValue +Enum(nds32_cpu_type) String(n9) Value(CPU_N9) + +EnumValue +Enum(nds32_cpu_type) String(n903) Value(CPU_N9) + +EnumValue +Enum(nds32_cpu_type) String(n903a) Value(CPU_N9) + +EnumValue +Enum(nds32_cpu_type) String(n968) Value(CPU_N9) + +EnumValue +Enum(nds32_cpu_type) String(n968a) Value(CPU_N9) + +EnumValue +Enum(nds32_cpu_type) String(n10) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1033) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1033a) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1033-fpu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1033-spu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1068) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1068a) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1068-fpu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1068a-fpu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1068-spu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1068a-spu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(d10) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(d1088) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(d1088-fpu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(d1088-spu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n12) Value(CPU_N12) + +EnumValue +Enum(nds32_cpu_type) String(n1213) Value(CPU_N12) + +EnumValue +Enum(nds32_cpu_type) String(n1233) Value(CPU_N12) + +EnumValue +Enum(nds32_cpu_type) String(n1233-fpu) Value(CPU_N12) + +EnumValue +Enum(nds32_cpu_type) String(n1233-spu) Value(CPU_N12) + +EnumValue +Enum(nds32_cpu_type) String(n13) Value(CPU_N13) + +EnumValue +Enum(nds32_cpu_type) String(n1337) Value(CPU_N13) + +EnumValue +Enum(nds32_cpu_type) String(n1337-fpu) Value(CPU_N13) + +EnumValue +Enum(nds32_cpu_type) String(n1337-spu) Value(CPU_N13) + +EnumValue +Enum(nds32_cpu_type) String(simple) Value(CPU_SIMPLE) + +mmemory-model= +Target RejectNegative Joined Enum(nds32_memory_model_type) Var(nds32_memory_model_option) Init(MEMORY_MODEL_FAST) +Specify the memory model, fast or slow memory. + +Enum +Name(nds32_memory_model_type) Type(enum nds32_memory_model_type) + +EnumValue +Enum(nds32_memory_model_type) String(slow) Value(MEMORY_MODEL_SLOW) + +EnumValue +Enum(nds32_memory_model_type) String(fast) Value(MEMORY_MODEL_FAST) + +mfloat-abi= +Target RejectNegative Joined Enum(float_abi_type) Var(nds32_float_abi) Init(TARGET_DEFAULT_FLOAT_ABI) +Specify if floating point hardware should be used. The valid value is : soft, hard. 
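; Usage sketch for the mfloat-abi= record above (hypothetical command lines;
; the default comes from TARGET_DEFAULT_FLOAT_ABI):
;
;   -mfloat-abi=soft  ->  nds32_float_abi == NDS32_FLOAT_ABI_SOFT
;                         (floating point is done without FPU hardware)
;   -mfloat-abi=hard  ->  nds32_float_abi == NDS32_FLOAT_ABI_HARD
;                         (FPU instructions and FPU registers are used)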
+
+Enum
+Name(float_abi_type) Type(enum float_abi_type)
+Known floating-point ABIs (for use with the -mfloat-abi= option):
+
+EnumValue
+Enum(float_abi_type) String(soft) Value(NDS32_FLOAT_ABI_SOFT)
+
+EnumValue
+Enum(float_abi_type) String(hard) Value(NDS32_FLOAT_ABI_HARD)
+
+mconfig-fpu=
+Target RejectNegative Joined Enum(float_reg_number) Var(nds32_fp_regnum) Init(TARGET_CONFIG_FPU_DEFAULT)
+Specify an FPU configuration value from 0 to 7; 0-3 are as the FPU spec says, and 4-7 correspond to 0-3.
+
+Enum
+Name(float_reg_number) Type(enum float_reg_number)
+Known floating-point number of registers (for use with the -mconfig-fpu= option):
+
+EnumValue
+Enum(float_reg_number) String(0) Value(NDS32_CONFIG_FPU_0)
+
+EnumValue
+Enum(float_reg_number) String(1) Value(NDS32_CONFIG_FPU_1)
+
+EnumValue
+Enum(float_reg_number) String(2) Value(NDS32_CONFIG_FPU_2)
+
+EnumValue
+Enum(float_reg_number) String(3) Value(NDS32_CONFIG_FPU_3)
+
+EnumValue
+Enum(float_reg_number) String(4) Value(NDS32_CONFIG_FPU_4)
+
+EnumValue
+Enum(float_reg_number) String(5) Value(NDS32_CONFIG_FPU_5)
+
+EnumValue
+Enum(float_reg_number) String(6) Value(NDS32_CONFIG_FPU_6)
+
+EnumValue
+Enum(float_reg_number) String(7) Value(NDS32_CONFIG_FPU_7)
+
+mconfig-mul=
+Target RejectNegative Joined Enum(nds32_mul_type) Var(nds32_mul_config) Init(MUL_TYPE_FAST_1)
+Specify the configuration of the mul instruction: fast1, fast2 or slow. The default is fast1.
+
+Enum
+Name(nds32_mul_type) Type(enum nds32_mul_type)
+
+EnumValue
+Enum(nds32_mul_type) String(fast1) Value(MUL_TYPE_FAST_1)
+
+EnumValue
+Enum(nds32_mul_type) String(fast2) Value(MUL_TYPE_FAST_2)
+
+EnumValue
+Enum(nds32_mul_type) String(slow) Value(MUL_TYPE_SLOW)
+
+mconfig-register-ports=
+Target RejectNegative Joined Enum(nds32_register_ports) Var(nds32_register_ports_config) Init(REG_PORT_3R2W)
+Specify the number of register read/write ports for n9/n10 cores. The value should be 3r2w or 2r1w.
+
+Enum
+Name(nds32_register_ports) Type(enum nds32_register_ports)
+
+EnumValue
+Enum(nds32_register_ports) String(3r2w) Value(REG_PORT_3R2W)
+
+EnumValue
+Enum(nds32_register_ports) String(2r1w) Value(REG_PORT_2R1W)
+
+mifc
+Target Report Mask(IFC)
+Use special directives to guide linker doing ifc optimization.

mex9
Target Report Mask(EX9)
Use special directives to guide linker doing ex9 optimization.

+mprint-stall-cycles
+Target Report Mask(PRINT_STALLS)
+Print stall cycles due to structural or data dependencies. It should be used with the option '-S'.
+Note that stall cycles are determined by the compiler's pipeline model and may not be precise.
+
mctor-dtor
Target Report
Enable constructor/destructor feature.

+mcrt-arg
+Target Report
+Enable argc/argv passed by simulator.
+
mrelax
Target Report
Guide linker to relax instructions.
+
+minnermost-loop
+Target Report Mask(INNERMOST_LOOP)
+Insert the innermost loop directive.
+
+mext-fpu-fma
+Target Report Mask(EXT_FPU_FMA)
+Generate floating-point multiply-accumulation instructions.
+
+mext-fpu-sp
+Target Report Mask(FPU_SINGLE)
+Generate single-precision floating-point instructions.
+
+mext-fpu-dp
+Target Report Mask(FPU_DOUBLE)
+Generate double-precision floating-point instructions.
+
+mext-zol
+Target Report Mask(HWLOOP)
+Insert the hardware loop directive.
+
+mforce-no-ext-zol
+Target Undocumented Report Mask(FORCE_NO_HWLOOP)
+Force disable hardware loop, even when -mext-zol is used.
+
+mforce-no-ext-dsp
+Target Undocumented Report Mask(FORCE_NO_EXT_DSP)
+Force disable DSP extension, even when -mext-dsp is used.
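; The msched-prolog-epilog, mret-in-naked-func and malways-save-lp records
; that follow use Var(...) Init(...) rather than a mask bit; a sketch of the
; resulting defaults (assuming the standard GCC option machinery,
; illustrative only):
;
;   flag_sched_prolog_epilog  defaults to 1, cleared by -mno-sched-prolog-epilog
;   flag_ret_in_naked_func    defaults to 1, cleared by -mno-ret-in-naked-func
;   flag_always_save_lp       defaults to 0, set by -malways-save-lp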
+ +msched-prolog-epilog +Target Var(flag_sched_prolog_epilog) Init(1) +Permit scheduling of a function's prologue and epilogue sequence. + +mret-in-naked-func +Target Var(flag_ret_in_naked_func) Init(1) +Generate return instruction in naked function. + +malways-save-lp +Target Var(flag_always_save_lp) Init(0) +Always save $lp in the stack. + +; --------------------------------------------------------------- +; The following options are designed for compatibility issue. +; Hopefully these obsolete options will be removed one day. + +mg +Target Undocumented Warn(%qs is deprecated and has no effect) +Obsolete option. Users SHOULD NOT use this option in the command line. + +mdx-regs +Target Undocumented Warn(%qs is deprecated and has no effect) +Obsolete option. Users SHOULD NOT use this option in the command line. + +mexpand-isr +Target Undocumented Warn(%qs is deprecated and has no effect) +Obsolete option. Users SHOULD NOT use this option in the command line. + +mcrt-arg=yes +Target Undocumented Warn(%qs is deprecated and has no effect, use -mcrt-arg instead) +Obsolete option. Users SHOULD NOT use this option in the command line. + +mcrt-cpp=yes +Target Undocumented Warn(%qs is deprecated and has no effect, use -mctor-dtor instead) +Obsolete option. Users SHOULD NOT use this option in the command line. + +mcrt-exit=yes +Target Undocumented Warn(%qs is deprecated and has no effect, use -mctor-dtor instead) +Obsolete option. Users SHOULD NOT use this option in the command line. + +Os1 +Target Undocumented +Obsolete option. Users SHOULD NOT use this option in the command line. + +Os2 +Target Undocumented +Obsolete option. Users SHOULD NOT use this option in the command line. + +Os3 +Target Undocumented +Obsolete option. Users SHOULD NOT use this option in the command line. + +; --------------------------------------------------------------- diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-opts.h gcc-4.9.4/gcc/config/nds32/nds32-opts.h --- gcc-4.9.4.orig/gcc/config/nds32/nds32-opts.h 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-opts.h 2016-08-08 20:37:45.510270246 +0200 @@ -1,5 +1,5 @@ /* Definitions for option handling of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -22,14 +22,80 @@ #define NDS32_OPTS_H #define NDS32_DEFAULT_CACHE_BLOCK_SIZE 16 -#define NDS32_DEFAULT_ISR_VECTOR_SIZE (TARGET_ISA_V3 ? 4 : 16) +#define NDS32_DEFAULT_ISR_VECTOR_SIZE TARGET_DEFAULT_ISR_VECTOR_SIZE /* The various ANDES ISA. */ enum nds32_arch_type { ARCH_V2, + ARCH_V2J, ARCH_V3, - ARCH_V3M + ARCH_V3J, + ARCH_V3M, + ARCH_V3F, + ARCH_V3S }; +/* The various ANDES CPU. */ +enum nds32_cpu_type +{ + CPU_N7, + CPU_N8, + CPU_E8, + CPU_N9, + CPU_N10, + CPU_N12, + CPU_N13, + CPU_SIMPLE +}; + +/* The code model defines the address generation strategy. */ +enum nds32_memory_model_type +{ + MEMORY_MODEL_SLOW, + MEMORY_MODEL_FAST +}; + +/* The code model defines the address generation strategy. */ +enum nds32_cmodel_type +{ + CMODEL_SMALL, + CMODEL_MEDIUM, + CMODEL_LARGE +}; + +/* Multiply instruction configuration. */ +enum nds32_mul_type +{ + MUL_TYPE_FAST_1, + MUL_TYPE_FAST_2, + MUL_TYPE_SLOW +}; + +/* Register ports configuration. */ +enum nds32_register_ports +{ + REG_PORT_3R2W, + REG_PORT_2R1W +}; + +/* Which ABI to use. 
*/ +enum float_abi_type +{ + NDS32_FLOAT_ABI_SOFT, + NDS32_FLOAT_ABI_HARD +}; + +/* The various FPU number of registers. */ +enum float_reg_number +{ + NDS32_CONFIG_FPU_0, + NDS32_CONFIG_FPU_1, + NDS32_CONFIG_FPU_2, + NDS32_CONFIG_FPU_3, + NDS32_CONFIG_FPU_4, + NDS32_CONFIG_FPU_5, + NDS32_CONFIG_FPU_6, + NDS32_CONFIG_FPU_7 +}; #endif diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-peephole2.md gcc-4.9.4/gcc/config/nds32/nds32-peephole2.md --- gcc-4.9.4.orig/gcc/config/nds32/nds32-peephole2.md 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-peephole2.md 2016-08-08 20:37:45.510270246 +0200 @@ -1,5 +1,5 @@ ;; define_peephole2 optimization patterns of Andes NDS32 cpu for GNU compiler -;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; Copyright (C) 2012-2015 Free Software Foundation, Inc. ;; Contributed by Andes Technology Corporation. ;; ;; This file is part of GCC. @@ -19,7 +19,176 @@ ;; . -;; Use define_peephole and define_peephole2 to handle possible -;; target-specific optimization in this file. +;; Use define_split, define_peephole, and define_peephole2 to +;; handle possible target-specific optimization in this file. ;; ------------------------------------------------------------------------ +;; Try to utilize 16-bit instruction by swap operand if possible. +;; ------------------------------------------------------------------------ + +;; Try to make add as add45. +(define_peephole2 + [(set (match_operand:QIHISI 0 "register_operand" "") + (plus:QIHISI (match_operand:QIHISI 1 "register_operand" "") + (match_operand:QIHISI 2 "register_operand" "")))] + "reload_completed + && TARGET_16_BIT + && REGNO (operands[0]) == REGNO (operands[2]) + && REGNO (operands[0]) != REGNO (operands[1]) + && TEST_HARD_REG_BIT (reg_class_contents[MIDDLE_REGS], REGNO (operands[0]))" + [(set (match_dup 0) (plus:QIHISI (match_dup 2) (match_dup 1)))]) + +;; Try to make xor/ior/and/mult as xor33/ior33/and33/mult33. +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (match_operator:SI 1 "nds32_have_33_inst_operator" + [(match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "register_operand" "")]))] + "reload_completed + && TARGET_16_BIT + && REGNO (operands[0]) == REGNO (operands[3]) + && REGNO (operands[0]) != REGNO (operands[2]) + && TEST_HARD_REG_BIT (reg_class_contents[LOW_REGS], REGNO (operands[0])) + && TEST_HARD_REG_BIT (reg_class_contents[LOW_REGS], REGNO (operands[2]))" + [(set (match_dup 0) (match_op_dup 1 [(match_dup 3) (match_dup 2)]))]) + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "")) + (set (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "register_operand" ""))] + "TARGET_16_BIT + && !TARGET_ISA_V2 + && NDS32_IS_GPR_REGNUM (REGNO (operands[0])) + && NDS32_IS_GPR_REGNUM (REGNO (operands[1])) + && ((REGNO (operands[0]) & 0x1) == 0) + && ((REGNO (operands[1]) & 0x1) == 0) + && (REGNO (operands[0]) + 1) == REGNO (operands[2]) + && (REGNO (operands[1]) + 1) == REGNO (operands[3])" + "movd44\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "2")]) + +;; Merge two fcpyss to fcpysd. 
+(define_peephole2 + [(set (match_operand:SF 0 "float_even_register_operand" "") + (match_operand:SF 1 "float_even_register_operand" "")) + (set (match_operand:SF 2 "float_odd_register_operand" "") + (match_operand:SF 3 "float_odd_register_operand" ""))] + "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) + && REGNO (operands[0]) == REGNO (operands[2]) - 1 + && REGNO (operands[1]) == REGNO (operands[3]) - 1" + [(set (match_dup 4) (match_dup 5))] + { + operands[4] = gen_rtx_REG (DFmode, REGNO (operands[0])); + operands[5] = gen_rtx_REG (DFmode, REGNO (operands[1])); + }) + +(define_peephole2 + [(set (match_operand:SF 0 "float_odd_register_operand" "") + (match_operand:SF 1 "float_odd_register_operand" "")) + (set (match_operand:SF 2 "float_even_register_operand" "") + (match_operand:SF 3 "float_even_register_operand" ""))] + "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) + && REGNO (operands[2]) == REGNO (operands[0]) - 1 + && REGNO (operands[3]) == REGNO (operands[1]) - 1" + [(set (match_dup 4) (match_dup 5))] + { + operands[4] = gen_rtx_REG (DFmode, REGNO (operands[2])); + operands[5] = gen_rtx_REG (DFmode, REGNO (operands[3])); + }) + +;; Merge two flsi to fldi. +(define_peephole2 + [(set (match_operand:SF 0 "float_even_register_operand" "") + (match_operand:SF 1 "memory_operand" "")) + (set (match_operand:SF 2 "float_odd_register_operand" "") + (match_operand:SF 3 "memory_operand" ""))] + "REGNO (operands[0]) == REGNO (operands[2]) - 1 + && nds32_memory_merge_peep_p (operands[3], operands[1])" + [(set (match_dup 0) (match_dup 1))] +{ + operands[1] = widen_memory_access (operands[3], DFmode, 0); + operands[0] = gen_rtx_REG (DFmode, REGNO (operands[0])); +}) + +(define_peephole2 + [(set (match_operand:SF 0 "float_odd_register_operand" "") + (match_operand:SF 1 "memory_operand" "")) + (set (match_operand:SF 2 "float_even_register_operand" "") + (match_operand:SF 3 "memory_operand" ""))] + "REGNO (operands[2]) == REGNO (operands[0]) - 1 + && nds32_memory_merge_peep_p (operands[1], operands[3])" + [(set (match_dup 0) (match_dup 1))] +{ + operands[1] = widen_memory_access (operands[1], DFmode, 0); + operands[0] = gen_rtx_REG (DFmode, REGNO (operands[2])); +}) + +;; Merge two fssi to fsdi. +(define_peephole2 + [(set (match_operand:SF 0 "memory_operand" "") + (match_operand:SF 1 "float_even_register_operand" "")) + (set (match_operand:SF 2 "memory_operand" "") + (match_operand:SF 3 "float_odd_register_operand" ""))] + "REGNO (operands[1]) == REGNO (operands[3]) - 1 + && nds32_memory_merge_peep_p (operands[2], operands[0])" + [(set (match_dup 0) (match_dup 1))] +{ + operands[0] = widen_memory_access (operands[2], DFmode, 0); + operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1])); +}) + +(define_peephole2 + [(set (match_operand:SF 0 "memory_operand" "") + (match_operand:SF 1 "float_odd_register_operand" "")) + (set (match_operand:SF 2 "memory_operand" "") + (match_operand:SF 3 "float_even_register_operand" ""))] + "REGNO (operands[3]) == REGNO (operands[1]) - 1 + && nds32_memory_merge_peep_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 1))] +{ + operands[0] = widen_memory_access (operands[0], DFmode, 0); + operands[1] = gen_rtx_REG (DFmode, REGNO (operands[3])); +}) + +;; ------------------------------------------------------------------------ +;; GCC will prefer [u]divmodsi3 rather than [u]divsi3 even remainder is +;; unused, so we use split to drop mod operation for lower register pressure. 
+ +(define_split + [(set (match_operand:SI 0 "register_operand") + (div:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "register_operand"))) + (set (match_operand:SI 3 "register_operand") + (mod:SI (match_dup 1) (match_dup 2)))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[3])) != NULL + && can_create_pseudo_p ()" + [(set (match_dup 0) + (div:SI (match_dup 1) + (match_dup 2)))]) + +(define_split + [(set (match_operand:SI 0 "register_operand") + (udiv:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "register_operand"))) + (set (match_operand:SI 3 "register_operand") + (umod:SI (match_dup 1) (match_dup 2)))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[3])) != NULL + && can_create_pseudo_p ()" + [(set (match_dup 0) + (udiv:SI (match_dup 1) + (match_dup 2)))]) + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand") + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand")) + (sign_extend:DI (match_operand:SI 2 "register_operand"))))] + "NDS32_EXT_DSP_P () + && peep2_regno_dead_p (1, WORDS_BIG_ENDIAN ? REGNO (operands[0]) + 1 : REGNO (operands[0]))" + [(const_int 1)] +{ + rtx highpart = nds32_di_high_part_subreg (operands[0]); + emit_insn (gen_smulsi3_highpart (highpart, operands[1], operands[2])); + DONE; +}) diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-pipelines-auxiliary.c gcc-4.9.4/gcc/config/nds32/nds32-pipelines-auxiliary.c --- gcc-4.9.4.orig/gcc/config/nds32/nds32-pipelines-auxiliary.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-pipelines-auxiliary.c 2016-08-08 20:37:45.582273034 +0200 @@ -0,0 +1,2341 @@ +/* Auxiliary functions for pipeline descriptions pattern of Andes + NDS32 cpu for GNU compiler + Copyright (C) 2012-2015 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* ------------------------------------------------------------------------ */ + +#include +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" /* Required by recog.h. */ +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" /* For DFA state_t. */ +#include "insn-codes.h" /* For CODE_FOR_xxx. */ +#include "reload.h" /* For push_reload(). */ +#include "flags.h" +#include "function.h" +#include "expr.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "df.h" +#include "tm_p.h" +#include "tm-constrs.h" +#include "optabs.h" /* For GEN_FCN. */ +#include "target.h" +#include "target-def.h" +#include "langhooks.h" /* For add_builtin_function(). */ +#include "ggc.h" +#include "tree-pass.h" + +/* ------------------------------------------------------------------------ */ + +namespace nds32 { +namespace scheduling { + +/* Classify the memory access direction. It's unknown if the offset register + is not a constant value. 
 */ +enum memory_access_direction +{ + MEM_ACCESS_DIR_POS, + MEM_ACCESS_DIR_NEG, + MEM_ACCESS_DIR_UNKNOWN +}; + +/* This class provides some wrappers of the DFA scheduler. Due to a design + drawback of the DFA scheduler, creating two instances at the same time is + not allowed. Using the loosest relationship, such as 'dependency' instead of + 'aggregation' or 'composition', can minimize this issue. */ +class pipeline_simulator +{ +public: + pipeline_simulator (); + ~pipeline_simulator (); + + void advance_cycle (int cycles = 1); + int query_latency (rtx producer, rtx consumer) const; + int issue_insn (rtx insn); + int force_issue_insn (rtx insn); + +private: + static bool gcc_dfa_initialized_; + state_t state_; +}; + +/* Insert pseudo NOPs so that we can see stall cycles caused by structural or + data hazards in the assembly code. The design of this class is similar to + the 'template method' pattern, but we don't need to maintain multiple + customized algorithms at the same time. Hence this class has no virtual + functions providing further customizations. */ +class stall_inserter +{ +private: + enum dep_type { RES_DEP, DATA_DEP }; + +public: + void insert_stalls (); + +private: + static void compute_bb_for_insn_safe (); + static rtx emit_pseudo_nop_before (rtx insn, int cycles, enum dep_type type); + + void insert_structural_hazard_stalls (); + void insert_data_hazard_stalls (); + void emit_pseudo_nops_for_data_hazards (rtx insn, + pipeline_simulator &simulator); +}; + +static unsigned int nds32_print_stalls (void); + +const pass_data pass_data_nds32_print_stalls = +{ + RTL_PASS, /* type */ + "print_stalls", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_verify_rtl_sharing, /* todo_flags_finish */ +}; + +class pass_nds32_print_stalls : public rtl_opt_pass +{ +public: + pass_nds32_print_stalls (gcc::context *ctxt) + : rtl_opt_pass (pass_data_nds32_print_stalls, ctxt) + {} + + /* opt_pass methods: */ + unsigned int execute () { return nds32_print_stalls (); } +}; + +rtl_opt_pass * +make_pass_nds32_print_stalls (gcc::context *ctxt) +{ + return new pass_nds32_print_stalls (ctxt); +} + +bool pipeline_simulator::gcc_dfa_initialized_ = false; + +/* A safe wrapper around the function reg_overlap_mentioned_p (). */ +bool +reg_overlap_p (rtx x, rtx in) +{ + if (x == NULL_RTX || in == NULL_RTX) + return false; + + return static_cast<bool> (reg_overlap_mentioned_p (x, in)); +} + +/* Get the rtx in the PATTERN field of an insn. If INSN is not an insn, + the function doesn't change anything and returns it directly. */ +rtx +extract_pattern_from_insn (rtx insn) +{ + if (INSN_P (insn)) + return PATTERN (insn); + + return insn; +} + +/* Get the number of elements in a parallel rtx. */ +size_t +parallel_elements (rtx parallel_rtx) +{ + parallel_rtx = extract_pattern_from_insn (parallel_rtx); + gcc_assert (GET_CODE (parallel_rtx) == PARALLEL); + + return XVECLEN (parallel_rtx, 0); +} + +/* Extract an rtx from a parallel rtx with index NTH. If NTH is a negative + value, the function returns the last NTH rtx.
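+   For example (an assumed call), parallel_element (insn, -1) returns the
+   last element of the PARALLEL, and any out-of-range index, whether
+   positive or negative, yields NULL_RTX.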
*/ +rtx +parallel_element (rtx parallel_rtx, int nth) +{ + parallel_rtx = extract_pattern_from_insn (parallel_rtx); + gcc_assert (GET_CODE (parallel_rtx) == PARALLEL); + + int len = parallel_elements (parallel_rtx); + + if (nth >= 0) + { + if (nth >= len) + return NULL_RTX; + + return XVECEXP (parallel_rtx, 0, nth); + } + else + { + if (len + nth < 0) + return NULL_RTX; + + return XVECEXP (parallel_rtx, 0, len + nth); + } +} + +/* Return true if an insn is a pseudo NOP that is not a real instruction + occupying a real cycle and space of the text section. */ +bool +insn_pseudo_nop_p (rtx insn) +{ + if (INSN_CODE (insn) == CODE_FOR_nop_data_dep + || INSN_CODE (insn) == CODE_FOR_nop_res_dep) + return true; + + return false; +} + +/* Indicate whether an insn is a real insn which occupy at least one cycle + or not. The determination cannot be target-independent because some targets + use UNSPEC or UNSPEC_VOLATILE insns to represent real instructions. */ +bool +insn_executable_p (rtx insn) +{ + if (!INSN_P (insn)) + return false; + + if (insn_pseudo_nop_p (insn)) + return true; + + if (get_attr_length (insn) == 0) + return false; + + switch (GET_CODE (PATTERN (insn))) + { + case CONST_INT: + case USE: + case CLOBBER: + case ADDR_VEC: + case ADDR_DIFF_VEC: + case UNSPEC: + case UNSPEC_VOLATILE: + return false; + + default: + return true; + } + + return true; +} + +/* Return true if an insn is not marked as deleted. */ +bool +insn_deleted_p (rtx insn) +{ + if (INSN_DELETED_P (insn)) + return true; + + if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED) + return true; + + return false; +} + +/* Calculate the cycle distance between two insns in pipeline view. + Hence each insn can be treated as one cycle. + TODO: multi-cycle insns should be handled + specially, but we haven't done it here. */ +int +cycle_distance (rtx from, rtx to) +{ + int count = 1; + + for (from = NEXT_INSN (from); from && from != to; from = NEXT_INSN (from)) + { + if (!insn_executable_p (from)) + continue; + + if (insn_pseudo_nop_p (from)) + count += INTVAL (XVECEXP (PATTERN (from), 0, 0)); + else + ++count; + } + + return count; +} + +/* Extract the MEM rtx from a load/store insn. */ +rtx +extract_mem_rtx (rtx insn) +{ + rtx body = PATTERN (insn); + + switch (get_attr_type (insn)) + { + case TYPE_LOAD: + if (MEM_P (SET_SRC (body))) + return SET_SRC (body); + + /* unaligned address: (unspec [(mem)]) */ + if (GET_CODE (SET_SRC (body)) == UNSPEC) + { + gcc_assert (MEM_P (XVECEXP (SET_SRC (body), 0, 0))); + return XVECEXP (SET_SRC (body), 0, 0); + } + + /* (sign_extend (mem)) */ + gcc_assert (MEM_P (XEXP (SET_SRC (body), 0))); + return XEXP (SET_SRC (body), 0); + + case TYPE_STORE: + if (MEM_P (SET_DEST (body))) + return SET_DEST (body); + + /* unaligned address: (unspec [(mem)]) */ + if (GET_CODE (SET_DEST (body)) == UNSPEC) + { + gcc_assert (MEM_P (XVECEXP (SET_DEST (body), 0, 0))); + return XVECEXP (SET_DEST (body), 0, 0); + } + + /* (sign_extend (mem)) */ + gcc_assert (MEM_P (XEXP (SET_DEST (body), 0))); + return XEXP (SET_DEST (body), 0); + + default: + gcc_unreachable (); + } +} + +/* Find the post update rtx in INSN. If INSN is a load/store multiple insn, + the function returns the vector index of its parallel part. If INSN is a + single load/store insn, the function returns 0. If INSN is not a post- + update insn, the function returns -1. 
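+   For instance, in a sketched post-update load-multiple pattern such as
+     (parallel [(set (reg 1) (mem (reg 3)))
+                (set (reg 2) (mem (plus (reg 3) (const_int 4))))
+                (set (reg 3) (plus (reg 3) (const_int 8)))])
+   the function would return 2, the index of the base-update SET.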
*/ +int +find_post_update_rtx (rtx insn) +{ + rtx mem_rtx; + int i, len; + + switch (get_attr_type (insn)) + { + case TYPE_LOAD_MULTIPLE: + case TYPE_STORE_MULTIPLE: + /* Find a pattern in a parallel rtx: + (set (reg) (plus (reg) (const_int))) */ + len = parallel_elements (insn); + for (i = 0; i < len; ++i) + { + rtx curr_insn = parallel_element (insn, i); + + if (GET_CODE (curr_insn) == SET + && REG_P (SET_DEST (curr_insn)) + && GET_CODE (SET_SRC (curr_insn)) == PLUS) + return i; + } + return -1; + + case TYPE_LOAD: + case TYPE_STORE: + mem_rtx = extract_mem_rtx (insn); + /* (mem (post_inc (reg))) */ + switch (GET_CODE (XEXP (mem_rtx, 0))) + { + case POST_INC: + case POST_DEC: + case POST_MODIFY: + return 0; + + default: + return -1; + } + + default: + gcc_unreachable (); + } +} + +/* Determine if INSN is a post update insn. */ +bool +post_update_insn_p (rtx insn) +{ + if (find_post_update_rtx (insn) == -1) + return false; + else + return true; +} + +/* Extract the base register from load/store insns. The function returns + NULL_RTX if the address is not consist of any registers. */ +rtx +extract_base_reg (rtx insn) +{ + int post_update_rtx_index; + rtx mem_rtx; + rtx plus_rtx; + + /* Find the MEM rtx. If we can find an insn updating the base register, + the base register will be returned directly. */ + switch (get_attr_type (insn)) + { + case TYPE_LOAD_MULTIPLE: + post_update_rtx_index = find_post_update_rtx (insn); + + if (post_update_rtx_index != -1) + return SET_DEST (parallel_element (insn, post_update_rtx_index)); + + mem_rtx = SET_SRC (parallel_element (insn, 0)); + break; + + case TYPE_STORE_MULTIPLE: + post_update_rtx_index = find_post_update_rtx (insn); + + if (post_update_rtx_index != -1) + return SET_DEST (parallel_element (insn, post_update_rtx_index)); + + mem_rtx = SET_DEST (parallel_element (insn, 0)); + break; + + case TYPE_LOAD: + case TYPE_STORE: + mem_rtx = extract_mem_rtx (insn); + break; + + default: + gcc_unreachable (); + } + + gcc_assert (MEM_P (mem_rtx)); + + /* (mem (reg)) */ + if (REG_P (XEXP (mem_rtx, 0))) + return XEXP (mem_rtx, 0); + + plus_rtx = XEXP (mem_rtx, 0); + + if (GET_CODE (plus_rtx) == SYMBOL_REF + || GET_CODE (plus_rtx) == CONST) + return NULL_RTX; + + gcc_assert (GET_CODE (plus_rtx) == PLUS + || GET_CODE (plus_rtx) == POST_INC + || GET_CODE (plus_rtx) == POST_DEC + || GET_CODE (plus_rtx) == POST_MODIFY); + gcc_assert (REG_P (XEXP (plus_rtx, 0))); + /* (mem (plus (reg) (const_int))) or + (mem (post_inc (reg))) or + (mem (post_dec (reg))) or + (mem (post_modify (reg) (plus (reg) (reg)))) */ + return XEXP (plus_rtx, 0); +} + +/* Determine the memory access direction of a load/store insn. 
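+   Based on the cases handled below: a post-increment address or a positive
+   immediate offset is classified as MEM_ACCESS_DIR_POS, a post-decrement
+   address or a negative offset as MEM_ACCESS_DIR_NEG, and a non-constant
+   offset as MEM_ACCESS_DIR_UNKNOWN.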
*/ +memory_access_direction +determine_access_direction (rtx insn) +{ + int post_update_rtx_index; + rtx plus_rtx; + rtx mem_rtx; + rtx offset_rtx; + + switch (get_attr_type (insn)) + { + case TYPE_LOAD_MULTIPLE: + gcc_assert (parallel_elements (insn) >= 2); + + post_update_rtx_index = find_post_update_rtx (insn); + if (post_update_rtx_index != -1) + plus_rtx = SET_SRC (parallel_element (insn, post_update_rtx_index)); + else + { + /* (parallel + [(set (reg) (mem (reg))) : index 0 + (set (reg) (mem (plus (reg) (...)))) : index 1 + ...]) */ + mem_rtx = SET_SRC (parallel_element (insn, 1)); + if (GET_CODE (mem_rtx) == UNSPEC) + mem_rtx = XVECEXP (mem_rtx, 0, 0); + gcc_assert (MEM_P (mem_rtx)); + plus_rtx = XEXP (mem_rtx, 0); + } + break; + + case TYPE_STORE_MULTIPLE: + gcc_assert (parallel_elements (insn) >= 2); + + post_update_rtx_index = find_post_update_rtx (insn); + if (post_update_rtx_index != -1) + plus_rtx = SET_SRC (parallel_element (insn, post_update_rtx_index)); + else + { + /* (parallel + [(set (mem (reg)) (reg)) : index 0 + (set (mem (plus (reg) (...))) (reg)) : index 1 + ...]) */ + mem_rtx = SET_DEST (parallel_element (insn, 1)); + if (GET_CODE (mem_rtx) == UNSPEC) + mem_rtx = XVECEXP (mem_rtx, 0, 0); + gcc_assert (MEM_P (mem_rtx)); + plus_rtx = XEXP (mem_rtx, 0); + } + break; + + case TYPE_LOAD: + case TYPE_STORE: + mem_rtx = extract_mem_rtx (insn); + + switch (GET_CODE (XEXP (mem_rtx, 0))) + { + case POST_INC: + /* (mem (post_inc (...))) */ + return MEM_ACCESS_DIR_POS; + + case POST_DEC: + /* (mem (post_dec (...))) */ + return MEM_ACCESS_DIR_NEG; + + case PLUS: + /* (mem (plus (reg) (...))) */ + plus_rtx = XEXP (mem_rtx, 0); + break; + + case POST_MODIFY: + /* (mem (post_modify (reg) (plus (reg) (...)))) */ + plus_rtx = XEXP (XEXP (mem_rtx, 0), 1); + break; + + default: + gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + } + + gcc_assert (GET_CODE (plus_rtx) == PLUS); + + offset_rtx = XEXP (plus_rtx, 1); + if (GET_CODE (offset_rtx) == CONST_INT) + { + if (INTVAL (offset_rtx) < 0) + return MEM_ACCESS_DIR_NEG; + else + return MEM_ACCESS_DIR_POS; + } + + return MEM_ACCESS_DIR_UNKNOWN; +} + +/* Return the nth load/store operation in the real micro-operation + accessing order. */ +rtx +extract_nth_access_rtx (rtx insn, int n) +{ + int n_elems = parallel_elements (insn); + int post_update_rtx_index = find_post_update_rtx (insn); + memory_access_direction direction = determine_access_direction (insn); + + gcc_assert (direction != MEM_ACCESS_DIR_UNKNOWN); + + /* Reverse the order if the direction negative. */ + if (direction == MEM_ACCESS_DIR_NEG) + n = -1 * n - 1; + + if (post_update_rtx_index != -1) + { + if (n >= 0 && post_update_rtx_index <= n) + ++n; + else if (n < 0 && post_update_rtx_index >= n + n_elems) + --n; + } + + return parallel_element (insn, n); +} + +/* Returns the register operated by the nth load/store operation in the real + micro-operation accessing order. This function assumes INSN must be a + multiple-word load/store insn. */ +rtx +extract_nth_lmsw_access_reg (rtx insn, int n) +{ + rtx nth_rtx = extract_nth_access_rtx (insn, n); + + if (nth_rtx == NULL_RTX) + return NULL_RTX; + + switch (get_attr_type (insn)) + { + case TYPE_LOAD_MULTIPLE: + return SET_DEST (nth_rtx); + + case TYPE_STORE_MULTIPLE: + return SET_SRC (nth_rtx); + + default: + gcc_unreachable (); + } +} + +/* Returns the register operated by the nth load/store operation in the real + micro-operation accessing order. 
This function assumes INSN must be a + double-word load/store insn. */ +rtx +extract_nth_ls2_access_reg (rtx insn, int n) +{ + rtx reg; + enum machine_mode mode; + + if (post_update_insn_p (insn)) + { + memory_access_direction direction = determine_access_direction (insn); + gcc_assert (direction != MEM_ACCESS_DIR_UNKNOWN); + + /* Reverse the order if the direction negative. */ + if (direction == MEM_ACCESS_DIR_NEG) + n = -1 * n - 1; + } + + /* Handle the out-of-range case. */ + if (n < -2 || n > 1) + return NULL_RTX; + + /* Convert the index to a positive one. */ + if (n < 0) + n = 2 + n; + + switch (get_attr_type (insn)) + { + case TYPE_LOAD: + reg = SET_DEST (PATTERN (insn)); + break; + + case TYPE_STORE: + reg = SET_SRC (PATTERN (insn)); + break; + + default: + gcc_unreachable (); + } + + gcc_assert (REG_P (reg) || GET_CODE (reg) == SUBREG); + + switch (GET_MODE (reg)) + { + case DImode: + mode = SImode; + break; + + case DFmode: + mode = SFmode; + break; + + default: + gcc_unreachable (); + } + + if (n == 0) + return gen_lowpart (mode, reg); + else + return gen_highpart (mode, reg); +} + +/* Returns the register operated by the nth load/store operation in the real + micro-operation accessing order. */ +rtx +extract_nth_access_reg (rtx insn, int index) +{ + switch (GET_CODE (PATTERN (insn))) + { + case PARALLEL: + return extract_nth_lmsw_access_reg (insn, index); + + case SET: + return extract_nth_ls2_access_reg (insn, index); + + default: + gcc_unreachable (); + } +} + +/* Check if a load/store insn uses a register as a base or offset register. */ +bool +address_use_reg_p (rtx insn, rtx use_reg) +{ + switch (get_attr_type (insn)) + { + case TYPE_LOAD_MULTIPLE: + case TYPE_STORE_MULTIPLE: + if (rtx_equal_p (use_reg, extract_base_reg (insn))) + return true; + return false; + + case TYPE_LOAD: + case TYPE_STORE: + if (reg_overlap_p (use_reg, extract_mem_rtx (insn))) + return true; + return false; + + default: + return false; + } +} + +/* Extract the register of the shift operand from an ALU_SHIFT rtx. */ +rtx +extract_shift_reg (rtx alu_shift_rtx) +{ + alu_shift_rtx = extract_pattern_from_insn (alu_shift_rtx); + + rtx alu_rtx = SET_SRC (alu_shift_rtx); + rtx shift_rtx; + + /* Various forms of ALU_SHIFT can be made by the combiner. + See the difference between add_slli and sub_slli in nds32.md. */ + if (REG_P (XEXP (alu_rtx, 0))) + shift_rtx = XEXP (alu_rtx, 1); + else + shift_rtx = XEXP (alu_rtx, 0); + + return XEXP (shift_rtx, 0); +} + + +/* Determine if the latency is occured when the consumer PBSADA_INSN uses the + value of DEF_REG in its Ra or Rb fields. */ +bool +pbsada_insn_ra_rb_dep_reg_p (rtx pbsada_insn, rtx def_reg) +{ + rtx unspec_rtx = SET_SRC (PATTERN (pbsada_insn)); + gcc_assert (GET_CODE (unspec_rtx) == UNSPEC); + + rtx pbsada_ra = XVECEXP (unspec_rtx, 0, 0); + rtx pbsada_rb = XVECEXP (unspec_rtx, 0, 1); + + if (rtx_equal_p (def_reg, pbsada_ra) + || rtx_equal_p (def_reg, pbsada_rb)) + return true; + + return false; +} + +/* Determine if the latency is occured when the consumer PBSADA_INSN uses the + value of DEF_REG in its Rt field. */ +bool +pbsada_insn_rt_dep_reg_p (rtx pbsada_insn, rtx def_reg) +{ + rtx pbsada_rt = SET_DEST (PATTERN (pbsada_insn)); + + if (rtx_equal_p (def_reg, pbsada_rt)) + return true; + + return false; +} + +/* Check if the address of MEM_RTX consists of a base register and an + immediate offset. 
*/ +bool +immed_offset_p (rtx mem_rtx) +{ + gcc_assert (MEM_P (mem_rtx)); + + rtx addr_rtx = XEXP (mem_rtx, 0); + + /* (mem (reg)) is equivalent to (mem (plus (reg) (const_int 0))) */ + if (REG_P (addr_rtx)) + return true; + + /* (mem (plus (reg) (const_int))) */ + if (GET_CODE (addr_rtx) == PLUS + && GET_CODE (XEXP (addr_rtx, 1)) == CONST_INT) + return true; + + return false; +} + +/* Check if INSN is a movd44 insn. */ +bool +movd44_insn_p (rtx insn) +{ + if (get_attr_type (insn) == TYPE_ALU + && (INSN_CODE (insn) == CODE_FOR_move_di + || INSN_CODE (insn) == CODE_FOR_move_df)) + { + rtx body = PATTERN (insn); + gcc_assert (GET_CODE (body) == SET); + + rtx src = SET_SRC (body); + rtx dest = SET_DEST (body); + + if ((REG_P (src) || GET_CODE (src) == SUBREG) + && (REG_P (dest) || GET_CODE (dest) == SUBREG)) + return true; + + return false; + } + + return false; +} + +/* Check if INSN is a movd44 insn consuming DEF_REG. */ +bool +movd44_even_dep_p (rtx insn, rtx def_reg) +{ + if (!movd44_insn_p (insn)) + return false; + + rtx use_rtx = SET_SRC (PATTERN (insn)); + + if (REG_P (def_reg)) + { + return rtx_equal_p (def_reg, use_rtx); + } + else if (GET_CODE (def_reg) == SUBREG + && GET_MODE (def_reg) == SImode + && rtx_equal_p (SUBREG_REG (def_reg), use_rtx)) + { + if (TARGET_BIG_ENDIAN && SUBREG_BYTE (def_reg) == 4) + return true; + + if (!TARGET_BIG_ENDIAN && SUBREG_BYTE (def_reg) == 0) + return true; + + return false; + } + + return false; +} + +/* Extract the first result (even reg) of a movd44 insn. */ +rtx +extract_movd44_even_reg (rtx insn) +{ + gcc_assert (movd44_insn_p (insn)); + + rtx def_reg = SET_DEST (PATTERN (insn)); + enum machine_mode mode; + + gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG); + switch (GET_MODE (def_reg)) + { + case DImode: + mode = SImode; + break; + + case DFmode: + mode = SFmode; + break; + + default: + gcc_unreachable (); + } + + return gen_lowpart (mode, def_reg); +} + +/* Extract the second result (odd reg) of a movd44 insn. */ +rtx +extract_movd44_odd_reg (rtx insn) +{ + gcc_assert (movd44_insn_p (insn)); + + rtx def_reg = SET_DEST (PATTERN (insn)); + enum machine_mode mode; + + gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG); + switch (GET_MODE (def_reg)) + { + case DImode: + mode = SImode; + break; + + case DFmode: + mode = SFmode; + break; + + default: + gcc_unreachable (); + } + + return gen_highpart (mode, def_reg); +} + +/* Extract the rtx representing the branch target to help recognize + data hazards. */ +rtx +extract_branch_target_rtx (rtx insn) +{ + gcc_assert (CALL_P (insn) || JUMP_P (insn)); + + rtx body = PATTERN (insn); + + if (GET_CODE (body) == SET) + { + /* RTXs in IF_THEN_ELSE are branch conditions. */ + if (GET_CODE (SET_SRC (body)) == IF_THEN_ELSE) + return NULL_RTX; + + return SET_SRC (body); + } + + if (GET_CODE (body) == CALL) + return XEXP (body, 0); + + if (GET_CODE (body) == PARALLEL) + { + rtx first_rtx = parallel_element (body, 0); + + if (GET_CODE (first_rtx) == SET) + return SET_SRC (first_rtx); + + if (GET_CODE (first_rtx) == CALL) + return XEXP (first_rtx, 0); + } + + /* Handle special cases of bltzal, bgezal and jralnez. 
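+     These conditional call/jump forms are represented as a COND_EXEC
+     wrapping the call, so the branch target is taken from the wrapped
+     SET or CALL rtx handled below.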
 */ + if (GET_CODE (body) == COND_EXEC) + { + rtx addr_rtx = XEXP (body, 1); + + if (GET_CODE (addr_rtx) == SET) + return SET_SRC (addr_rtx); + + if (GET_CODE (addr_rtx) == PARALLEL) + { + rtx first_rtx = parallel_element (addr_rtx, 0); + + if (GET_CODE (first_rtx) == SET) + { + rtx call_rtx = SET_SRC (first_rtx); + gcc_assert (GET_CODE (call_rtx) == CALL); + + return XEXP (call_rtx, 0); + } + + if (GET_CODE (first_rtx) == CALL) + return XEXP (first_rtx, 0); + } + } + + gcc_unreachable (); +} + +/* Extract the rtx representing the branch condition to help recognize + data hazards. */ +rtx +extract_branch_condition_rtx (rtx insn) +{ + gcc_assert (CALL_P (insn) || JUMP_P (insn)); + + rtx body = PATTERN (insn); + + if (GET_CODE (body) == SET) + { + rtx if_then_else_rtx = SET_SRC (body); + + if (GET_CODE (if_then_else_rtx) == IF_THEN_ELSE) + return XEXP (if_then_else_rtx, 0); + + return NULL_RTX; + } + + if (GET_CODE (body) == COND_EXEC) + return XEXP (body, 0); + + return NULL_RTX; +} + +pipeline_simulator::pipeline_simulator () +{ + /* The design of dfa_start () operates on static global variables and + allocates memory space without checking whether the function is called + twice or not. We add some guards in order to protect it from misuse. */ + gcc_assert (gcc_dfa_initialized_ == false); + if (!gcc_dfa_initialized_) + { + dfa_start (); + gcc_dfa_initialized_ = true; + } + + state_ = xmalloc (state_size ()); + state_reset (state_); +} + +pipeline_simulator::~pipeline_simulator () +{ + /* The design of dfa_finish () operates on a static global variable and + deallocates memory space without checking whether the function is called + twice or not. We add some guards in order to protect it from misuse. */ + free (state_); + + gcc_assert (gcc_dfa_initialized_ == true); + if (gcc_dfa_initialized_) + { + dfa_finish (); + gcc_dfa_initialized_ = false; + } +} + +void +pipeline_simulator::advance_cycle (int cycles) +{ + gcc_assert (cycles > 0); + + /* The second argument was 'NULL', but we found the following expression + directly written in insn-automata.c: + if (insn == 0) + insn_code = DFA__ADVANCE_CYCLE; + Hence we pass '0' in order to stay consistent. */ + while (cycles--) + state_transition (state_, 0); +} + +/* A wrapper of insn_latency () provided by insn-attr.h in the object tree. + See that file for more information. */ +int +pipeline_simulator::query_latency (rtx producer, rtx consumer) const +{ + return insn_latency (producer, consumer); +} + +/* Return 0 or negative if we can issue INSN at the current cycle. Otherwise, + return a positive value that indicates how many cycles we have to wait. The + interface is consistent with state_transition () provided by insn-attr.h + in the object directory. See that file for more information. */ +int +pipeline_simulator::issue_insn (rtx insn) +{ + int stalls; + + /* Skip cycles specified by pseudo NOPs. */ + if (insn_pseudo_nop_p (insn)) + { + int nop_stalls = INTVAL (XVECEXP (PATTERN (insn), 0, 0)); + + gcc_assert (nop_stalls > 0); + advance_cycle (nop_stalls); + stalls = -1; + } + else + { + stalls = state_transition (state_, insn); + + /* All targets are single-issue, so we advance one cycle after + an insn has been issued successfully. */ + if (stalls <= 0) + advance_cycle (); + } + + return stalls; +} + +/* This function is similar to issue_insn (), but it advances cycles until INSN + can be issued successfully. If INSN can be issued at the current cycle, the + return value will be 0 or negative.
 Otherwise, the function will return + the number of cycles that have been skipped. */ +int +pipeline_simulator::force_issue_insn (rtx insn) +{ + int stalls; + + stalls = issue_insn (insn); + + /* Skip cycles until we can issue the insn. */ + if (stalls > 0) + { + advance_cycle (stalls); + issue_insn (insn); + } + + return stalls; +} + +/* The main flow of the class STALL_INSERTER. We insert NOPs for structural + hazards first because self-stalled instructions also consume the delay + cycles caused by data hazards. */ +void +stall_inserter::insert_stalls () +{ + compute_bb_for_insn_safe (); + + insert_structural_hazard_stalls (); + insert_data_hazard_stalls (); + + /* We have to call the following two functions again after inserting new + insns, because the insn length information computed earlier is no longer + valid. Otherwise, an assert expression in final () will be triggered and + cause an internal compiler error. */ + init_insn_lengths (); + shorten_branches (get_insns ()); + + free_bb_for_insn (); +} + +/* Later back-end passes that rebuild the CFG cannot call + compute_bb_for_insn () directly, because some insns have been deleted and + calling BLOCK_FOR_INSN (insn) on them would cause a segmentation fault. + Using this function to rebuild the CFG information avoids such issues. */ +void +stall_inserter::compute_bb_for_insn_safe () +{ + basic_block bb; + + FOR_EACH_BB_FN (bb, cfun) + { + rtx insn, next_insn, last_insn; + bool after_last_insn = false; + + /* Let last_insn be the last insn which is not deleted. */ + for (last_insn = BB_END (bb); + PREV_INSN (last_insn) && insn_deleted_p (last_insn); + last_insn = PREV_INSN (last_insn)); + + /* Bind each insn to its BB and adjust BB_END (bb). */ + for (insn = BB_HEAD (bb); insn; insn = NEXT_INSN (insn)) + { + BLOCK_FOR_INSN (insn) = bb; + + if (insn == last_insn) after_last_insn = true; + next_insn = NEXT_INSN (insn); + + if (after_last_insn + && (!next_insn + || LABEL_P (next_insn) + || NOTE_INSN_BASIC_BLOCK_P (next_insn))) + { + BB_END (bb) = insn; + break; + } + } + } +} + +/* A helper function for inserting NOPs. CYCLES indicates how many cycles the + NOP insn consumes. TYPE indicates which type of NOP insn we want to insert; + currently there are two types available: RES_DEP and DATA_DEP. */ +rtx +stall_inserter::emit_pseudo_nop_before ( + rtx insn, int cycles, enum dep_type type) +{ + rtx nop_insn; + int recog; + + switch (type) + { + case RES_DEP: + nop_insn = gen_nop_res_dep (GEN_INT (cycles)); + break; + case DATA_DEP: + nop_insn = gen_nop_data_dep (GEN_INT (cycles)); + break; + default: + gcc_unreachable (); + } + + nop_insn = emit_insn_before (nop_insn, insn); + recog = recog_memoized (nop_insn); + gcc_assert (recog != -1); + + return nop_insn; +} + +void +stall_inserter::insert_structural_hazard_stalls () +{ + pipeline_simulator simulator; + rtx insn; + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (!insn_executable_p (insn)) continue; + + int stalls = simulator.force_issue_insn (insn); + + if (stalls > 0) + emit_pseudo_nop_before (insn, stalls, RES_DEP); + } +} + +void +stall_inserter::insert_data_hazard_stalls () +{ + pipeline_simulator simulator; + rtx insn; + + /* Calling df_insn_rescan_all here is required in order to avoid a crash + when some special options are specified by users, such as + -O0 -fschedule-insns2.
*/ + df_chain_add_problem (DF_DU_CHAIN); + df_insn_rescan_all (); + df_analyze (); + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (!insn_executable_p (insn)) continue; + + simulator.force_issue_insn (insn); + emit_pseudo_nops_for_data_hazards (insn, simulator); + } + + /* We must call df_finish_pass manually because it should be invoked before + BB information is destroyed. Hence we cannot set the TODO_df_finish flag + to the pass manager. */ + df_insn_rescan_all (); + df_finish_pass (false); +} + +/* Traverse all insns using the results produced by INSN and ask SIMULATOR + how many delay cycles between them. If there are some delay cycles, insert + corresponding NOP insns there. */ +void +stall_inserter::emit_pseudo_nops_for_data_hazards ( + rtx insn, pipeline_simulator &simulator) +{ + df_ref *def_record; + df_link *link; + std::set processed_insns; + + for (def_record = DF_INSN_DEFS (insn); *def_record; ++def_record) + { + for (link = DF_REF_CHAIN (*def_record); link; link = link->next) + { + if (!DF_REF_INSN_INFO (link->ref)) + continue; + + rtx use_insn = DF_REF_INSN (link->ref); + + if (!insn_executable_p (use_insn) + || processed_insns.count (use_insn)) + continue; + + int stalls = simulator.query_latency (insn, use_insn); + int distance = cycle_distance (insn, use_insn); + + if (stalls > distance) + { + stalls -= distance; + emit_pseudo_nop_before (use_insn, stalls, DATA_DEP); + processed_insns.insert (use_insn); + } + } + } +} + +unsigned int +nds32_print_stalls (void) +{ + stall_inserter inserter; + + inserter.insert_stalls (); + return 0; +} + +} // namespace scheduling +} // namespace nds32 + +/* ------------------------------------------------------------------------ */ + +using namespace nds32::scheduling; + +namespace { // anonymous namespace + +bool +n7_consumed_by_ii_dep_p (rtx consumer, rtx def_reg) +{ + rtx use_rtx, acc_rtx; + + switch (get_attr_type (consumer)) + { + case TYPE_ALU: + if (movd44_even_dep_p (consumer, def_reg)) + return true; + + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_MUL: + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_MAC: + acc_rtx = SET_SRC (PATTERN (consumer)); + + if (REG_P (XEXP (acc_rtx, 0))) + use_rtx = XEXP (acc_rtx, 1); + else + use_rtx = XEXP (acc_rtx, 0); + break; + + case TYPE_DIV: + if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 + || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) + use_rtx = SET_SRC (parallel_element (consumer, 0)); + else + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_LOAD: + /* ADDR_IN_bi_Ra, ADDR_IN_!bi */ + if (post_update_insn_p (consumer)) + use_rtx = extract_base_reg (consumer); + else + use_rtx = extract_mem_rtx (consumer); + break; + + case TYPE_STORE: + /* ADDR_IN_bi_Ra, ADDR_IN_!bi */ + if (post_update_insn_p (consumer)) + use_rtx = extract_base_reg (consumer); + else + use_rtx = extract_mem_rtx (consumer); + + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + /* ST_bi, ST_!bi_RI */ + if (!post_update_insn_p (consumer) + && !immed_offset_p (extract_mem_rtx (consumer))) + return false; + + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_LOAD_MULTIPLE: + use_rtx = extract_base_reg (consumer); + break; + + case TYPE_STORE_MULTIPLE: + /* ADDR_IN */ + use_rtx = extract_base_reg (consumer); + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + /* SMW (N, 1) */ + use_rtx = extract_nth_access_rtx (consumer, 0); + break; + + case TYPE_BRANCH: + use_rtx = PATTERN (consumer); + break; + + default: + gcc_unreachable (); 
+ } + + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + return false; +} + +bool +n8_consumed_by_addr_in_p (rtx consumer, rtx def_reg) +{ + rtx use_rtx; + + switch (get_attr_type (consumer)) + { + case TYPE_BRANCH: + use_rtx = extract_branch_target_rtx (consumer); + break; + + case TYPE_LOAD: + if (nds32_load_single_p (consumer)) + use_rtx = extract_mem_rtx (consumer); + else + use_rtx = extract_base_reg (consumer); + break; + + case TYPE_STORE: + if (nds32_store_single_p (consumer) + && (!post_update_insn_p (consumer) + || immed_offset_p (extract_mem_rtx (consumer)))) + use_rtx = extract_mem_rtx (consumer); + else + use_rtx = extract_base_reg (consumer); + break; + + case TYPE_LOAD_MULTIPLE: + case TYPE_STORE_MULTIPLE: + use_rtx = extract_base_reg (consumer); + break; + + default: + gcc_unreachable (); + } + + return reg_overlap_p (def_reg, use_rtx); +} + +bool +n8_consumed_by_ex_p (rtx consumer, rtx def_reg) +{ + rtx use_rtx, acc_rtx; + + switch (get_attr_type (consumer)) + { + case TYPE_ALU: + if (movd44_even_dep_p (consumer, def_reg)) + return true; + + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_MUL: + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_MAC: + acc_rtx = SET_SRC (PATTERN (consumer)); + + if (REG_P (XEXP (acc_rtx, 0))) + use_rtx = XEXP (acc_rtx, 1); + else + use_rtx = XEXP (acc_rtx, 0); + break; + + case TYPE_DIV: + if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 + || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) + use_rtx = SET_SRC (parallel_element (consumer, 0)); + else + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_BRANCH: + use_rtx = extract_branch_condition_rtx (consumer); + break; + + case TYPE_STORE: + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_STORE_MULTIPLE: + use_rtx = extract_nth_access_rtx (consumer, 0); + break; + + default: + gcc_unreachable (); + } + + return reg_overlap_p (def_reg, use_rtx); +} + +bool +e8_consumed_by_addr_in_p (rtx consumer, rtx def_reg) +{ + return n8_consumed_by_addr_in_p (consumer, def_reg); +} + +bool +e8_consumed_by_ex_p (rtx consumer, rtx def_reg) +{ + rtx use_rtx; + + switch (get_attr_type (consumer)) + { + case TYPE_ALU: + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_MUL: + case TYPE_MAC: + case TYPE_DIV: + case TYPE_BRANCH: + case TYPE_STORE: + case TYPE_STORE_MULTIPLE: + return n8_consumed_by_ex_p (consumer, def_reg); + + default: + gcc_unreachable (); + } + + return reg_overlap_p (def_reg, use_rtx); +} + +bool +n9_2r1w_consumed_by_ex_dep_p (rtx consumer, rtx def_reg) +{ + rtx use_rtx; + + switch (get_attr_type (consumer)) + { + case TYPE_ALU: + if (movd44_even_dep_p (consumer, def_reg)) + return true; + + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_PBSAD: + case TYPE_MUL: + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_ALU_SHIFT: + use_rtx = extract_shift_reg (consumer); + break; + + case TYPE_PBSADA: + return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg); + + case TYPE_MAC: + use_rtx = PATTERN (consumer); + break; + + case TYPE_DIV: + if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 + || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) + use_rtx = SET_SRC (parallel_element (consumer, 0)); + else + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_MMU: + if (GET_CODE (PATTERN (consumer)) == SET) + use_rtx = SET_SRC (PATTERN (consumer)); + else + return true; + break; + + case TYPE_LOAD: + /* ADDR_IN_bi_Ra, ADDR_IN_!bi */ + if (post_update_insn_p (consumer)) + use_rtx = extract_base_reg 
(consumer); + else + use_rtx = extract_mem_rtx (consumer); + break; + + case TYPE_STORE: + /* ADDR_IN_bi_Ra, ADDR_IN_!bi */ + if (post_update_insn_p (consumer)) + use_rtx = extract_base_reg (consumer); + else + use_rtx = extract_mem_rtx (consumer); + + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + /* ST_bi, ST_!bi_RI */ + if (!post_update_insn_p (consumer) + && !immed_offset_p (extract_mem_rtx (consumer))) + return false; + + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_LOAD_MULTIPLE: + use_rtx = extract_base_reg (consumer); + break; + + case TYPE_STORE_MULTIPLE: + /* ADDR_IN */ + use_rtx = extract_base_reg (consumer); + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + /* SMW (N, 1) */ + use_rtx = extract_nth_access_rtx (consumer, 0); + break; + + case TYPE_BRANCH: + use_rtx = PATTERN (consumer); + break; + + default: + gcc_unreachable (); + } + + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + return false; +} + +bool +n9_3r2w_consumed_by_ex_dep_p (rtx consumer, rtx def_reg) +{ + rtx acc_rtx, use_rtx; + + switch (get_attr_type (consumer)) + { + case TYPE_ALU: + case TYPE_PBSAD: + case TYPE_MUL: + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_ALU_SHIFT: + use_rtx = extract_shift_reg (consumer); + break; + + case TYPE_PBSADA: + return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg); + + case TYPE_MAC: + acc_rtx = SET_SRC (PATTERN (consumer)); + + if (REG_P (XEXP (acc_rtx, 0))) + use_rtx = XEXP (acc_rtx, 1); + else + use_rtx = XEXP (acc_rtx, 0); + break; + + case TYPE_DIV: + if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 + || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) + use_rtx = SET_SRC (parallel_element (consumer, 0)); + else + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_MMU: + if (GET_CODE (PATTERN (consumer)) == SET) + use_rtx = SET_SRC (PATTERN (consumer)); + else + return true; + break; + + case TYPE_LOAD: + case TYPE_STORE: + use_rtx = extract_mem_rtx (consumer); + break; + + case TYPE_LOAD_MULTIPLE: + case TYPE_STORE_MULTIPLE: + use_rtx = extract_base_reg (consumer); + break; + + case TYPE_BRANCH: + use_rtx = PATTERN (consumer); + break; + + default: + gcc_unreachable (); + } + + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + return false; +} + +bool +n13_alu_e1_insn_dep_reg_p (rtx alu_e1_insn, rtx def_reg) +{ + rtx unspec_rtx, operand_ra, operand_rb; + rtx src_rtx, dst_rtx; + + switch (INSN_CODE (alu_e1_insn)) + { + case CODE_FOR_unspec_bsp: + case CODE_FOR_unspec_bse: + unspec_rtx = SET_SRC (parallel_element (alu_e1_insn, 0)); + gcc_assert (GET_CODE (unspec_rtx) == UNSPEC); + + operand_ra = XVECEXP (unspec_rtx, 0, 0); + operand_rb = XVECEXP (unspec_rtx, 0, 1); + + if (rtx_equal_p (def_reg, operand_ra) + || rtx_equal_p (def_reg, operand_rb)) + return true; + + return false; + + case CODE_FOR_move_di: + case CODE_FOR_move_df: + src_rtx = SET_SRC (PATTERN (alu_e1_insn)); + dst_rtx = SET_DEST (PATTERN (alu_e1_insn)); + + if (REG_P (dst_rtx) && REG_P (src_rtx) + && rtx_equal_p (src_rtx, def_reg)) + return true; + + return false; + + default: + return false; + } +} + +bool +n13_consumed_by_e1_dep_p (rtx consumer, rtx def_reg) +{ + rtx use_rtx, acc_rtx; + + switch (get_attr_type (consumer)) + { + case TYPE_ALU: + return n13_alu_e1_insn_dep_reg_p (consumer, def_reg); + + case TYPE_PBSADA: + return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg); + + case TYPE_PBSAD: + case TYPE_MUL: + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_MAC: + acc_rtx = SET_SRC (PATTERN (consumer)); + + 
if (REG_P (XEXP (acc_rtx, 0))) + use_rtx = XEXP (acc_rtx, 1); + else + use_rtx = XEXP (acc_rtx, 0); + break; + + case TYPE_DIV: + if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 + || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) + use_rtx = SET_SRC (parallel_element (consumer, 0)); + else + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_MMU: + if (GET_CODE (PATTERN (consumer)) == SET) + use_rtx = SET_SRC (PATTERN (consumer)); + else + return true; + break; + + case TYPE_BRANCH: + use_rtx = extract_branch_target_rtx (consumer); + break; + + case TYPE_LOAD: + case TYPE_STORE: + use_rtx = extract_mem_rtx (consumer); + break; + + case TYPE_LOAD_MULTIPLE: + case TYPE_STORE_MULTIPLE: + use_rtx = extract_base_reg (consumer); + break; + + default: + return false; + } + + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + return false; +} + +bool +n13_consumed_by_e2_dep_p (rtx consumer, rtx def_reg) +{ + rtx use_rtx; + + switch (get_attr_type (consumer)) + { + case TYPE_ALU: + case TYPE_STORE: + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_ALU_SHIFT: + use_rtx = extract_shift_reg (consumer); + break; + + case TYPE_PBSADA: + return pbsada_insn_rt_dep_reg_p (consumer, def_reg); + + case TYPE_STORE_MULTIPLE: + use_rtx = extract_nth_access_rtx (consumer, 0); + break; + + case TYPE_BRANCH: + use_rtx = extract_branch_condition_rtx (consumer); + break; + + default: + gcc_unreachable(); + } + + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + return false; +} + +} // anonymous namespace + +/* ------------------------------------------------------------------------ */ + +/* Functions to determine whether INSN is single-word or double-word + load/store insn. */ + +bool +nds32_load_single_p (rtx insn) +{ + if (get_attr_type (insn) != TYPE_LOAD) + return false; + + if (INSN_CODE (insn) == CODE_FOR_move_di || + INSN_CODE (insn) == CODE_FOR_move_df) + return false; + + return true; +} + +bool +nds32_store_single_p (rtx insn) +{ + if (get_attr_type (insn) != TYPE_STORE) + return false; + + if (INSN_CODE (insn) == CODE_FOR_move_di || + INSN_CODE (insn) == CODE_FOR_move_df) + return false; + + return true; +} + +bool +nds32_load_double_p (rtx insn) +{ + if (get_attr_type (insn) != TYPE_LOAD) + return false; + + if (INSN_CODE (insn) != CODE_FOR_move_di && + INSN_CODE (insn) != CODE_FOR_move_df) + return false; + + return true; +} + +bool +nds32_store_double_p (rtx insn) +{ + if (get_attr_type (insn) != TYPE_STORE) + return false; + + if (INSN_CODE (insn) != CODE_FOR_move_di && + INSN_CODE (insn) != CODE_FOR_move_df) + return false; + + return true; +} + +/* Guard functions for N7 core. */ + +bool +nds32_n7_load_to_ii_p (rtx producer, rtx consumer) +{ + if (post_update_insn_p (producer)) + return false; + + rtx def_reg = SET_DEST (PATTERN (producer)); + + return n7_consumed_by_ii_dep_p (consumer, def_reg); +} + +bool +nds32_n7_last_load_to_ii_p (rtx producer, rtx consumer) +{ + /* If PRODUCER is a post-update LMW insn, the last micro-operation updates + the base register and the result is ready in II stage, so we don't need + to handle that case in this guard function and the corresponding bypass + rule. */ + if (post_update_insn_p (producer)) + return false; + + rtx last_def_reg = extract_nth_access_reg (producer, -1); + + if (last_def_reg == NULL_RTX) + return false; + + gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG); + + return n7_consumed_by_ii_dep_p (consumer, last_def_reg); +} + +/* Guard functions for N8 core. 
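+   Each of these guards is meant to be referenced by a bypass rule in the
+   corresponding pipeline description (an assumption based on their naming);
+   a guard returns true only when CONSUMER actually reads a value that
+   PRODUCER makes available late, so the extra latency is applied only to
+   genuine hazards.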
*/ + +bool +nds32_n8_load_to_ii_p (rtx producer, rtx consumer) +{ + if (post_update_insn_p (producer)) + return false; + + rtx def_reg = SET_DEST (PATTERN (producer)); + + return n8_consumed_by_addr_in_p (consumer, def_reg); +} + +bool +nds32_n8_load_bi_to_ii_p (rtx producer, rtx consumer) +{ + if (!post_update_insn_p (producer)) + return false; + + rtx def_reg = SET_DEST (PATTERN (producer)); + + return n8_consumed_by_addr_in_p (consumer, def_reg); +} + +bool +nds32_n8_load_to_ex_p (rtx producer, rtx consumer) +{ + if (post_update_insn_p (producer)) + return false; + + rtx def_reg = SET_DEST (PATTERN (producer)); + + return n8_consumed_by_ex_p (consumer, def_reg); +} + +bool +nds32_n8_ex_to_ii_p (rtx producer, rtx consumer) +{ + rtx def_reg; + + switch (get_attr_type (producer)) + { + case TYPE_ALU: + if (movd44_insn_p (producer)) + def_reg = extract_movd44_odd_reg (producer); + else + def_reg = SET_DEST (PATTERN (producer)); + break; + + case TYPE_MUL: + case TYPE_MAC: + def_reg = SET_DEST (PATTERN (producer)); + break; + + case TYPE_DIV: + if (INSN_CODE (producer) == CODE_FOR_divmodsi4 + || INSN_CODE (producer) == CODE_FOR_udivmodsi4) + def_reg = SET_DEST (parallel_element (producer, 1)); + else + def_reg = SET_DEST (PATTERN (producer)); + break; + + case TYPE_LOAD: + case TYPE_STORE: + case TYPE_LOAD_MULTIPLE: + case TYPE_STORE_MULTIPLE: + if (!post_update_insn_p (producer)) + return false; + + def_reg = extract_base_reg (producer); + break; + + default: + gcc_unreachable (); + } + + return n8_consumed_by_addr_in_p (consumer, def_reg); +} + +bool +nds32_n8_last_load_to_ii_p (rtx producer, rtx consumer) +{ + /* If PRODUCER is a post-update LMW insn, the last micro-operation updates + the base register and the result is ready in EX stage, so we don't need + to handle that case in this guard function and the corresponding bypass + rule. */ + if (post_update_insn_p (producer)) + return false; + + rtx last_def_reg = extract_nth_access_reg (producer, -1); + + if (last_def_reg == NULL_RTX) + return false; + + gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG); + + return n8_consumed_by_addr_in_p (consumer, last_def_reg); +} + +bool +nds32_n8_last_load_two_to_ii_p (rtx producer, rtx consumer) +{ + int index = -2; + + /* If PRODUCER is a post-update insn, there is an additional one micro- + operation inserted in the end, so the last memory access operation should + be handled by this guard function and the corresponding bypass rule. */ + if (post_update_insn_p (producer)) + index = -1; + + rtx last_two_def_reg = extract_nth_access_reg (producer, index); + + if (last_two_def_reg == NULL_RTX) + return false; + + gcc_assert (REG_P (last_two_def_reg) + || GET_CODE (last_two_def_reg) == SUBREG); + + return n8_consumed_by_addr_in_p (consumer, last_two_def_reg); +} + +bool +nds32_n8_last_load_to_ex_p (rtx producer, rtx consumer) +{ + /* If PRODUCER is a post-update LMW insn, the last micro-operation updates + the base register and the result is ready in EX stage, so we don't need + to handle that case in this guard function and the corresponding bypass + rule. */ + if (post_update_insn_p (producer)) + return false; + + rtx last_def_reg = extract_nth_access_reg (producer, -1); + + if (last_def_reg == NULL_RTX) + return false; + + gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG); + + return n8_consumed_by_ex_p (consumer, last_def_reg); +} + +/* Guard functions for E8 cores. 
*/ + +bool +nds32_e8_load_to_ii_p (rtx producer, rtx consumer) +{ + rtx def_reg = SET_DEST (PATTERN (producer)); + + return e8_consumed_by_addr_in_p (consumer, def_reg); +} + +bool +nds32_e8_load_to_ex_p (rtx producer, rtx consumer) +{ + rtx def_reg = SET_DEST (PATTERN (producer)); + + return e8_consumed_by_ex_p (consumer, def_reg); +} + +bool +nds32_e8_ex_to_ii_p (rtx producer, rtx consumer) +{ + rtx def_reg; + + switch (get_attr_type (producer)) + { + case TYPE_ALU: + /* No data hazards if AGEN's input is produced by MOVI or SETHI. */ + if (GET_CODE (PATTERN (producer)) == SET) + { + rtx dest = SET_DEST (PATTERN (producer)); + rtx src = SET_SRC (PATTERN (producer)); + + if ((REG_P (dest) || GET_CODE (dest) == SUBREG) + && (GET_CODE (src) == CONST_INT || GET_CODE (src) == HIGH)) + return false; + } + + def_reg = SET_DEST (PATTERN (producer)); + break; + + case TYPE_MUL: + case TYPE_MAC: + def_reg = SET_DEST (PATTERN (producer)); + break; + + case TYPE_DIV: + if (INSN_CODE (producer) == CODE_FOR_divmodsi4 + || INSN_CODE (producer) == CODE_FOR_udivmodsi4) + { + rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); + rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); + + return (e8_consumed_by_addr_in_p (consumer, def_reg1) + || e8_consumed_by_addr_in_p (consumer, def_reg2)); + } + + def_reg = SET_DEST (PATTERN (producer)); + break; + + case TYPE_LOAD: + case TYPE_STORE: + case TYPE_LOAD_MULTIPLE: + case TYPE_STORE_MULTIPLE: + if (!post_update_insn_p (producer)) + return false; + + def_reg = extract_base_reg (producer); + break; + + default: + gcc_unreachable (); + } + + return e8_consumed_by_addr_in_p (consumer, def_reg); +} + +bool +nds32_e8_last_load_to_ii_p (rtx producer, rtx consumer) +{ + rtx last_def_reg = extract_nth_access_reg (producer, -1); + + if (last_def_reg == NULL_RTX) + return false; + + gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG); + + return e8_consumed_by_addr_in_p (consumer, last_def_reg); +} + +bool +nds32_e8_last_load_to_ex_p (rtx producer, rtx consumer) +{ + rtx last_def_reg = extract_nth_access_reg (producer, -1); + + if (last_def_reg == NULL_RTX) + return false; + + gcc_assert (REG_P (last_def_reg) || GET_CODE (last_def_reg) == SUBREG); + + return e8_consumed_by_ex_p (consumer, last_def_reg); +} + +/* Guard functions for N9/N10 cores. 
*/ + +bool +nds32_n9_2r1w_mm_to_ex_p (rtx producer, rtx consumer) +{ + rtx def_reg; + + switch (get_attr_type (producer)) + { + case TYPE_LOAD: + if (post_update_insn_p (producer)) + return false; + + def_reg = SET_DEST (PATTERN (producer)); + break; + + case TYPE_MUL: + case TYPE_MAC: + def_reg = SET_DEST (PATTERN (producer)); + break; + + default: + gcc_unreachable (); + } + + return n9_2r1w_consumed_by_ex_dep_p (consumer, def_reg); +} + +bool +nds32_n9_3r2w_mm_to_ex_p (rtx producer, rtx consumer) +{ + rtx def_reg; + + switch (get_attr_type (producer)) + { + case TYPE_LOAD: + case TYPE_MUL: + case TYPE_MAC: + def_reg = SET_DEST (PATTERN (producer)); + break; + + case TYPE_DIV: + if (INSN_CODE (producer) == CODE_FOR_divmodsi4 + || INSN_CODE (producer) == CODE_FOR_udivmodsi4) + { + rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); + rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); + + return (n9_3r2w_consumed_by_ex_dep_p (consumer, def_reg1) + || n9_3r2w_consumed_by_ex_dep_p (consumer, def_reg2)); + } + + def_reg = SET_DEST (PATTERN (producer)); + break; + + default: + gcc_unreachable (); + } + + return n9_3r2w_consumed_by_ex_dep_p (consumer, def_reg); +} + +bool +nds32_n9_last_load_to_ex_p (rtx producer, rtx consumer) +{ + rtx last_def_reg = extract_nth_access_reg (producer, -1); + + if (nds32_register_ports_config == REG_PORT_2R1W) + { + /* The base-update micro operation occupies the last cycle. */ + if (post_update_insn_p (producer)) + return false; + + /* When the base register is in the list of a load multiple insn and the + access order of the base register is not the last one, we need an + additional micro operation to commit the load result to the base + register -- we can treat the base register as the last defined + register. */ + size_t i; + size_t n_elems = parallel_elements (producer); + rtx base_reg = extract_base_reg (producer); + + for (i = 0; i < n_elems; ++i) + { + rtx load_rtx = extract_nth_access_rtx (producer, i); + rtx list_element = SET_DEST (load_rtx); + + if (rtx_equal_p (base_reg, list_element) && i != n_elems - 1) + { + last_def_reg = base_reg; + break; + } + } + + return n9_2r1w_consumed_by_ex_dep_p (consumer, last_def_reg); + } + else + return n9_3r2w_consumed_by_ex_dep_p (consumer, last_def_reg); +} + +/* Guard functions for N12/N13 cores. 
*/ + +bool +nds32_n13_addr_in_p (rtx producer, rtx consumer) +{ + rtx reg; + + switch (get_attr_type (producer)) + { + case TYPE_LOAD: + case TYPE_MUL: + case TYPE_ALU: + case TYPE_ALU_SHIFT: + reg = SET_DEST (PATTERN (producer)); + break; + + default: + return false; + } + + if (address_use_reg_p (consumer, reg)) + return true; + + return false; +} + +bool +nds32_n13_e2_to_e1_p (rtx producer, rtx consumer) +{ + rtx def_reg; + + switch (get_attr_type (producer)) + { + case TYPE_LOAD: + case TYPE_STORE: + case TYPE_LOAD_MULTIPLE: + case TYPE_STORE_MULTIPLE: + if (!post_update_insn_p (producer)) + return false; + + def_reg = extract_base_reg (producer); + break; + + case TYPE_ALU: + case TYPE_ALU_SHIFT: + case TYPE_PBSAD: + case TYPE_PBSADA: + case TYPE_MUL: + case TYPE_MAC: + def_reg = SET_DEST (PATTERN (producer)); + break; + + case TYPE_BRANCH: + return true; + + case TYPE_DIV: + if (INSN_CODE (producer) == CODE_FOR_divmodsi4 + || INSN_CODE (producer) == CODE_FOR_udivmodsi4) + { + rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); + rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); + + return (n13_consumed_by_e1_dep_p (consumer, def_reg1) + || n13_consumed_by_e1_dep_p (consumer, def_reg2)); + } + + def_reg = SET_DEST (PATTERN (producer)); + break; + + default: + gcc_unreachable (); + } + + return n13_consumed_by_e1_dep_p (consumer, def_reg); +} + +bool +nds32_n13_load_to_e1_p (rtx producer, rtx consumer) +{ + rtx def_reg = SET_DEST (PATTERN (producer)); + + gcc_assert (get_attr_type (producer) == TYPE_LOAD); + gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG); + + return n13_consumed_by_e1_dep_p (consumer, def_reg); +} + +bool +nds32_n13_load_to_e2_p (rtx producer, rtx consumer) +{ + rtx def_reg = SET_DEST (PATTERN (producer)); + + gcc_assert (get_attr_type (producer) == TYPE_LOAD); + gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG); + + return n13_consumed_by_e2_dep_p (consumer, def_reg); +} + +bool +nds32_n13_last_load_to_e1_p (rtx producer, rtx consumer) +{ + rtx last_def_reg = extract_nth_access_reg (producer, -1); + + return n13_consumed_by_e1_dep_p (consumer, last_def_reg); +} + +bool +nds32_n13_last_load_to_e2_p (rtx producer, rtx consumer) +{ + rtx last_def_reg = extract_nth_access_reg (producer, -1); + + return n13_consumed_by_e2_dep_p (consumer, last_def_reg); +} + +bool +nds32_n13_last_two_load_to_e1_p (rtx producer, rtx consumer) +{ + rtx last_two_def_reg = extract_nth_access_reg (producer, -2); + + if (last_two_def_reg == NULL_RTX) + return false; + + return n13_consumed_by_e1_dep_p (consumer, last_two_def_reg); +} + +/* ------------------------------------------------------------------------ */ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-predicates.c gcc-4.9.4/gcc/config/nds32/nds32-predicates.c --- gcc-4.9.4.orig/gcc/config/nds32/nds32-predicates.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-predicates.c 2016-08-08 20:37:45.582273034 +0200 @@ -0,0 +1,714 @@ +/* Predicate functions of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2015 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* ------------------------------------------------------------------------ */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" /* Required by recog.h. */ +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" /* For DFA state_t. */ +#include "insn-codes.h" /* For CODE_FOR_xxx. */ +#include "reload.h" /* For push_reload(). */ +#include "flags.h" +#include "function.h" +#include "expr.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "df.h" +#include "tm_p.h" +#include "tm-constrs.h" +#include "optabs.h" /* For GEN_FCN. */ +#include "target.h" +#include "target-def.h" +#include "langhooks.h" /* For add_builtin_function(). */ +#include "ggc.h" + +/* ------------------------------------------------------------------------ */ + +/* A subroutine that checks multiple load and store + using consecutive registers. + OP is a parallel rtx we would like to check. + LOAD_P indicates whether we are checking load operation. + PAR_INDEX is starting element of parallel rtx. + FIRST_ELT_REGNO is used to tell starting register number. + COUNT helps us to check consecutive register numbers. */ +static bool +nds32_consecutive_registers_load_store_p (rtx op, + bool load_p, + int par_index, + int first_elt_regno, + int count) +{ + int i; + int check_regno; + rtx elt; + rtx elt_reg; + rtx elt_mem; + + for (i = 0; i < count; i++) + { + /* Pick up each element from parallel rtx. */ + elt = XVECEXP (op, 0, i + par_index); + + /* If this element is not a 'set' rtx, return false immediately. */ + if (GET_CODE (elt) != SET) + return false; + + /* Pick up reg and mem of this element. */ + elt_reg = load_p ? SET_DEST (elt) : SET_SRC (elt); + elt_mem = load_p ? SET_SRC (elt) : SET_DEST (elt); + + /* If elt_reg is not a expected reg rtx, return false. */ + if (GET_CODE (elt_reg) != REG || GET_MODE (elt_reg) != SImode) + return false; + /* If elt_mem is not a expected mem rtx, return false. */ + if (GET_CODE (elt_mem) != MEM || GET_MODE (elt_mem) != SImode) + return false; + + /* The consecutive registers should be in (Rb,Rb+1...Re) order. */ + check_regno = first_elt_regno + i; + + /* If the register number is not continuous, return false. */ + if (REGNO (elt_reg) != (unsigned int) check_regno) + return false; + } + + return true; +} + +/* Function to check whether the OP is a valid load/store operation. + This is a helper function for the predicates: + 'nds32_load_multiple_operation' and 'nds32_store_multiple_operation' + in predicates.md file. + + The OP is supposed to be a parallel rtx. + For each element within this parallel rtx: + (set (reg) (mem addr)) is the form for load operation. + (set (mem addr) (reg)) is the form for store operation. + We have to extract reg and mem of every element and + check if the information is valid for multiple load/store operation. 
*/ +bool +nds32_valid_multiple_load_store_p (rtx op, bool load_p, bool bim_p) +{ + int count; + int first_elt_regno; + int update_base_elt_idx; + int offset; + rtx elt; + rtx update_base; + + /* Get the counts of elements in the parallel rtx. + Last one is update base register if bim_p. + and pick up the first element. */ + if (bim_p) + { + count = XVECLEN (op, 0) - 1; + elt = XVECEXP (op, 0, 1); + } + else + { + count = XVECLEN (op, 0); + elt = XVECEXP (op, 0, 0); + } + + /* Perform some quick check for the first element in the parallel rtx. */ + if (GET_CODE (elt) != SET + || count <= 1 + || count > 10) + return false; + + /* Pick up regno of first element for further detail checking. + Note that the form is different between load and store operation. */ + if (load_p) + { + if (GET_CODE (SET_DEST (elt)) != REG + || GET_CODE (SET_SRC (elt)) != MEM) + return false; + + first_elt_regno = REGNO (SET_DEST (elt)); + } + else + { + if (GET_CODE (SET_SRC (elt)) != REG + || GET_CODE (SET_DEST (elt)) != MEM) + return false; + + first_elt_regno = REGNO (SET_SRC (elt)); + } + + /* Perform detail check for each element. + Refer to nds32-multiple.md for more information + about following checking. + The starting element of parallel rtx is index 0. */ + if (!nds32_consecutive_registers_load_store_p (op, load_p, bim_p ? 1 : 0, + first_elt_regno, + count)) + return false; + + if (bim_p) + { + update_base_elt_idx = 0; + update_base = XVECEXP (op, 0, update_base_elt_idx); + if (!REG_P (SET_DEST (update_base))) + return false; + if (GET_CODE (SET_SRC (update_base)) != PLUS) + return false; + else + { + offset = count * UNITS_PER_WORD; + elt = XEXP (SET_SRC (update_base), 1); + if (GET_CODE (elt) != CONST_INT + || (INTVAL (elt) != offset)) + return false; + } + } + + /* Pass all test, this is a valid rtx. */ + return true; +} + +/* Function to check whether the OP is a valid stack push/pop operation. + For a valid stack operation, it must satisfy following conditions: + 1. Consecutive registers push/pop operations. + 2. Valid $fp/$gp/$lp push/pop operations. + 3. The last element must be stack adjustment rtx. + See the prologue/epilogue implementation for details. */ +bool +nds32_valid_stack_push_pop_p (rtx op, bool push_p) +{ + int index; + int total_count; + int rest_count; + int first_regno; + int save_fp, save_gp, save_lp; + rtx elt; + rtx elt_reg; + rtx elt_mem; + rtx elt_plus; + + /* Get the counts of elements in the parallel rtx. */ + total_count = XVECLEN (op, 0); + + /* Perform some quick check for that every element should be 'set'. */ + for (index = 0; index < total_count; index++) + { + elt = XVECEXP (op, 0, index); + if (GET_CODE (elt) != SET) + return false; + } + + /* For push operation, the parallel rtx looks like: + (parallel [(set (mem (plus (reg:SI SP_REGNUM) (const_int -32))) + (reg:SI Rb)) + (set (mem (plus (reg:SI SP_REGNUM) (const_int -28))) + (reg:SI Rb+1)) + ... + (set (mem (plus (reg:SI SP_REGNUM) (const_int -16))) + (reg:SI Re)) + (set (mem (plus (reg:SI SP_REGNUM) (const_int -12))) + (reg:SI FP_REGNUM)) + (set (mem (plus (reg:SI SP_REGNUM) (const_int -8))) + (reg:SI GP_REGNUM)) + (set (mem (plus (reg:SI SP_REGNUM) (const_int -4))) + (reg:SI LP_REGNUM)) + (set (reg:SI SP_REGNUM) + (plus (reg:SI SP_REGNUM) (const_int -32)))]) + + For pop operation, the parallel rtx looks like: + (parallel [(set (reg:SI Rb) + (mem (reg:SI SP_REGNUM))) + (set (reg:SI Rb+1) + (mem (plus (reg:SI SP_REGNUM) (const_int 4)))) + ... 
+ (set (reg:SI Re) + (mem (plus (reg:SI SP_REGNUM) (const_int 16)))) + (set (reg:SI FP_REGNUM) + (mem (plus (reg:SI SP_REGNUM) (const_int 20)))) + (set (reg:SI GP_REGNUM) + (mem (plus (reg:SI SP_REGNUM) (const_int 24)))) + (set (reg:SI LP_REGNUM) + (mem (plus (reg:SI SP_REGNUM) (const_int 28)))) + (set (reg:SI SP_REGNUM) + (plus (reg:SI SP_REGNUM) (const_int 32)))]) */ + + /* 1. Consecutive registers push/pop operations. + We need to calculate how many registers should be consecutive. + The $sp adjustment rtx, $fp push rtx, $gp push rtx, + and $lp push rtx are excluded. */ + + /* Detect whether we have $fp, $gp, or $lp in the parallel rtx. */ + save_fp = reg_mentioned_p (gen_rtx_REG (SImode, FP_REGNUM), op); + save_gp = reg_mentioned_p (gen_rtx_REG (SImode, GP_REGNUM), op); + save_lp = reg_mentioned_p (gen_rtx_REG (SImode, LP_REGNUM), op); + /* Exclude last $sp adjustment rtx. */ + rest_count = total_count - 1; + /* Exclude $fp, $gp, and $lp if they are in the parallel rtx. */ + if (save_fp) + rest_count--; + if (save_gp) + rest_count--; + if (save_lp) + rest_count--; + + if (rest_count > 0) + { + elt = XVECEXP (op, 0, 0); + /* Pick up register element. */ + elt_reg = push_p ? SET_SRC (elt) : SET_DEST (elt); + first_regno = REGNO (elt_reg); + + /* The 'push' operation is a kind of store operation. + The 'pop' operation is a kind of load operation. + Pass corresponding false/true as second argument (bool load_p). + The par_index is supposed to start with index 0. */ + if (!nds32_consecutive_registers_load_store_p (op, + !push_p ? true : false, + 0, + first_regno, + rest_count)) + return false; + } + + /* 2. Valid $fp/$gp/$lp push/pop operations. + Remember to set start index for checking them. */ + + /* The rest_count is the start index for checking $fp/$gp/$lp. */ + index = rest_count; + /* If index < 0, this parallel rtx is definitely + not a valid stack push/pop operation. */ + if (index < 0) + return false; + + /* Check $fp/$gp/$lp one by one. + We use 'push_p' to pick up reg rtx and mem rtx. */ + if (save_fp) + { + elt = XVECEXP (op, 0, index); + elt_mem = push_p ? SET_DEST (elt) : SET_SRC (elt); + elt_reg = push_p ? SET_SRC (elt) : SET_DEST (elt); + index++; + + if (GET_CODE (elt_mem) != MEM + || GET_CODE (elt_reg) != REG + || REGNO (elt_reg) != FP_REGNUM) + return false; + } + if (save_gp) + { + elt = XVECEXP (op, 0, index); + elt_mem = push_p ? SET_DEST (elt) : SET_SRC (elt); + elt_reg = push_p ? SET_SRC (elt) : SET_DEST (elt); + index++; + + if (GET_CODE (elt_mem) != MEM + || GET_CODE (elt_reg) != REG + || REGNO (elt_reg) != GP_REGNUM) + return false; + } + if (save_lp) + { + elt = XVECEXP (op, 0, index); + elt_mem = push_p ? SET_DEST (elt) : SET_SRC (elt); + elt_reg = push_p ? SET_SRC (elt) : SET_DEST (elt); + index++; + + if (GET_CODE (elt_mem) != MEM + || GET_CODE (elt_reg) != REG + || REGNO (elt_reg) != LP_REGNUM) + return false; + } + + /* 3. The last element must be stack adjustment rtx. + Its form of rtx should be: + (set (reg:SI SP_REGNUM) + (plus (reg:SI SP_REGNUM) (const_int X))) + The X could be positive or negative value. */ + + /* Pick up the last element. */ + elt = XVECEXP (op, 0, total_count - 1); + + /* Extract its destination and source rtx. */ + elt_reg = SET_DEST (elt); + elt_plus = SET_SRC (elt); + + /* Check this is (set (stack_reg) (plus stack_reg const)) pattern. */ + if (GET_CODE (elt_reg) != REG + || GET_CODE (elt_plus) != PLUS + || REGNO (elt_reg) != SP_REGNUM) + return false; + + /* Pass all test, this is a valid rtx. 
*/
+  return true;
+}
+
+/* Function to check if 'bclr' instruction can be used with IVAL.  */
+bool
+nds32_can_use_bclr_p (HOST_WIDE_INT ival)
+{
+  int one_bit_count;
+  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode);
+
+  /* Calculate the number of 1 bits in (~ival).  If there is exactly one
+     1 bit, the original ival has exactly one 0 bit,
+     so it is ok to perform the 'bclr' operation.  */
+
+  one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (~ival) & mask);
+
+  /* 'bclr' is a performance extension instruction.  */
+  return (TARGET_EXT_PERF && (one_bit_count == 1));
+}
+
+/* Function to check if 'bset' instruction can be used with IVAL.  */
+bool
+nds32_can_use_bset_p (HOST_WIDE_INT ival)
+{
+  int one_bit_count;
+  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode);
+
+  /* Calculate the number of 1 bits in ival.  If there is exactly one
+     1 bit, it is ok to perform the 'bset' operation.  */
+
+  one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival) & mask);
+
+  /* 'bset' is a performance extension instruction.  */
+  return (TARGET_EXT_PERF && (one_bit_count == 1));
+}
+
+/* Function to check if 'btgl' instruction can be used with IVAL.  */
+bool
+nds32_can_use_btgl_p (HOST_WIDE_INT ival)
+{
+  int one_bit_count;
+  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode);
+
+  /* Calculate the number of 1 bits in ival.  If there is exactly one
+     1 bit, it is ok to perform the 'btgl' operation.  */
+
+  one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival) & mask);
+
+  /* 'btgl' is a performance extension instruction.  */
+  return (TARGET_EXT_PERF && (one_bit_count == 1));
+}
+
+/* Function to check if 'bitci' instruction can be used with IVAL.  */
+bool
+nds32_can_use_bitci_p (HOST_WIDE_INT ival)
+{
+  /* If we are using V3 ISA, we have the 'bitci' instruction.
+     Try to see if we can express the 'andi' semantics with
+     such a 'bit-clear-immediate' operation.
+     For example, 'andi $r0,$r0,0xfffffffc' can be
+     expressed as 'bitci $r0,$r0,3'.  */
+  return (TARGET_ISA_V3
+          && (ival < 0)
+          && satisfies_constraint_Iu15 (gen_int_mode (~ival, SImode)));
+}
+
+/* Return true if INSN is a load/store whose addressing mode is SYMBOL_REF
+   and whose memory mode is SImode.  */
+bool
+nds32_symbol_load_store_p (rtx insn)
+{
+  rtx mem_src = NULL_RTX;
+
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LOAD:
+      mem_src = SET_SRC (PATTERN (insn));
+      break;
+    case TYPE_STORE:
+      mem_src = SET_DEST (PATTERN (insn));
+      break;
+    default:
+      break;
+    }
+
+  /* Find a load/store insn whose addressing mode is SYMBOL_REF.
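Editor's note (illustration, not part of the patch): a quick worked check of the immediate tests above, assuming only that popcount_hwi counts set bits the way __builtin_popcountll does on the masked 32-bit value. The values and helper below are hypothetical.

    /* Stand-alone sketch of the same arithmetic:
       bset : 0x00000080      -> one 1 bit           -> usable.
       bclr : 0xffffffef      -> ~ival has one 1 bit -> usable.
       bitci: 0xfffffffc (-4) -> ~ival == 3 fits Iu15,
              so 'andi $r0,$r0,0xfffffffc' becomes 'bitci $r0,$r0,3'.  */
    #include <assert.h>

    static int
    single_bit_p (unsigned long long v)
    {
      return __builtin_popcountll (v & 0xffffffffULL) == 1;
    }

    int
    main (void)
    {
      assert (single_bit_p (0x00000080));        /* bset candidate  */
      assert (single_bit_p (~0xffffffefULL));    /* bclr candidate  */
      assert ((~(long long) -4) == 3);           /* bitci immediate */
      return 0;
    }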
*/ + if (mem_src != NULL_RTX) + { + if ((GET_CODE (mem_src) == ZERO_EXTEND) + || (GET_CODE (mem_src) == SIGN_EXTEND)) + mem_src = XEXP (mem_src, 0); + + if ((GET_CODE (XEXP (mem_src, 0)) == SYMBOL_REF) + || (GET_CODE (XEXP (mem_src, 0)) == LO_SUM)) + return true; + } + + return false; +} + +/* Vaild memory operand for floating-point loads and stores */ +bool +nds32_float_mem_operand_p (rtx op) +{ + enum machine_mode mode = GET_MODE (op); + rtx addr = XEXP (op, 0); + + /* Not support [symbol] [const] memory */ + if (GET_CODE (addr) == SYMBOL_REF + || GET_CODE (addr) == CONST + || GET_CODE (addr) == LO_SUM) + return false; + + if (GET_CODE (addr) == PLUS) + { + if (GET_CODE (XEXP (addr, 0)) == SYMBOL_REF) + return false; + + /* Restrict const range: (imm12s << 2) */ + if (GET_CODE (XEXP (addr, 1)) == CONST_INT) + { + if ((mode == SImode || mode == SFmode) + && NDS32_SINGLE_WORD_ALIGN_P (INTVAL (XEXP (addr, 1))) + && !satisfies_constraint_Is14 ( XEXP(addr, 1))) + return false; + + if ((mode == DImode || mode == DFmode) + && NDS32_DOUBLE_WORD_ALIGN_P (INTVAL (XEXP (addr, 1))) + && !satisfies_constraint_Is14 (XEXP (addr, 1))) + return false; + } + } + + return true; +} + +int +nds32_cond_move_p (rtx cmp_rtx) +{ + enum machine_mode cmp0_mode = GET_MODE (XEXP (cmp_rtx, 0)); + enum machine_mode cmp1_mode = GET_MODE (XEXP (cmp_rtx, 1)); + enum rtx_code cond = GET_CODE (cmp_rtx); + + if ((cmp0_mode == DFmode || cmp0_mode == SFmode) + && (cmp1_mode == DFmode || cmp1_mode == SFmode) + && (cond == ORDERED || cond == UNORDERED)) + return true; + return false; +} + +/* Return true if the addresses in mem1 and mem2 are suitable for use in + an fldi or fsdi instruction. + + This can only happen when addr1 and addr2, the addresses in mem1 + and mem2, are consecutive memory locations (addr1 + 4 == addr2). + addr1 must also be aligned on a 64-bit boundary. */ +bool +nds32_memory_merge_peep_p (rtx mem1, rtx mem2) +{ + rtx addr1, addr2; + unsigned int reg1; + HOST_WIDE_INT offset1; + + /* The mems cannot be volatile. */ + if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) + return false; + + /* MEM1 should be aligned on a 64-bit boundary. */ + if (MEM_ALIGN (mem1) < 64) + return false; + + addr1 = XEXP (mem1, 0); + addr2 = XEXP (mem2, 0); + + /* Extract a register number and offset (if used) from the first addr. */ + if (GET_CODE (addr1) == PLUS) + { + if (GET_CODE (XEXP (addr1, 0)) != REG) + return false; + else + { + reg1 = REGNO (XEXP (addr1, 0)); + if (GET_CODE (XEXP (addr1, 1)) != CONST_INT) + return false; + + offset1 = INTVAL (XEXP (addr1, 1)); + } + } + else if (GET_CODE (addr1) != REG) + return false; + else + { + reg1 = REGNO (addr1); + /* This was a simple (mem (reg)) expression. Offset is 0. */ + offset1 = 0; + } + /* Make sure the second address is a (mem (plus (reg) (const_int). */ + if (GET_CODE (addr2) != PLUS) + return false; + + if (GET_CODE (XEXP (addr2, 0)) != REG + || GET_CODE (XEXP (addr2, 1)) != CONST_INT) + return false; + + if (reg1 != REGNO (XEXP (addr2, 0))) + return false; + + /* The first offset must be evenly divisible by 8 to ensure the + address is 64 bit aligned. */ + if (offset1 % 8 != 0) + return false; + + /* The offset for the second addr must be 4 more than the first addr. 
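Editor's note (illustration, not part of the patch): the pairing rule that nds32_memory_merge_peep_p enforces, reduced to plain integers — same base register, first offset 8-byte aligned, second offset exactly four bytes further on. The helper below is a hypothetical sketch, not the patch's code.

    static int
    mergeable_word_pair_p (unsigned base1, long off1,
                           unsigned base2, long off2)
    {
      return base1 == base2
             && off1 % 8 == 0
             && off2 == off1 + 4;
    }

    /* [$r8 + 8] / [$r8 + 12] -> mergeable
       [$r8 + 4] / [$r8 + 8]  -> rejected (first offset not 8-byte aligned)
       [$r8 + 8] / [$r9 + 12] -> rejected (different base registers)  */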
*/ + if (INTVAL (XEXP (addr2, 1)) != offset1 + 4) + return false; + + return true; +} + +bool +nds32_const_double_range_ok_p (rtx op, enum machine_mode mode, + HOST_WIDE_INT lower, HOST_WIDE_INT upper) +{ + if (GET_CODE (op) != CONST_DOUBLE + || GET_MODE (op) != mode) + return false; + + REAL_VALUE_TYPE rv; + long val; + + REAL_VALUE_FROM_CONST_DOUBLE (rv, op); + REAL_VALUE_TO_TARGET_SINGLE (rv, val); + + return val >= lower && val < upper; +} + +bool +nds32_const_unspec_p (rtx x) +{ + if (GET_CODE (x) == CONST) + { + x = XEXP (x, 0); + + if (GET_CODE (x) == PLUS) + x = XEXP (x, 0); + + if (GET_CODE (x) == UNSPEC) + { + switch (XINT (x, 1)) + { + case UNSPEC_GOTINIT: + case UNSPEC_GOT: + case UNSPEC_GOTOFF: + case UNSPEC_PLT: + case UNSPEC_TLSGD: + case UNSPEC_TLSLD: + case UNSPEC_TLSIE: + case UNSPEC_TLSLE: + return false; + default: + return true; + } + } + } + + if (GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (x)) + return false; + + return true; +} + +HOST_WIDE_INT +const_vector_to_hwint (rtx op) +{ + HOST_WIDE_INT hwint = 0; + HOST_WIDE_INT mask; + int i; + int shift_adv; + int shift = 0; + int nelem; + + switch (GET_MODE (op)) + { + case V2HImode: + mask = 0xffff; + shift_adv = 16; + nelem = 2; + break; + case V4QImode: + mask = 0xff; + shift_adv = 8; + nelem = 4; + break; + default: + gcc_unreachable (); + } + + if (TARGET_BIG_ENDIAN) + { + for (i = 0; i < nelem; ++i) + { + HOST_WIDE_INT val = XINT (XVECEXP (op, 0, nelem - i - 1), 0); + hwint |= (val & mask) << shift; + shift = shift + shift_adv; + } + } + else + { + for (i = 0; i < nelem; ++i) + { + HOST_WIDE_INT val = XINT (XVECEXP (op, 0, i), 0); + hwint |= (val & mask) << shift; + shift = shift + shift_adv; + } + } + + return hwint; +} + +bool +nds32_valid_CVp5_p (rtx op) +{ + HOST_WIDE_INT ival = const_vector_to_hwint (op); + return (ival < ((1 << 5) + 16)) && (ival >= (0 + 16)); +} + +bool +nds32_valid_CVs5_p (rtx op) +{ + HOST_WIDE_INT ival = const_vector_to_hwint (op); + return (ival < (1 << 4)) && (ival >= -(1 << 4)); +} + +bool +nds32_valid_CVs2_p (rtx op) +{ + HOST_WIDE_INT ival = const_vector_to_hwint (op); + return (ival < (1 << 19)) && (ival >= -(1 << 19)); +} + +bool +nds32_valid_CVhi_p (rtx op) +{ + HOST_WIDE_INT ival = const_vector_to_hwint (op); + return (ival != 0) && ((ival & 0xfff) == 0); +} + +/* ------------------------------------------------------------------------ */ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-protos.h gcc-4.9.4/gcc/config/nds32/nds32-protos.h --- gcc-4.9.4.orig/gcc/config/nds32/nds32-protos.h 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-protos.h 2016-08-08 20:37:45.582273034 +0200 @@ -1,5 +1,5 @@ /* Prototypes for exported functions of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -28,6 +28,9 @@ /* Register Usage. */ +/* -- Order of Allocation of Registers. */ +extern void nds32_adjust_reg_alloc_order (void); + /* -- How Values Fit in Registers. */ extern int nds32_hard_regno_nregs (int, enum machine_mode); @@ -43,6 +46,7 @@ /* -- Basic Stack Layout. */ +extern rtx nds32_dynamic_chain_address (rtx); extern rtx nds32_return_addr_rtx (int, rtx); /* -- Eliminating Frame Pointer and Arg Pointer. */ @@ -58,71 +62,263 @@ /* -- Function Entry and Exit. 
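Editor's note (illustration, not part of the patch): how const_vector_to_hwint and the CV* range checks above fit together, shown for a little-endian V4QImode constant. This is a stand-alone sketch of the same packing arithmetic, not the patch's code.

    /* {1, 2, 3, 4} packs to 0x04030201: mask 0xff, the shift advances by
       8 per element, element 0 lands in the least significant byte.  */
    static long long
    pack_v4qi_little_endian (const unsigned char e[4])
    {
      long long hwint = 0;
      int shift = 0;
      int i;

      for (i = 0; i < 4; i++, shift += 8)
        hwint |= (long long) (e[i] & 0xff) << shift;
      return hwint;
    }

    /* nds32_valid_CVs5_p then accepts the vector only when that packed
       value lies in [-16, 16), nds32_valid_CVp5_p only when it lies in
       [16, 48), and nds32_valid_CVhi_p only when it is nonzero with the
       low 12 bits clear.  */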
*/ extern void nds32_expand_prologue (void); -extern void nds32_expand_epilogue (void); +extern void nds32_expand_epilogue (bool); extern void nds32_expand_prologue_v3push (void); -extern void nds32_expand_epilogue_v3pop (void); +extern void nds32_expand_epilogue_v3pop (bool); +extern void nds32_emit_push_fpr_callee_saved (int); +extern void nds32_emit_pop_fpr_callee_saved (int); +extern void nds32_emit_v3pop_fpr_callee_saved (int); /* ------------------------------------------------------------------------ */ -/* Auxiliary functions for auxiliary macros in nds32.h. */ +/* Auxiliary functions for manipulation DI mode. */ -extern bool nds32_ls_333_p (rtx, rtx, rtx, enum machine_mode); +extern rtx nds32_di_high_part_subreg(rtx); +extern rtx nds32_di_low_part_subreg(rtx); /* Auxiliary functions for expanding rtl used in nds32-multiple.md. */ -extern rtx nds32_expand_load_multiple (int, int, rtx, rtx); -extern rtx nds32_expand_store_multiple (int, int, rtx, rtx); -extern int nds32_expand_movmemqi (rtx, rtx, rtx, rtx); +extern rtx nds32_expand_load_multiple (int, int, rtx, rtx, bool, rtx *); +extern rtx nds32_expand_store_multiple (int, int, rtx, rtx, bool, rtx *); +extern bool nds32_expand_movmemsi (rtx, rtx, rtx, rtx); +extern bool nds32_expand_setmem (rtx, rtx, rtx, rtx, rtx, rtx); +extern bool nds32_expand_movstr (rtx, rtx, rtx); +extern bool nds32_expand_strlen (rtx, rtx, rtx, rtx); /* Auxiliary functions for multiple load/store predicate checking. */ -extern bool nds32_valid_multiple_load_store (rtx, bool); +extern bool nds32_valid_multiple_load_store_p (rtx, bool, bool); + +/* Auxiliary functions for guard function checking in pipelines.md. */ + +extern bool nds32_load_single_p (rtx); +extern bool nds32_store_single_p (rtx); +extern bool nds32_load_double_p (rtx); +extern bool nds32_store_double_p (rtx); + +extern bool nds32_n7_load_to_ii_p (rtx, rtx); +extern bool nds32_n7_last_load_to_ii_p (rtx, rtx); + +extern bool nds32_n8_load_to_ii_p (rtx, rtx); +extern bool nds32_n8_load_bi_to_ii_p (rtx, rtx); +extern bool nds32_n8_load_to_ex_p (rtx, rtx); +extern bool nds32_n8_ex_to_ii_p (rtx, rtx); +extern bool nds32_n8_last_load_to_ii_p (rtx, rtx); +extern bool nds32_n8_last_load_two_to_ii_p (rtx, rtx); +extern bool nds32_n8_last_load_to_ex_p (rtx, rtx); + +extern bool nds32_e8_load_to_ii_p (rtx, rtx); +extern bool nds32_e8_load_to_ex_p (rtx, rtx); +extern bool nds32_e8_ex_to_ii_p (rtx, rtx); +extern bool nds32_e8_last_load_to_ii_p (rtx, rtx); +extern bool nds32_e8_last_load_to_ex_p (rtx, rtx); + +extern bool nds32_n9_2r1w_mm_to_ex_p (rtx, rtx); +extern bool nds32_n9_3r2w_mm_to_ex_p (rtx, rtx); +extern bool nds32_n9_last_load_to_ex_p (rtx, rtx); + +extern bool nds32_n13_addr_in_p (rtx, rtx); +extern bool nds32_n13_e2_to_e1_p (rtx, rtx); +extern bool nds32_n13_load_to_e1_p (rtx, rtx); +extern bool nds32_n13_load_to_e2_p (rtx, rtx); +extern bool nds32_n13_last_load_to_e1_p (rtx, rtx); +extern bool nds32_n13_last_load_to_e2_p (rtx, rtx); +extern bool nds32_n13_last_two_load_to_e1_p (rtx, rtx); /* Auxiliary functions for stack operation predicate checking. */ -extern bool nds32_valid_stack_push_pop (rtx, bool); +extern bool nds32_valid_stack_push_pop_p (rtx, bool); /* Auxiliary functions for bit operation detection. 
*/ -extern int nds32_can_use_bclr_p (int); -extern int nds32_can_use_bset_p (int); -extern int nds32_can_use_btgl_p (int); +extern bool nds32_can_use_bclr_p (HOST_WIDE_INT); +extern bool nds32_can_use_bset_p (HOST_WIDE_INT); +extern bool nds32_can_use_btgl_p (HOST_WIDE_INT); -extern int nds32_can_use_bitci_p (int); +extern bool nds32_can_use_bitci_p (HOST_WIDE_INT); -/* Auxiliary function for 'Computing the Length of an Insn'. */ +extern bool nds32_const_double_range_ok_p (rtx, enum machine_mode, + HOST_WIDE_INT, HOST_WIDE_INT); -extern int nds32_adjust_insn_length (rtx, int); +extern bool nds32_const_unspec_p (rtx x); /* Auxiliary functions for FP_AS_GP detection. */ extern bool nds32_symbol_load_store_p (rtx); -extern int nds32_fp_as_gp_check_available (void); +extern bool nds32_naked_function_p (tree); /* Auxiliary functions for jump table generation. */ extern const char *nds32_output_casesi_pc_relative (rtx *); extern const char *nds32_output_casesi (rtx *); +/* Auxiliary functions for conditional branch generation. */ + +extern enum nds32_expand_result_type nds32_expand_cbranch (rtx *); +extern enum nds32_expand_result_type nds32_expand_cstore (rtx *); +extern void nds32_expand_float_cbranch (rtx *); +extern void nds32_expand_float_cstore (rtx *); + +/* Auxiliary functions for conditional move generation. */ + +extern enum nds32_expand_result_type nds32_expand_movcc (rtx *); +extern void nds32_expand_float_movcc (rtx *); + +/* Auxiliary functions for expand unalign load instruction. */ + +extern void nds32_expand_unaligned_load (rtx *, enum machine_mode); + +/* Auxiliary functions for expand unalign store instruction. */ + +extern void nds32_expand_unaligned_store (rtx *, enum machine_mode); + +/* Auxiliary functions for expand PIC instruction. */ + +extern void nds32_expand_pic_move (rtx *); + +/* Auxiliary functions for expand call address PIC instruction. */ + +extern void nds32_expand_call_address (rtx *); + +/* Auxiliary functions for expand TLS instruction. */ + +extern void nds32_expand_tls_move (rtx *); + +/* Auxiliary functions to legitimize TLS address. */ + +extern rtx nds32_legitimize_tls_address (rtx); + +/* Auxiliary functions to identify thread-local symbol. */ + +extern bool nds32_tls_referenced_p (rtx); + +/* Auxiliary functions to identify indirect-call symbol. */ + +extern bool nds32_indirect_call_referenced_p (rtx); + +/* Auxiliary functions to identify SYMBOL_REF and LABEL_REF pattern. */ + +extern bool symbolic_reference_mentioned_p (rtx); + +/* Auxiliary functions to identify conditional move comparison operand. */ + +extern int nds32_cond_move_p (rtx); + +/* Auxiliary functions to identify address for peephole2 merge instruction. */ + +extern bool nds32_memory_merge_peep_p (rtx, rtx); + /* Auxiliary functions to identify 16 bit addresing mode. */ extern enum nds32_16bit_address_type nds32_mem_format (rtx); +/* Auxiliary functions to identify floating-point addresing mode. */ + +extern bool nds32_float_mem_operand_p (rtx); + /* Auxiliary functions to output assembly code. 
*/ extern const char *nds32_output_16bit_store (rtx *, int); extern const char *nds32_output_16bit_load (rtx *, int); extern const char *nds32_output_32bit_store (rtx *, int); extern const char *nds32_output_32bit_load (rtx *, int); -extern const char *nds32_output_32bit_load_s (rtx *, int); +extern const char *nds32_output_32bit_load_se (rtx *, int); +extern const char *nds32_output_float_load(rtx *); +extern const char *nds32_output_float_store(rtx *); +extern const char *nds32_output_smw_single_word (rtx *); +extern const char *nds32_output_lmw_single_word (rtx *); +extern const char *nds32_output_double (rtx *, bool); +extern const char *nds32_output_cbranchsi4_equality_zero (rtx, rtx *); +extern const char *nds32_output_cbranchsi4_equality_reg (rtx, rtx *); +extern const char *nds32_output_cbranchsi4_equality_reg_or_const_int (rtx, + rtx *); +extern const char *nds32_output_cbranchsi4_greater_less_zero (rtx, rtx *); + +extern const char *nds32_output_unpkd8 (rtx, rtx, rtx, rtx, bool); + +extern const char *nds32_output_call (rtx, rtx *, + const char *, const char *, bool); +extern const char *nds32_output_tls_desc (rtx *); +extern const char *nds32_output_tls_ie (rtx *); /* Auxiliary functions to output stack push/pop instruction. */ -extern const char *nds32_output_stack_push (void); -extern const char *nds32_output_stack_pop (void); +extern const char *nds32_output_stack_push (rtx); +extern const char *nds32_output_stack_pop (rtx); +extern const char *nds32_output_return (void); + + +/* Auxiliary functions to split/output sms pattern. */ +extern bool nds32_need_split_sms_p (rtx, rtx, rtx, rtx); +extern const char *nds32_output_sms (rtx, rtx, rtx, rtx); +extern void nds32_split_sms (rtx, rtx, rtx, rtx, rtx, rtx, rtx); + +/* Auxiliary functions to split double word RTX pattern. */ + +extern void nds32_spilt_doubleword (rtx *, bool); +extern void nds32_split_ashiftdi3 (rtx, rtx, rtx); +extern void nds32_split_ashiftrtdi3 (rtx, rtx, rtx); +extern void nds32_split_lshiftrtdi3 (rtx, rtx, rtx); +extern void nds32_split_rotatertdi3 (rtx, rtx, rtx); + +/* Auxiliary functions to split large constant RTX pattern. */ + +extern void nds32_expand_constant (enum machine_mode, + HOST_WIDE_INT, rtx, rtx); + +/* Auxiliary functions to check using return with null epilogue. */ + +extern int nds32_can_use_return_insn (void); +extern enum machine_mode nds32_case_vector_shorten_mode (int, int, rtx); /* Auxiliary functions to decide output alignment or not. */ extern int nds32_target_alignment (rtx); +extern unsigned int nds32_data_alignment (tree, unsigned int); +extern unsigned int nds32_constant_alignment (tree, unsigned int); +extern unsigned int nds32_local_alignment (tree, unsigned int); + +/* Auxiliary functions to expand builtin functions. */ + +extern void nds32_init_builtins_impl (void); +extern rtx nds32_expand_builtin_impl (tree, rtx, rtx, + enum machine_mode, int); +extern tree nds32_builtin_decl_impl (unsigned, bool); + +/* Auxiliary functions for ISR implementation. */ + +extern void nds32_check_isr_attrs_conflict (tree, tree); +extern void nds32_construct_isr_vectors_information (tree, const char *); +extern void nds32_asm_file_start_for_isr (void); +extern void nds32_asm_file_end_for_isr (void); +extern bool nds32_isr_function_p (tree); +extern bool nds32_isr_function_critical_p (tree); + +/* Auxiliary functions for cost calculation. 
*/ + +extern void nds32_init_rtx_costs (void); +extern bool nds32_rtx_costs_impl (rtx, int, int, int, int *, bool); +extern int nds32_address_cost_impl (rtx, enum machine_mode, addr_space_t, bool); +extern struct register_pass_info insert_pass_fp_as_gp; + +extern int nds32_adjust_insn_length (rtx, int); + +/* Auxiliary functions for pre-define marco. */ +extern void nds32_cpu_cpp_builtins(struct cpp_reader *); + +/* Auxiliary functions for const_vector's constraints. */ + +extern HOST_WIDE_INT const_vector_to_hwint (rtx); +extern bool nds32_valid_CVp5_p (rtx); +extern bool nds32_valid_CVs5_p (rtx); +extern bool nds32_valid_CVs2_p (rtx); +extern bool nds32_valid_CVhi_p (rtx); + +/* Auxiliary functions for lwm/smw. */ + +extern bool nds32_valid_smw_lwm_base_p (rtx); + +/* Auxiliary functions for register rename pass. */ +extern reg_class_t nds32_preferred_rename_class_impl (reg_class_t); /* ------------------------------------------------------------------------ */ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-regrename.c gcc-4.9.4/gcc/config/nds32/nds32-regrename.c --- gcc-4.9.4.orig/gcc/config/nds32/nds32-regrename.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-regrename.c 2016-08-08 20:37:45.582273034 +0200 @@ -0,0 +1,377 @@ +/* Register rename pass of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2015 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" /* Required by recog.h. */ +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" /* For DFA state_t. */ +#include "insn-codes.h" /* For CODE_FOR_xxx. */ +#include "reload.h" /* For push_reload (). */ +#include "flags.h" +#include "function.h" +#include "expr.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "df.h" +#include "tm_p.h" +#include "tm-constrs.h" +#include "target.h" +#include "target-def.h" +#include "langhooks.h" /* For add_builtin_function (). 
*/ +#include "ggc.h" +#include "tree-pass.h" +#include "target-globals.h" +#include "regrename.h" + +static reg_class_t current_preferred_rename_class = NO_REGS; + +reg_class_t +nds32_preferred_rename_class_impl (reg_class_t rclass) +{ + if (rclass == GENERAL_REGS) + return current_preferred_rename_class; + else + return NO_REGS; +} + +static void +print_hard_reg_set (FILE *file, HARD_REG_SET set) +{ + int i; + + fprintf (file, "{ "); + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + { + if (TEST_HARD_REG_BIT (set, i)) + fprintf (file, "%d ", i); + } + fprintf (file, "}\n"); +} + +void +dump_hard_reg_set (FILE *file, HARD_REG_SET set) +{ + print_hard_reg_set (file, set); +} + +static bool +in_reg_class_p (unsigned regno, enum reg_class clazz) +{ + return TEST_HARD_REG_BIT (reg_class_contents[clazz], regno); +} + +static unsigned +try_find_best_rename_reg (du_head_p op_chain, reg_class_t preferred_class) +{ + HARD_REG_SET unavailable; + unsigned new_reg; + current_preferred_rename_class = preferred_class; + + COMPL_HARD_REG_SET (unavailable, reg_class_contents[preferred_class]); + CLEAR_HARD_REG_BIT (unavailable, op_chain->regno); + + new_reg = find_best_rename_reg (op_chain, GENERAL_REGS, + &unavailable, op_chain->regno); + + current_preferred_rename_class = NO_REGS; + return new_reg; +} + +static bool +try_rename_operand_to (rtx insn, unsigned op_pos, + reg_class_t preferred_rename_class) +{ + insn_rr_info *info; + du_head_p op_chain; + unsigned newreg; + unsigned oldreg; + + info = &insn_rr[INSN_UID (insn)]; + + if (info->op_info == NULL) + return false; + + if (info->op_info[op_pos].n_chains == 0) + return false; + + op_chain = regrename_chain_from_id (info->op_info[op_pos].heads[0]->id); + + if (op_chain->cannot_rename) + return false; + + /* Already use preferred class, so do nothing. */ + if (TEST_HARD_REG_BIT (reg_class_contents[preferred_rename_class], + op_chain->regno)) + return false; + + if (dump_file) + { + fprintf (dump_file, "Try to rename operand %d to %s:\n", + op_pos, reg_class_names[preferred_rename_class]); + print_rtl_single (dump_file, insn); + } + + oldreg = op_chain->regno; + newreg = try_find_best_rename_reg (op_chain, preferred_rename_class); + + if (newreg == oldreg) + return false; + + regrename_do_replace (op_chain, newreg); + + if (dump_file) + { + fprintf (dump_file, "Rename operand %d to %s is Done:\n", + op_pos, reg_class_names[preferred_rename_class]); + print_rtl_single (dump_file, insn); + } + return true; +} + +static bool +rename_slt_profitlable (rtx insn) +{ + rtx pattern; + pattern = PATTERN (insn); + rtx src = SET_SRC (pattern); + rtx op0 = XEXP (src, 0); + rtx op1 = XEXP (src, 0); + + insn_rr_info *info; + du_head_p op_chain; + int op_pos = 0; + + info = &insn_rr[INSN_UID (insn)]; + + if (info->op_info == NULL) + return false; + + if (info->op_info[op_pos].n_chains == 0) + return false; + + op_chain = regrename_chain_from_id (info->op_info[op_pos].heads[0]->id); + + if (in_reg_class_p (op_chain->regno, R15_TA_REG)) + return false; + + /* slt[s]45 need second operand in MIDDLE_REGS class. */ + if (!REG_P (op0) || !in_reg_class_p (REGNO (op0), MIDDLE_REGS)) + return false; + + /* slt[s]i45 only allow 5 bit unsigned integer. 
*/ + if (REG_P (op1) + || (CONST_INT_P (op1) && satisfies_constraint_Iu05 (op1))) + return true; + + return false; +} + +static bool +rename_cbranch_eq0_low_reg_profitlable (rtx insn) +{ + insn_rr_info *info; + du_head_p op_chain; + int op_pos = 1; + + info = &insn_rr[INSN_UID (insn)]; + + if (info->op_info == NULL) + return false; + + if (info->op_info[op_pos].n_chains == 0) + return false; + + op_chain = regrename_chain_from_id (info->op_info[op_pos].heads[0]->id); + + if (in_reg_class_p (op_chain->regno, LOW_REGS)) + return false; + + return true; +} + + +static bool +rename_cbranch_eq0_r15_profitlable (rtx insn) +{ + rtx pattern; + pattern = PATTERN (insn); + rtx if_then_else = SET_SRC (pattern); + rtx cond = XEXP (if_then_else, 0); + rtx op0 = XEXP (cond, 0); + + insn_rr_info *info; + du_head_p op_chain; + int op_pos = 1; + + info = &insn_rr[INSN_UID (insn)]; + + if (info->op_info == NULL) + return false; + + if (info->op_info[op_pos].n_chains == 0) + return false; + + op_chain = regrename_chain_from_id (info->op_info[op_pos].heads[0]->id); + + if (in_reg_class_p (op_chain->regno, R15_TA_REG)) + return false; + + /* LOW_REGS or R15_TA_REG both are 2-byte instruction. */ + if (REG_P (op0) && in_reg_class_p (REGNO (op0), LOW_REGS)) + return false; + + return true; +} + +static bool +rename_cbranch_eq_reg_profitlable (rtx insn) +{ + rtx pattern; + pattern = PATTERN (insn); + rtx if_then_else = SET_SRC (pattern); + rtx cond = XEXP (if_then_else, 0); + rtx op1 = XEXP (cond, 1); + + insn_rr_info *info; + du_head_p op_chain; + int op_pos = 1; + + info = &insn_rr[INSN_UID (insn)]; + + if (info->op_info == NULL) + return false; + + if (info->op_info[op_pos].n_chains == 0) + return false; + + op_chain = regrename_chain_from_id (info->op_info[op_pos].heads[0]->id); + + if (in_reg_class_p (op_chain->regno, R5_REG)) + return false; + + if (REG_P (op1) && in_reg_class_p (REGNO (op1), LOW_REGS)) + return true; + else + return false; +} + +static void +do_regrename () +{ + basic_block bb; + rtx insn; + + FOR_EACH_BB_FN (bb, cfun) + { + FOR_BB_INSNS (bb, insn) + { + if (!INSN_P (insn)) + continue; + + switch (recog_memoized (insn)) + { + case CODE_FOR_slts_compare_impl: + case CODE_FOR_slt_compare_impl: + /* Try to rename operand 0 to $r15 if profitable. */ + if (rename_slt_profitlable (insn)) + try_rename_operand_to (insn, 0, R15_TA_REG); + break; + case CODE_FOR_slt_eq0: + /* Try to rename operand 0 to $r15. */ + if (rename_slt_profitlable (insn)) + try_rename_operand_to (insn, 0, R15_TA_REG); + break; + case CODE_FOR_cbranchsi4_equality_zero: + /* Try to rename operand 1 to $r15. */ + if (rename_cbranch_eq0_r15_profitlable (insn)) + if (!try_rename_operand_to (insn, 1, R15_TA_REG)) + if (rename_cbranch_eq0_low_reg_profitlable (insn)) + try_rename_operand_to (insn, 1, LOW_REGS); + break; + case CODE_FOR_cbranchsi4_equality_reg: + case CODE_FOR_cbranchsi4_equality_reg_or_const_int: + /* Try to rename operand 1 to $r5. 
*/ + if (rename_cbranch_eq_reg_profitlable (insn)) + try_rename_operand_to (insn, 1, R5_REG); + break; + } + } + } +} + +static unsigned int +nds32_regrename (void) +{ + df_set_flags (DF_LR_RUN_DCE); + df_note_add_problem (); + df_analyze (); + df_set_flags (DF_DEFER_INSN_RESCAN); + + regrename_init (true); + + regrename_analyze (NULL); + + do_regrename (); + + regrename_finish (); + return 1; +} + +const pass_data pass_data_nds32_regrename = +{ + RTL_PASS, /* type */ + "nds32-regrename", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + ( TODO_df_finish | TODO_verify_rtl_sharing ), /* todo_flags_finish */ +}; + +class pass_nds32_regrename_opt : public rtl_opt_pass +{ +public: + pass_nds32_regrename_opt (gcc::context *ctxt) + : rtl_opt_pass (pass_data_nds32_regrename, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return TARGET_16_BIT && TARGET_REGRENAME_OPT; } + unsigned int execute () { return nds32_regrename (); } +}; + +rtl_opt_pass * +make_pass_nds32_regrename_opt (gcc::context *ctxt) +{ + return new pass_nds32_regrename_opt (ctxt); +} diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-relax-opt.c gcc-4.9.4/gcc/config/nds32/nds32-relax-opt.c --- gcc-4.9.4.orig/gcc/config/nds32/nds32-relax-opt.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-relax-opt.c 2016-08-08 20:37:45.582273034 +0200 @@ -0,0 +1,555 @@ +/* relax-opt pass of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2015 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" /* Required by recog.h. */ +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" /* For DFA state_t. */ +#include "insn-codes.h" /* For CODE_FOR_xxx. */ +#include "reload.h" /* For push_reload (). */ +#include "flags.h" +#include "function.h" +#include "expr.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "df.h" +#include "tm_p.h" +#include "tm-constrs.h" +#include "target.h" +#include "target-def.h" +#include "langhooks.h" /* For add_builtin_function (). */ +#include "ggc.h" +#include "tree-pass.h" +#include "target-globals.h" +#include "ira-int.h" + +/* This is used to create unique relax hint id value. + The initial value is 0. */ +static int relax_group_id = 0; + +/* Group the following pattern as relax candidates: + + 1. sethi $ra, hi20(sym) + ori $ra, $ra, lo12(sym) + ==> + addi.gp $ra, sym + + 2. sethi $ra, hi20(sym) + lwi $rb, [$ra + lo12(sym)] + ==> + lwi.gp $rb, [(sym)] + + 3. 
sethi $ra, hi20(sym) + ori $ra, $ra, lo12(sym) + lwi $rb, [$ra] + swi $rc, [$ra] + ==> + lwi37 $rb, [(sym)] + swi37 $rc, [(sym)] */ + +/* Return true if is load/store with REG addressing mode + and memory mode is SImode. */ +static bool +nds32_reg_base_load_store_p (rtx insn) +{ + rtx mem_src = NULL_RTX; + + switch (get_attr_type (insn)) + { + case TYPE_LOAD: + mem_src = SET_SRC (PATTERN (insn)); + break; + case TYPE_STORE: + mem_src = SET_DEST (PATTERN (insn)); + break; + default: + break; + } + + /* Find load/store insn with addressing mode is REG. */ + if (mem_src != NULL_RTX) + { + if ((GET_CODE (mem_src) == ZERO_EXTEND) + || (GET_CODE (mem_src) == SIGN_EXTEND)) + mem_src = XEXP (mem_src, 0); + + if (GET_CODE (XEXP (mem_src, 0)) == REG) + return true; + } + + return false; +} + +/* Return true if insn is a sp/fp base or sp/fp plus load-store instruction. */ + +static bool +nds32_sp_base_or_plus_load_store_p (rtx insn) +{ + rtx mem_src = NULL_RTX; + + switch (get_attr_type (insn)) + { + case TYPE_LOAD: + mem_src = SET_SRC (PATTERN (insn)); + break; + case TYPE_STORE: + mem_src = SET_DEST (PATTERN (insn)); + break; + default: + break; + } + /* Find load/store insn with addressing mode is REG. */ + if (mem_src != NULL_RTX) + { + if ((GET_CODE (mem_src) == ZERO_EXTEND) + || (GET_CODE (mem_src) == SIGN_EXTEND)) + mem_src = XEXP (mem_src, 0); + + if ((GET_CODE (XEXP (mem_src, 0)) == PLUS)) + mem_src = XEXP (mem_src, 0); + + if (REG_P (XEXP (mem_src, 0)) + && ((frame_pointer_needed + && REGNO (XEXP (mem_src, 0)) == FP_REGNUM) + || REGNO (XEXP (mem_src, 0)) == SP_REGNUM)) + return true; + } + + return false; +} + +/* Return true if is load with REG addressing mode + and memory mode is SImode. */ +static bool +nds32_reg_base_load_p (rtx insn) +{ + rtx mem_src = NULL_RTX; + + if (get_attr_type (insn) == TYPE_LOAD) + mem_src = SET_SRC (PATTERN (insn)); + + /* Find load/store insn with addressing mode is REG. */ + if (mem_src != NULL_RTX) + { + if (GET_CODE (XEXP (mem_src, 0)) == REG) + return true; + } + + return false; +} + +/* Return true if is load with [REG + REG/CONST_INT] addressing mode. */ +static bool +nds32_plus_reg_load_store_p (rtx insn) +{ + rtx mem_src = NULL_RTX; + + switch (get_attr_type (insn)) + { + case TYPE_LOAD: + mem_src = SET_SRC (PATTERN (insn)); + break; + case TYPE_STORE: + mem_src = SET_DEST (PATTERN (insn)); + break; + default: + break; + } + + /* Find load/store insn with addressing mode is [REG + REG/CONST]. */ + if (mem_src != NULL_RTX) + { + if ((GET_CODE (mem_src) == ZERO_EXTEND) + || (GET_CODE (mem_src) == SIGN_EXTEND)) + mem_src = XEXP (mem_src, 0); + + if ((GET_CODE (XEXP (mem_src, 0)) == PLUS)) + mem_src = XEXP (mem_src, 0); + else + return false; + + if (GET_CODE (XEXP (mem_src, 0)) == REG) + return true; + + } + + return false; +} + +/* Group the following pattern as relax candidates: + + GOT: + sethi $ra, hi20(sym) + ori $ra, $ra, lo12(sym) + lw $rb, [$ra + $gp] + + GOTOFF, TLSLE, TLSIE: + sethi $ra, hi20(sym) + ori $ra, $ra, lo12(sym) + LS $rb, [$ra + $gp] + + TLSIE (not PIC) !UNSPEC: + This is as the same as normal load-store, and it's done as normal pattern. 
+ sethi $ra, hi20(sym) + lwi $ra, [$ra + lo12(sym)] + + GOTOFF, TLSLE: + sethi $ra, hi20(sym) + ori $ra, $ra, lo12(sym) + add $rb, $ra, $gp($tp) + + PLT: + sethi $ra, hi20(sym) + ori $ra, $ra, lo12(sym) + add $ra, $ra, $gp + jral $lp, $ra + + TLSGD and TLSLD !UNSPEC: + sethi $ra, hi20(sym) + ori $ra, $ra, lo12(sym) + add $r0, $ra, $gp + lw $rb, [$r0] + jral $rb + or: + sethi $ra, hi20(sym) + ori $ra, $ra, lo12(sym) + lw $rb, [$ra + $gp] + add $r0, $ra, $gp + jral $rb + + Initial GOT table: + sethi $gp,hi20(sym) + ori $gp, $gp, lo12(sym) + add5.pc $gp */ + +static auto_vec nds32_group_infos; +/* Group the PIC and TLS relax candidate instructions for linker. */ +static bool +nds32_pic_tls_group (rtx def_insn, + enum nds32_relax_insn_type relax_type, + int sym_type) +{ + df_ref *def_record; + df_link *link; + rtx use_insn = NULL_RTX; + def_record = DF_INSN_DEFS (def_insn); + for (link = DF_REF_CHAIN (*def_record); link; link = link->next) + { + if (!DF_REF_INSN_INFO (link->ref)) + continue; + + use_insn = DF_REF_INSN (link->ref); + + /* Skip if define insn and use insn not in the same basic block. */ + if (!dominated_by_p (CDI_DOMINATORS, + BLOCK_FOR_INSN (use_insn), + BLOCK_FOR_INSN (def_insn))) + return FALSE; + + /* Skip if use_insn not active insn. */ + if (!active_insn_p (use_insn)) + return FALSE; + + switch (relax_type) + { + case RELAX_ORI: + + /* TLSIE and GOT don't generate ADD instruction, + so don't join relax hint above ADD instruction. */ + + /* There are two possible relax patterns of TLSGD (TLSLD), + so we have to checkout both possibility. + Define: 'ori $ra, $ra, lo12(sym)' + Use: 'add $rb, $ra, $gp', + or 'lw $rb, [$ra + $gp]'. */ + if ((sym_type == UNSPEC_TLSGD || sym_type == UNSPEC_TLSLD) + && ((recog_memoized (use_insn) == CODE_FOR_addsi3 + && nds32_pic_tls_group (use_insn, RELAX_TLS_ADD_LW, + sym_type)) + || (nds32_plus_reg_load_store_p (use_insn) + && !nds32_sp_base_or_plus_load_store_p (use_insn) + && nds32_pic_tls_group (use_insn, RELAX_TLS_LW_JRAL, + sym_type)))) + nds32_group_infos.safe_push (use_insn); + /* Define: 'ori $ra, $ra, lo12(sym)' + Use: 'add $rb, $ra, $gp'. */ + else if (recog_memoized (use_insn) == CODE_FOR_addsi3 + && ((sym_type == UNSPEC_PLT + && nds32_pic_tls_group (use_insn, + RELAX_PLT_ADD, + sym_type)) + || sym_type == UNSPEC_TLSLE + || sym_type == UNSPEC_GOTOFF)) + nds32_group_infos.safe_push (use_insn); + else if (nds32_plus_reg_load_store_p (use_insn) + && !nds32_sp_base_or_plus_load_store_p (use_insn)) + nds32_group_infos.safe_push (use_insn); + else + return FALSE; + break; + + case RELAX_PLT_ADD: + /* Define: 'add $ra, $ra, $gp' + Use: 'jral $ra'. */ + if (get_attr_type (use_insn) == TYPE_BRANCH) + nds32_group_infos.safe_push (use_insn); + else if (nds32_sp_base_or_plus_load_store_p (use_insn)) + /* Skip SP base load-store instruction, because it may be a reload + instruction. */ + continue; + else + return FALSE; + break; + + case RELAX_TLS_ADD_LW: + /* This def-use chain's register number is argument, we want + to insert relax hint by call register. */ + if (get_attr_type (use_insn) == TYPE_BRANCH) + continue; + /* Define: 'add $r0, $ra, $gp' + Use: 'lw $rb, [$r0]'. */ + else if (nds32_reg_base_load_p (use_insn) + && nds32_pic_tls_group (use_insn, RELAX_TLS_LW_JRAL, + sym_type)) + nds32_group_infos.safe_push (use_insn); + else + return FALSE; + break; + + case RELAX_TLS_LW_JRAL: + /* Define: 'lw $rb, [$ra + $gp]', + or: 'lw $rb, [$r0]' + Use: 'jral $rb'. 
*/ + if (get_attr_type (use_insn) == TYPE_BRANCH) + nds32_group_infos.safe_push (use_insn); + else + return FALSE; + break; + + default: + return FALSE; + } + } + return TRUE; +} + +static int +nds32_pic_tls_symbol_type (rtx x) +{ + x = XEXP (SET_SRC (PATTERN (x)), 1); + + if (GET_CODE (x) == CONST) + { + x = XEXP (x, 0); + + if (GET_CODE (x) == PLUS) + x = XEXP (x, 0); + + return XINT (x, 1); + } + + return XINT (x, 1); +} + +/* Group the relax candidates with group id. */ +static void +nds32_group_insns (rtx sethi) +{ + df_ref *def_record; + df_link *link; + rtx use_insn = NULL_RTX, group_id; + bool valid; + + def_record = DF_INSN_DEFS (sethi); + + for (link = DF_REF_CHAIN (*def_record); link; link = link->next) + { + if (!DF_REF_INSN_INFO (link->ref)) + continue; + + use_insn = DF_REF_INSN (link->ref); + + /* Skip if define insn and use insn not in the same basic block. */ + if (!dominated_by_p (CDI_DOMINATORS, + BLOCK_FOR_INSN (use_insn), + BLOCK_FOR_INSN (sethi))) + return; + + /* Skip if use_insn not active insn. */ + if (!active_insn_p (use_insn)) + return; + + /* Initial use_insn_type. */ + if (!(recog_memoized (use_insn) == CODE_FOR_lo_sum + || nds32_symbol_load_store_p (use_insn) + || (nds32_reg_base_load_store_p (use_insn) + &&!nds32_sp_base_or_plus_load_store_p (use_insn)))) + return; + } + + group_id = GEN_INT (relax_group_id); + /* Insert .relax_* directive for sethi. */ + emit_insn_before (gen_relax_group (group_id), sethi); + + /* Scan the use insns and insert the directive. */ + for (link = DF_REF_CHAIN (*def_record); link; link = link->next) + { + if (!DF_REF_INSN_INFO (link->ref)) + continue; + + use_insn = DF_REF_INSN (link->ref); + + /* Insert .relax_* directive. */ + if (active_insn_p (use_insn)) + emit_insn_before (gen_relax_group (group_id), use_insn); + + /* Find ori ra, ra, unspec(symbol) instruction. */ + if (use_insn != NULL_RTX + && recog_memoized (use_insn) == CODE_FOR_lo_sum + && !nds32_const_unspec_p (XEXP (SET_SRC (PATTERN (use_insn)), 1))) + { + int sym_type = nds32_pic_tls_symbol_type (use_insn); + valid = nds32_pic_tls_group (use_insn, RELAX_ORI, sym_type); + + /* Insert .relax_* directive. */ + while (!nds32_group_infos.is_empty ()) + { + use_insn = nds32_group_infos.pop (); + if (valid) + emit_insn_before (gen_relax_group (group_id), use_insn); + } + } + } + + relax_group_id++; +} + +/* Convert relax group id in rtl. */ + +static void +nds32_group_tls_insn (rtx insn) +{ + rtx pat = PATTERN (insn); + rtx unspec_relax_group = XEXP (XVECEXP (pat, 0, 1), 0); + + while (GET_CODE (pat) != SET && GET_CODE (pat) == PARALLEL) + { + pat = XVECEXP (pat, 0, 0); + } + + if (GET_CODE (unspec_relax_group) == UNSPEC + && XINT (unspec_relax_group, 1) == UNSPEC_VOLATILE_RELAX_GROUP) + { + XVECEXP (unspec_relax_group, 0, 0) = GEN_INT (relax_group_id); + } + + relax_group_id++; +} + +/* Group the relax candidate instructions for linker. */ +static void +nds32_relax_group (void) +{ + rtx insn; + + compute_bb_for_insn (); + + df_chain_add_problem (DF_DU_CHAIN); + df_insn_rescan_all (); + df_analyze (); + calculate_dominance_info (CDI_DOMINATORS); + + insn = get_insns (); + gcc_assert (NOTE_P (insn)); + + for (insn = next_active_insn (insn); insn; insn = next_active_insn (insn)) + { + if (NONJUMP_INSN_P (insn)) + { + /* Find sethi ra, symbol instruction. 
*/ + if (recog_memoized (insn) == CODE_FOR_sethi + && nds32_symbolic_operand (XEXP (SET_SRC (PATTERN (insn)), 0), + SImode)) + nds32_group_insns (insn); + else if (recog_memoized (insn) == CODE_FOR_tls_ie) + nds32_group_tls_insn (insn); + } + else if (CALL_P (insn) && recog_memoized (insn) == CODE_FOR_tls_desc) + { + nds32_group_tls_insn (insn); + } + } + + /* We must call df_finish_pass manually because it should be invoked before + BB information is destroyed. Hence we cannot set the TODO_df_finish flag + to the pass manager. */ + df_insn_rescan_all (); + df_finish_pass (false); + free_dominance_info (CDI_DOMINATORS); +} + +static unsigned int +nds32_relax_opt (void) +{ + if (TARGET_RELAX_HINT) + nds32_relax_group (); + return 1; +} + +const pass_data pass_data_nds32_relax_opt = +{ + RTL_PASS, /* type */ + "relax_opt", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + ( TODO_df_finish | TODO_verify_rtl_sharing), /* todo_flags_finish */ +}; + +class pass_nds32_relax_opt : public rtl_opt_pass +{ +public: + pass_nds32_relax_opt (gcc::context *ctxt) + : rtl_opt_pass (pass_data_nds32_relax_opt, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return TARGET_RELAX_HINT; } + unsigned int execute () { return nds32_relax_opt (); } +}; + +rtl_opt_pass * +make_pass_nds32_relax_opt (gcc::context *ctxt) +{ + return new pass_nds32_relax_opt (ctxt); +} diff -Nur gcc-4.9.4.orig/gcc/config/nds32/nds32-soft-fp-comm.c gcc-4.9.4/gcc/config/nds32/nds32-soft-fp-comm.c --- gcc-4.9.4.orig/gcc/config/nds32/nds32-soft-fp-comm.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/nds32-soft-fp-comm.c 2016-08-08 20:37:45.582273034 +0200 @@ -0,0 +1,139 @@ +/* Operand commutative for soft floating point arithmetic pass + of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2015 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tree.h" +#include "function.h" +#include "expr.h" +#include "df.h" +#include "tree-pass.h" + +#define ARG0_REGNO 0 +#define ARG1_REGNO 1 + +static int +nds32_soft_fp_arith_comm_opt (void) +{ + basic_block bb; + rtx insn; + FOR_EACH_BB_FN (bb, cfun) + { + FOR_BB_INSNS (bb, insn) + { + if (!CALL_P (insn)) + continue; + + rtx pat = PATTERN (insn); + rtx call_rtx = XVECEXP (pat, 0, 0); + + if (GET_CODE (call_rtx) == SET) + call_rtx = SET_SRC (call_rtx); + + rtx func_mem = XEXP (call_rtx, 0); + rtx symbol = XEXP (func_mem, 0); + const char *func_name = XSTR (symbol, 0); + if (!((strcmp("__mulsf3", func_name) == 0) + || (strcmp("__addsf3", func_name) == 0))) + continue; + + rtx prev_insn = insn; + rtx arg0_insn = NULL_RTX; + rtx arg1_insn = NULL_RTX; + while ((prev_insn = PREV_INSN (prev_insn)) && prev_insn) + { + if (BLOCK_FOR_INSN (prev_insn) != BLOCK_FOR_INSN (insn)) + break; + + rtx set = PATTERN (prev_insn); + + rtx dst_reg = SET_DEST (set); + + if (!REG_P (dst_reg)) + break; + + unsigned regno = REGNO (dst_reg); + + if (regno == ARG0_REGNO) + { + arg0_insn = prev_insn; + continue; + } + else if (regno == ARG1_REGNO) + { + arg1_insn = prev_insn; + continue; + } + break; + } + if (arg0_insn == NULL_RTX || arg1_insn == NULL_RTX) + continue; + + rtx arg0_src = SET_SRC (PATTERN (arg0_insn)); + rtx arg1_src = SET_SRC (PATTERN (arg1_insn)); + + if ((REG_P (arg0_src) && REGNO (arg0_src) == ARG1_REGNO) + || (REG_P (arg1_src) && REGNO (arg1_src) == ARG0_REGNO)) + { + /* Swap operand! */ + rtx tmp = SET_DEST (PATTERN (arg0_insn)); + SET_DEST (PATTERN (arg0_insn)) = SET_DEST (PATTERN (arg1_insn)); + SET_DEST (PATTERN (arg1_insn)) = tmp; + } + } + } + return 1; +} + +const pass_data pass_data_nds32_soft_fp_arith_comm_opt = +{ + RTL_PASS, /* type */ + "soft_fp_arith_comm", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_verify_rtl_sharing, /* todo_flags_finish */ +}; + +class pass_nds32_soft_fp_arith_comm_opt : public rtl_opt_pass +{ +public: + pass_nds32_soft_fp_arith_comm_opt (gcc::context *ctxt) + : rtl_opt_pass (pass_data_nds32_soft_fp_arith_comm_opt, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return TARGET_SOFT_FP_ARITH_COMM && !TARGET_FPU_SINGLE; } + unsigned int execute () { return nds32_soft_fp_arith_comm_opt (); } +}; + +rtl_opt_pass * +make_pass_nds32_soft_fp_arith_comm_opt (gcc::context *ctxt) +{ + return new pass_nds32_soft_fp_arith_comm_opt (ctxt); +} diff -Nur gcc-4.9.4.orig/gcc/config/nds32/pipelines.md gcc-4.9.4/gcc/config/nds32/pipelines.md --- gcc-4.9.4.orig/gcc/config/nds32/pipelines.md 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/pipelines.md 2016-08-08 20:37:45.594273497 +0200 @@ -1,5 +1,5 @@ ;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler -;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; Copyright (C) 2012-2015 Free Software Foundation, Inc. ;; Contributed by Andes Technology Corporation. ;; ;; This file is part of GCC. @@ -18,12 +18,47 @@ ;; along with GCC; see the file COPYING3. If not see ;; . -(define_automaton "nds32_machine") +;; ------------------------------------------------------------------------ +;; Include N7 pipeline settings. 
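Editor's note (illustration, not part of the patch): the soft_fp_arith_comm pass just above relies on the __addsf3 and __mulsf3 libcalls being commutative, so the two argument registers $r0/$r1 may be fed in either order. The idea is that when the move filling one argument register merely copies the other argument register, exchanging the destinations of the two moves turns that cross copy into a self copy, which later passes can typically delete. A hedged source-level analogy:

    /* Both operand orders give the same libcall result, so the compiler
       is free to decide which incoming value lands in $r0 and which in
       $r1 before the soft-float call.  */
    float
    soft_fp_comm_example (float a, float b)
    {
      return a + b;   /* __addsf3 (a, b) computes the same as __addsf3 (b, a) */
    }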
+;; ------------------------------------------------------------------------ +(include "nds32-n7.md") + + +;; ------------------------------------------------------------------------ +;; Include N8 pipeline settings. +;; ------------------------------------------------------------------------ +(include "nds32-n8.md") + + +;; ------------------------------------------------------------------------ +;; Include E8 pipeline settings. +;; ------------------------------------------------------------------------ +(include "nds32-e8.md") + + +;; ------------------------------------------------------------------------ +;; Include N9/N10 pipeline settings. +;; ------------------------------------------------------------------------ +(include "nds32-n9-3r2w.md") +(include "nds32-n9-2r1w.md") + + +;; ------------------------------------------------------------------------ +;; Include N12/N13 pipeline settings. +;; ------------------------------------------------------------------------ +(include "nds32-n13.md") + + +;; ------------------------------------------------------------------------ +;; Define simple pipeline settings. +;; ------------------------------------------------------------------------ + +(define_automaton "nds32_simple_machine") -(define_cpu_unit "general_unit" "nds32_machine") +(define_cpu_unit "simple_unit" "nds32_simple_machine") (define_insn_reservation "simple_insn" 1 - (eq_attr "type" "unknown,load,store,move,alu,compare,branch,call,misc") - "general_unit") + (eq_attr "pipeline_model" "simple") + "simple_unit") ;; ------------------------------------------------------------------------ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/predicates.md gcc-4.9.4/gcc/config/nds32/predicates.md --- gcc-4.9.4.orig/gcc/config/nds32/predicates.md 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/predicates.md 2016-08-08 20:37:45.594273497 +0200 @@ -1,5 +1,5 @@ ;; Predicate definitions of Andes NDS32 cpu for GNU compiler -;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; Copyright (C) 2012-2015 Free Software Foundation, Inc. ;; Contributed by Andes Technology Corporation. ;; ;; This file is part of GCC. 
@@ -24,25 +24,89 @@ (define_predicate "nds32_greater_less_comparison_operator" (match_code "gt,ge,lt,le")) +(define_predicate "nds32_float_comparison_operator" + (match_code "eq,ne,le,lt,ge,gt,ordered,unordered,ungt,unge,unlt,unle")) + +(define_predicate "nds32_movecc_comparison_operator" + (match_code "eq,ne,le,leu,ge,geu")) + (define_special_predicate "nds32_logical_binary_operator" (match_code "and,ior,xor")) +(define_special_predicate "nds32_conditional_call_comparison_operator" + (match_code "lt,ge")) + +(define_special_predicate "nds32_have_33_inst_operator" + (match_code "mult,and,ior,xor")) + (define_predicate "nds32_symbolic_operand" (match_code "const,symbol_ref,label_ref")) +(define_predicate "nds32_nonunspec_symbolic_operand" + (and (match_code "const,symbol_ref,label_ref") + (match_test "!flag_pic && nds32_const_unspec_p (op)"))) + +(define_predicate "nds32_label_operand" + (match_code "label_ref")) + (define_predicate "nds32_reg_constant_operand" - (ior (match_operand 0 "register_operand") - (match_operand 0 "const_int_operand"))) + (match_code "reg,const_int")) (define_predicate "nds32_rimm15s_operand" (ior (match_operand 0 "register_operand") (and (match_operand 0 "const_int_operand") (match_test "satisfies_constraint_Is15 (op)")))) +(define_predicate "nds32_rimm11s_operand" + (ior (match_operand 0 "register_operand") + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Is11 (op)")))) + +(define_predicate "nds32_imm_0_1_operand" + (and (match_operand 0 "const_int_operand") + (ior (match_test "satisfies_constraint_Iv00 (op)") + (match_test "satisfies_constraint_Iv01 (op)")))) + +(define_predicate "nds32_imm_1_2_operand" + (and (match_operand 0 "const_int_operand") + (ior (match_test "satisfies_constraint_Iv01 (op)") + (match_test "satisfies_constraint_Iv02 (op)")))) + +(define_predicate "nds32_imm_1_2_4_8_operand" + (and (match_operand 0 "const_int_operand") + (ior (ior (match_test "satisfies_constraint_Iv01 (op)") + (match_test "satisfies_constraint_Iv02 (op)")) + (ior (match_test "satisfies_constraint_Iv04 (op)") + (match_test "satisfies_constraint_Iv08 (op)"))))) + +(define_predicate "nds32_imm2u_operand" + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Iu02 (op)"))) + +(define_predicate "nds32_imm4u_operand" + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Iu04 (op)"))) + (define_predicate "nds32_imm5u_operand" (and (match_operand 0 "const_int_operand") (match_test "satisfies_constraint_Iu05 (op)"))) +(define_predicate "nds32_imm6u_operand" + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Iu06 (op)"))) + +(define_predicate "nds32_rimm4u_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "nds32_imm4u_operand"))) + +(define_predicate "nds32_rimm5u_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "nds32_imm5u_operand"))) + +(define_predicate "nds32_rimm6u_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "nds32_imm6u_operand"))) + (define_predicate "nds32_move_operand" (and (match_operand 0 "general_operand") (not (match_code "high,const,symbol_ref,label_ref"))) @@ -57,12 +121,103 @@ return true; }) +(define_predicate "nds32_vmove_operand" + (and (match_operand 0 "general_operand") + (not (match_code "high,const,symbol_ref,label_ref"))) +{ + /* If the constant op does NOT satisfy Is20 nor Ihig, + we can not perform move behavior by a single instruction. 
*/ + if (GET_CODE (op) == CONST_VECTOR + && !satisfies_constraint_CVs2 (op) + && !satisfies_constraint_CVhi (op)) + return false; + + return true; +}) + +(define_predicate "nds32_and_operand" + (match_code "reg,const_int") +{ + return REG_P (op) + || satisfies_constraint_Izeb (op) + || satisfies_constraint_Izeh (op) + || satisfies_constraint_Ixls (op) + || satisfies_constraint_Ix11 (op) + || satisfies_constraint_Ibms (op) + || satisfies_constraint_Ifex (op) + || satisfies_constraint_Iu15 (op) + || satisfies_constraint_Ii15 (op) + || satisfies_constraint_Ic15 (op); +}) + +(define_predicate "nds32_ior_operand" + (match_code "reg,const_int") +{ + return REG_P (op) + || satisfies_constraint_Iu15 (op) + || satisfies_constraint_Ie15 (op); +}) + +(define_predicate "nds32_xor_operand" + (match_code "reg,const_int") +{ + return REG_P (op) + || GET_CODE (op) == SUBREG + || satisfies_constraint_Iu15 (op) + || satisfies_constraint_It15 (op); +}) + +(define_predicate "nds32_general_register_operand" + (match_code "reg,subreg") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + return (REG_P (op) + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO (op) <= NDS32_LAST_GPR_REGNUM)); +}) + +(define_predicate "nds32_insv_operand" + (match_code "const_int") +{ + return INTVAL (op) == 0 + || INTVAL (op) == 8 + || INTVAL (op) == 16 + || INTVAL (op) == 24; +}) + +(define_predicate "nds32_lmw_smw_base_operand" + (and (match_code "mem") + (match_test "nds32_valid_smw_lwm_base_p (op)"))) + +(define_predicate "float_even_register_operand" + (and (match_code "reg") + (and (match_test "REGNO (op) >= NDS32_FIRST_FPR_REGNUM") + (match_test "REGNO (op) <= NDS32_LAST_FPR_REGNUM") + (match_test "(REGNO (op) & 1) == 0")))) + +(define_predicate "float_odd_register_operand" + (and (match_code "reg") + (and (match_test "REGNO (op) >= NDS32_FIRST_FPR_REGNUM") + (match_test "REGNO (op) <= NDS32_LAST_FPR_REGNUM") + (match_test "(REGNO (op) & 1) != 0")))) + (define_special_predicate "nds32_load_multiple_operation" (match_code "parallel") { /* To verify 'load' operation, pass 'true' for the second argument. See the implementation in nds32.c for details. */ - return nds32_valid_multiple_load_store (op, true); + return nds32_valid_multiple_load_store_p (op, true, false); +}) + +(define_special_predicate "nds32_load_multiple_and_update_address_operation" + (match_code "parallel") +{ + /* To verify 'load' operation, pass 'true' for the second argument. + to verify 'update address' operation, pass 'true' for the third argument + See the implementation in nds32.c for details. */ + return nds32_valid_multiple_load_store_p (op, true, true); }) (define_special_predicate "nds32_store_multiple_operation" @@ -70,23 +225,32 @@ { /* To verify 'store' operation, pass 'false' for the second argument. See the implementation in nds32.c for details. */ - return nds32_valid_multiple_load_store (op, false); + return nds32_valid_multiple_load_store_p (op, false, false); +}) + +(define_special_predicate "nds32_store_multiple_and_update_address_operation" + (match_code "parallel") +{ + /* To verify 'store' operation, pass 'false' for the second argument, + to verify 'update address' operation, pass 'true' for the third argument + See the implementation in nds32.c for details. */ + return nds32_valid_multiple_load_store_p (op, false, true); }) (define_special_predicate "nds32_stack_push_operation" (match_code "parallel") { /* To verify 'push' operation, pass 'true' for the second argument. - See the implementation in nds32.c for details. 
*/ - return nds32_valid_stack_push_pop (op, true); + See the implementation in nds32-predicates.c for details. */ + return nds32_valid_stack_push_pop_p (op, true); }) (define_special_predicate "nds32_stack_pop_operation" (match_code "parallel") { /* To verify 'pop' operation, pass 'false' for the second argument. - See the implementation in nds32.c for details. */ - return nds32_valid_stack_push_pop (op, false); + See the implementation in nds32-predicates.c for details. */ + return nds32_valid_stack_push_pop_p (op, false); }) ;; ------------------------------------------------------------------------ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/t-elf gcc-4.9.4/gcc/config/nds32/t-elf --- gcc-4.9.4.orig/gcc/config/nds32/t-elf 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/t-elf 2016-08-08 20:37:45.594273497 +0200 @@ -0,0 +1,34 @@ +# The multilib settings of Andes NDS32 cpu for GNU compiler +# Copyright (C) 2012-2015 Free Software Foundation, Inc. +# Contributed by Andes Technology Corporation. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 3, or (at your +# option) any later version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +# License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# We also define a macro MULTILIB_DEFAULTS in nds32.h that tells the +# driver program which options are defaults for this target and thus +# do not need to be handled specially. +MULTILIB_OPTIONS += mcmodel=small/mcmodel=medium/mcmodel=large mvh + +ifneq ($(filter dsp,$(TM_MULTILIB_CONFIG)),) +MULTILIB_OPTIONS += mext-dsp +endif + +ifneq ($(filter zol,$(TM_MULTILIB_CONFIG)),) +MULTILIB_OPTIONS += mext-zol +endif + +# ------------------------------------------------------------------------ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/t-linux gcc-4.9.4/gcc/config/nds32/t-linux --- gcc-4.9.4.orig/gcc/config/nds32/t-linux 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/t-linux 2016-08-08 20:37:45.594273497 +0200 @@ -0,0 +1,26 @@ +# The multilib settings of Andes NDS32 cpu for GNU compiler +# Copyright (C) 2012-2015 Free Software Foundation, Inc. +# Contributed by Andes Technology Corporation. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 3, or (at your +# option) any later version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +# License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# We also define a macro MULTILIB_DEFAULTS in nds32.h that tells the +# driver program which options are defaults for this target and thus +# do not need to be handled specially. 
+MULTILIB_OPTIONS += + +# ------------------------------------------------------------------------ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/t-mlibs gcc-4.9.4/gcc/config/nds32/t-mlibs --- gcc-4.9.4.orig/gcc/config/nds32/t-mlibs 2014-02-14 06:01:31.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/t-mlibs 1970-01-01 01:00:00.000000000 +0100 @@ -1,38 +0,0 @@ -# The multilib settings of Andes NDS32 cpu for GNU compiler -# Copyright (C) 2012-2014 Free Software Foundation, Inc. -# Contributed by Andes Technology Corporation. -# -# This file is part of GCC. -# -# GCC is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published -# by the Free Software Foundation; either version 3, or (at your -# option) any later version. -# -# GCC is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -# License for more details. -# -# You should have received a copy of the GNU General Public License -# along with GCC; see the file COPYING3. If not see -# . - -# We need to build following multilibs combinations: -# -# 1. -# 2. -mlittle-endian -# 3. -mbig-endian -# 4. -mgp-direct -# 5. -mno-gp-direct -# 6. -mlittle-endian -mgp-direct -# 7. -mlittle-endian -mno-gp-direct -# 8. -mbig-endian -mgp-direct -# 9. -mbig-endian -mno-gp-direct -# -# We also define a macro MULTILIB_DEFAULTS in nds32.h that tells the -# driver program which options are defaults for this target and thus -# do not need to be handled specially. -MULTILIB_OPTIONS = mlittle-endian/mbig-endian mgp-direct/mno-gp-direct - -# ------------------------------------------------------------------------ diff -Nur gcc-4.9.4.orig/gcc/config/nds32/t-nds32 gcc-4.9.4/gcc/config/nds32/t-nds32 --- gcc-4.9.4.orig/gcc/config/nds32/t-nds32 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/config/nds32/t-nds32 2016-08-08 20:37:45.622274582 +0200 @@ -0,0 +1,199 @@ +# Dependency rules rule of Andes NDS32 cpu for GNU compiler +# Copyright (C) 2012-2015 Free Software Foundation, Inc. +# Contributed by Andes Technology Corporation. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 3, or (at your +# option) any later version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +# License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . 
+ + +nds32-md-auxiliary.o: $(srcdir)/config/nds32/nds32-md-auxiliary.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ + insn-config.h conditions.h output.h dumpfile.h \ + $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ + $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ + $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ + $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ + intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/nds32/nds32-md-auxiliary.c + +nds32-memory-manipulation.o: $(srcdir)/config/nds32/nds32-memory-manipulation.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ + insn-config.h conditions.h output.h dumpfile.h \ + $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ + $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ + $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ + $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ + intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/nds32/nds32-memory-manipulation.c + +nds32-predicates.o: $(srcdir)/config/nds32/nds32-predicates.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ + insn-config.h conditions.h output.h dumpfile.h \ + $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ + $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ + $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ + $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ + intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/nds32/nds32-predicates.c + +nds32-intrinsic.o: $(srcdir)/config/nds32/nds32-intrinsic.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ + insn-config.h conditions.h output.h dumpfile.h \ + $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ + $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ + $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ + $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ + intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/nds32/nds32-intrinsic.c + +nds32-pipelines-auxiliary.o: \ + $(srcdir)/config/nds32/nds32-pipelines-auxiliary.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ + insn-config.h conditions.h output.h dumpfile.h \ + $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ + $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ + $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ + $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ + intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/nds32/nds32-pipelines-auxiliary.c + +nds32-isr.o: \ + $(srcdir)/config/nds32/nds32-isr.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ + insn-config.h conditions.h output.h dumpfile.h \ + $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ + $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ + $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ + $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ + intl.h libfuncs.h 
$(PARAMS_H) $(OPTS_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/nds32/nds32-isr.c + +nds32-cost.o: \ + $(srcdir)/config/nds32/nds32-cost.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ + insn-config.h conditions.h output.h dumpfile.h \ + $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ + $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ + $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ + $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ + intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/nds32/nds32-cost.c + +nds32-fp-as-gp.o: \ + $(srcdir)/config/nds32/nds32-fp-as-gp.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ + insn-config.h conditions.h output.h dumpfile.h \ + $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ + $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ + $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ + $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ + intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/nds32/nds32-fp-as-gp.c + +nds32-load-store-opt.o: \ + $(srcdir)/config/nds32/nds32-load-store-opt.c \ + $(srcdir)/config/nds32/nds32-load-store-opt.h \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ + insn-config.h conditions.h output.h dumpfile.h \ + $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ + $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ + $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ + $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ + intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/nds32/nds32-load-store-opt.c + +nds32-soft-fp-comm.o: \ + $(srcdir)/config/nds32/nds32-soft-fp-comm.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ + insn-config.h conditions.h output.h dumpfile.h \ + $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ + $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ + $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ + $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ + intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/nds32/nds32-soft-fp-comm.c + +nds32-regrename.o: \ + $(srcdir)/config/nds32/nds32-regrename.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ + insn-config.h conditions.h output.h dumpfile.h \ + $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ + $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ + $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ + $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ + intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/nds32/nds32-regrename.c + +nds32-gcse.o: \ + $(srcdir)/config/nds32/nds32-gcse.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ + insn-config.h conditions.h output.h dumpfile.h \ + $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ + $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ + 
$(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ + $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ + intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/nds32/nds32-gcse.c + +nds32-relax-opt.o: \ + $(srcdir)/config/nds32/nds32-relax-opt.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ + insn-config.h conditions.h output.h dumpfile.h \ + $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ + $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ + $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ + $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ + intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/nds32/nds32-relax-opt.c + +nds32-hwloop.o: \ + $(srcdir)/config/nds32/nds32-hwloop.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ + insn-config.h conditions.h output.h dumpfile.h \ + $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ + $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ + $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ + $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ + intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/nds32/nds32-hwloop.c diff -Nur gcc-4.9.4.orig/gcc/config.gcc gcc-4.9.4/gcc/config.gcc --- gcc-4.9.4.orig/gcc/config.gcc 2016-03-14 11:03:12.000000000 +0100 +++ gcc-4.9.4/gcc/config.gcc 2016-08-08 20:37:45.622274582 +0200 @@ -427,7 +427,25 @@ ;; nds32*) cpu_type=nds32 - extra_headers="nds32_intrinsic.h" + need_64bit_hwint=yes + extra_headers="nds32_intrinsic.h nds32_isr.h nds32_init.inc" + case ${target} in + nds32*-*-linux*) + extra_options="${extra_options} nds32/nds32-linux.opt" + ;; + nds32*-*-elf*) + extra_options="${extra_options} nds32/nds32-elf.opt" + ;; + *) + ;; + esac + extra_options="${extra_options} g.opt" + extra_objs="nds32-cost.o nds32-intrinsic.o nds32-md-auxiliary.o \ + nds32-pipelines-auxiliary.o nds32-predicates.o \ + nds32-memory-manipulation.o nds32-fp-as-gp.o \ + nds32-load-store-opt.o nds32-soft-fp-comm.o nds32-isr.o \ + nds32-regrename.o nds32-gcse.o nds32-relax-opt.o \ + nds32-hwloop.o" ;; nios2-*-*) cpu_type=nios2 @@ -2135,17 +2153,67 @@ cxx_target_objs="msp430-c.o" tmake_file="${tmake_file} msp430/t-msp430" ;; -nds32le-*-*) +nds32*-*-*) target_cpu_default="0" tm_defines="${tm_defines}" - tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file}" - tmake_file="nds32/t-mlibs" - ;; -nds32be-*-*) - target_cpu_default="0|MASK_BIG_ENDIAN" - tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" - tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file}" - tmake_file="nds32/t-mlibs" + case ${target} in + nds32le*-*-*) + ;; + nds32be-*-*) + target_cpu_default="${target_cpu_default}|MASK_BIG_ENDIAN" + tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" + ;; + esac + case ${target} in + nds32*-*-elf*) + tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file} nds32/elf.h nds32/nds32_intrinsic.h" + tmake_file="nds32/t-nds32 nds32/t-elf" + ;; + nds32*-*-linux*) + tm_file="dbxelf.h elfos.h ${tm_file} gnu-user.h linux.h glibc-stdint.h nds32/linux.h nds32/nds32_intrinsic.h" + tmake_file="${tmake_file} nds32/t-nds32 nds32/t-linux" + ;; + esac + nds32_multilibs="${with_multilib_list}" + if test "$nds32_multilibs" = "default"; then + nds32_multilibs="" + fi + nds32_multilibs=`echo 
$nds32_multilibs | sed -e 's/,/ /g'` + for nds32_multilib in ${nds32_multilibs}; do + case ${nds32_multilib} in + dsp | zol ) + TM_MULTILIB_CONFIG="${TM_MULTILIB_CONFIG} ${nds32_multilib}" + ;; + *) + echo "--with-multilib-list=${nds32_multilib} not supported." + exit 1 + esac + done + + # Handle --enable-default-relax setting. + if test x${enable_default_relax} = xyes; then + tm_defines="${tm_defines} TARGET_DEFAULT_RELAX=1" + fi + # Handle --enable-Os-default-ifc setting. + if test x${enable_Os_default_ifc} = xyes; then + tm_defines="${tm_defines} TARGET_OS_DEFAULT_IFC=1" + fi + # Handle --enable-Os-default-ex9 setting. + if test x${enable_Os_default_ex9} = xyes; then + tm_defines="${tm_defines} TARGET_OS_DEFAULT_EX9=1" + fi + # Handle --with-ext-dsp + if test x${with_ext_dsp} = xyes; then + tm_defines="${tm_defines} TARGET_DEFAULT_EXT_DSP=1" + fi + if test x${with_ext_zol} = xyes; then + tm_defines="${tm_defines} TARGET_DEFAULT_HWLOOP=1" + fi + # Handle --with-16bit-ext, and default is on + if test x${with_ext_16bit} != xno; then + tm_defines="${tm_defines} TARGET_DEFAULT_16BIT=1" + fi + ;; nios2-*-*) tm_file="elfos.h ${tm_file}" @@ -3867,15 +3935,51 @@ ;; nds32*-*-*) - supported_defaults="arch nds32_lib" + supported_defaults="arch cpu nds32_lib float fpu_config memory_model" # process --with-arch case "${with_arch}" in - "" | v2 | v3 | v3m) + "" | v3 | v3j) + # OK + tm_defines="${tm_defines} TARGET_ARCH_DEFAULT=0" + tm_defines="${tm_defines} TARGET_DEFAULT_ISR_VECTOR_SIZE=4" + ;; + v2 | v2j | v3m) + # OK + tm_defines="${tm_defines} TARGET_ARCH_DEFAULT=0" + tm_defines="${tm_defines} TARGET_DEFAULT_ISR_VECTOR_SIZE=16" + ;; + v3f) + tm_defines="${tm_defines} TARGET_ARCH_DEFAULT=1" + tm_defines="${tm_defines} TARGET_DEFAULT_ISR_VECTOR_SIZE=4" + ;; + v3s) + tm_defines="${tm_defines} TARGET_ARCH_DEFAULT=2" + tm_defines="${tm_defines} TARGET_DEFAULT_ISR_VECTOR_SIZE=4" + ;; + *) + echo "Cannot accept --with-arch=$with_arch, available values are: v2 v2j v3 v3j v3m v3f v3s" 1>&2 + exit 1 + ;; + esac + + # process --with-memory-model + case "${with_memory_model}" in + "" | fast | slow) + ;; + *) + echo "Cannot accept --with-memory-model=$with_memory_model, available values are: fast slow" 1>&2 + exit 1 + ;; + esac + + # process --with-cpu + case "${with_cpu}" in + "" | n7 | n8 | e8 | n9 | n10 | n12 | n13) # OK ;; *) - echo "Cannot accept --with-arch=$with_arch, available values are: v2 v3 v3m" 1>&2 + echo "Cannot accept --with-cpu=$with_cpu, available values are: n7 n8 e8 n9 n10 n12 n13" 1>&2 exit 1 ;; esac @@ -3885,18 +3989,50 @@ "") # the default library is newlib with_nds32_lib=newlib + tm_defines="${tm_defines} TARGET_DEFAULT_CTOR_DTOR=1" ;; newlib) # OK + tm_defines="${tm_defines} TARGET_DEFAULT_CTOR_DTOR=1" ;; mculib) # OK + # for the arch=v3f or arch=v3s under mculib toolchain, + # we would like to set -fno-math-errno as default + case "${with_arch}" in + v3f | v3s) + tm_defines="${tm_defines} TARGET_DEFAULT_NO_MATH_ERRNO=1" + ;; + esac ;; *) echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: newlib mculib" 1>&2 exit 1 ;; esac + + # process --with-float + case "${with_float}" in + "" | soft | hard) + # OK + ;; + *) + echo "Cannot accept --with-float=$with_float, available values are: soft hard" 1>&2 + exit 1 + ;; + esac + + # process --with-config-fpu + case "${with_config_fpu}" in + "" | 0 | 1 | 2 | 3) + # OK + ;; + *) + echo "Cannot accept --with-config-fpu=$with_config_fpu, available values from 0 to 7" 1>&2 + exit 1 + ;; + esac + ;; powerpc*-*-* | rs6000-*-*) 
@@ -4201,7 +4337,7 @@ esac t= -all_defaults="abi cpu cpu_32 cpu_64 arch arch_32 arch_64 tune tune_32 tune_64 schedule float mode fpu nan divide llsc mips-plt synci tls" +all_defaults="abi cpu cpu_32 cpu_64 arch arch_32 arch_64 tune tune_32 tune_64 schedule float mode fpu nan divide llsc mips-plt synci tls memory_model" for option in $all_defaults do eval "val=\$with_"`echo $option | sed s/-/_/g` diff -Nur gcc-4.9.4.orig/gcc/configure gcc-4.9.4/gcc/configure --- gcc-4.9.4.orig/gcc/configure 2016-05-22 10:53:32.000000000 +0200 +++ gcc-4.9.4/gcc/configure 2016-08-08 20:37:45.630274892 +0200 @@ -26524,7 +26524,7 @@ # version to the per-target configury. case "$cpu_type" in aarch64 | alpha | arm | avr | bfin | cris | i386 | m32c | m68k | microblaze \ - | mips | nios2 | pa | rs6000 | score | sparc | spu | tilegx | tilepro \ + | mips | nds32 | nios2 | pa | rs6000 | score | sparc | spu | tilegx | tilepro \ | xstormy16 | xtensa) insn="nop" ;; diff -Nur gcc-4.9.4.orig/gcc/configure.ac gcc-4.9.4/gcc/configure.ac --- gcc-4.9.4.orig/gcc/configure.ac 2016-05-22 10:53:32.000000000 +0200 +++ gcc-4.9.4/gcc/configure.ac 2016-08-08 20:37:45.630274892 +0200 @@ -4442,7 +4442,7 @@ # version to the per-target configury. case "$cpu_type" in aarch64 | alpha | arm | avr | bfin | cris | i386 | m32c | m68k | microblaze \ - | mips | nios2 | pa | rs6000 | score | sparc | spu | tilegx | tilepro \ + | mips | nds32 | nios2 | pa | rs6000 | score | sparc | spu | tilegx | tilepro \ | xstormy16 | xtensa) insn="nop" ;; diff -Nur gcc-4.9.4.orig/gcc/cp/g++spec.c gcc-4.9.4/gcc/cp/g++spec.c --- gcc-4.9.4.orig/gcc/cp/g++spec.c 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/cp/g++spec.c 2016-08-08 20:37:45.630274892 +0200 @@ -401,5 +401,12 @@ return 0; } +/* Called before parsing the spec to tell which language driver is used. */ +int +lang_specific_is_c_plus_plus (void) +{ + return 1; +} + /* Number of extra output files that lang_specific_pre_link may generate. */ int lang_specific_extra_outfiles = 0; /* Not used for C++. */ diff -Nur gcc-4.9.4.orig/gcc/defaults.h gcc-4.9.4/gcc/defaults.h --- gcc-4.9.4.orig/gcc/defaults.h 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/defaults.h 2016-08-08 20:37:45.646275512 +0200 @@ -1085,6 +1085,10 @@ #define LOCAL_REGNO(REGNO) 0 #endif +#ifndef HONOR_REG_ALLOC_ORDER +#define HONOR_REG_ALLOC_ORDER 0 +#endif + /* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, the stack pointer does not matter. The value is tested only in functions that have frame pointers. */ diff -Nur gcc-4.9.4.orig/gcc/doc/extend.texi gcc-4.9.4/gcc/doc/extend.texi --- gcc-4.9.4.orig/gcc/doc/extend.texi 2015-10-15 18:40:14.000000000 +0200 +++ gcc-4.9.4/gcc/doc/extend.texi 2016-08-08 20:37:47.166334359 +0200 @@ -12710,38 +12710,33 @@ These built-in functions are available for the NDS32 target: -@deftypefn {Built-in Function} void __builtin_nds32_isync (int *@var{addr}) +@table @code +@item void __builtin_nds32_isync (int *@var{addr}) Insert an ISYNC instruction into the instruction stream where @var{addr} is an instruction address for serialization. -@end deftypefn -@deftypefn {Built-in Function} void __builtin_nds32_isb (void) +@item void __builtin_nds32_isb (void) Insert an ISB instruction into the instruction stream. -@end deftypefn -@deftypefn {Built-in Function} int __builtin_nds32_mfsr (int @var{sr}) +@item int __builtin_nds32_mfsr (int @var{sr}) Return the content of a system register which is mapped by @var{sr}. 
-@end deftypefn -@deftypefn {Built-in Function} int __builtin_nds32_mfusr (int @var{usr}) +@item int __builtin_nds32_mfusr (int @var{usr}) Return the content of a user space register which is mapped by @var{usr}. -@end deftypefn -@deftypefn {Built-in Function} void __builtin_nds32_mtsr (int @var{value}, int @var{sr}) +@item void __builtin_nds32_mtsr (int @var{value}, int @var{sr}) Move the @var{value} to a system register which is mapped by @var{sr}. -@end deftypefn -@deftypefn {Built-in Function} void __builtin_nds32_mtusr (int @var{value}, int @var{usr}) +@item void __builtin_nds32_mtusr (int @var{value}, int @var{usr}) Move the @var{value} to a user space register which is mapped by @var{usr}. -@end deftypefn -@deftypefn {Built-in Function} void __builtin_nds32_setgie_en (void) +@item void __builtin_nds32_setgie_en (void) Enable global interrupt. -@end deftypefn -@deftypefn {Built-in Function} void __builtin_nds32_setgie_dis (void) +@item void __builtin_nds32_setgie_dis (void) Disable global interrupt. -@end deftypefn + +@end table @node picoChip Built-in Functions @subsection picoChip Built-in Functions diff -Nur gcc-4.9.4.orig/gcc/doc/install.texi gcc-4.9.4/gcc/doc/install.texi --- gcc-4.9.4.orig/gcc/doc/install.texi 2015-06-26 19:47:23.000000000 +0200 +++ gcc-4.9.4/gcc/doc/install.texi 2016-08-08 20:37:50.666469869 +0200 @@ -1901,7 +1901,7 @@ @item --with-nds32-lib=@var{library} Specifies that @var{library} setting is used for building @file{libgcc.a}. -Currently, the valid @var{library} is @samp{newlib} or @samp{mculib}. +Currently, the valid @var{library} is 'newlib' or 'mculib'. This option is only supported for the NDS32 target. @item --with-build-time-tools=@var{dir} diff -Nur gcc-4.9.4.orig/gcc/doc/invoke.texi gcc-4.9.4/gcc/doc/invoke.texi --- gcc-4.9.4.orig/gcc/doc/invoke.texi 2016-06-07 23:49:58.000000000 +0200 +++ gcc-4.9.4/gcc/doc/invoke.texi 2016-08-08 20:37:50.670470024 +0200 @@ -835,12 +835,17 @@ -mreduced-regs -mfull-regs @gol -mcmov -mno-cmov @gol -mperf-ext -mno-perf-ext @gol +-mperf2-ext -mno-perf2-ext @gol +-mstring-ext -mno-string-ext @gol -mv3push -mno-v3push @gol -m16bit -mno-16bit @gol -mgp-direct -mno-gp-direct @gol -misr-vector-size=@var{num} @gol -mcache-block-size=@var{num} @gol -march=@var{arch} @gol +-mcpu=@var{cpu} @gol +-mmemory-model=@var{cpu} @gol +-mconfig-register-ports=@var{ports} @gol -mforce-fp-as-gp -mforbid-fp-as-gp @gol -mex9 -mctor-dtor -mrelax} @@ -18350,6 +18355,22 @@ @opindex mno-perf-ext Do not generate performance extension instructions. +@item -mperf2-ext +@opindex mperf2-ext +Generate performance extension version 2 instructions. + +@item -mno-perf2-ext +@opindex mno-perf2-ext +Do not generate performance extension version 2 instructions. + +@item -mstring-ext +@opindex mstring-ext +Generate string extension instructions. + +@item -mno-string-ext +@opindex mno-string-ext +Do not generate string extension instructions. + @item -mv3push @opindex mv3push Generate v3 push25/pop25 instructions. @@ -18387,6 +18408,19 @@ @opindex march Specify the name of the target architecture. +@item -mcpu=@var{cpu} +@opindex mcpu +Specify the cpu for pipeline model. + +@item -mmemory-model=@var{cpu} +@opindex mmemory-model +Specify fast or slow memory model. + +@item -mconfig-register-ports=@var{ports} +@opindex mconfig-register-ports +Specify how many read/write ports for n9/n10 cores. +The value should be 3r2w or 2r1w. 
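The extend.texi hunk above reshapes the NDS32 intrinsic documentation into an @table. For orientation, a minimal usage sketch follows; the selector value and the wrapper function are hypothetical, and real code would take the selector from the target's nds32_intrinsic.h header rather than hard-coding it.

  /* Minimal usage sketch for the builtins documented above.  The SR
     selector value and this wrapper function are hypothetical; a real
     caller would use a selector macro from nds32_intrinsic.h.  */
  #define HYPOTHETICAL_SR  0

  void
  update_system_register (void)
  {
    int val;

    __builtin_nds32_setgie_dis ();                    /* disable global interrupts    */
    val = __builtin_nds32_mfsr (HYPOTHETICAL_SR);     /* read the system register     */
    __builtin_nds32_mtsr (val | 1, HYPOTHETICAL_SR);  /* write it back with bit 0 set */
    __builtin_nds32_isb ();                           /* serialize the pipeline       */
    __builtin_nds32_setgie_en ();                     /* re-enable interrupts         */
  }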
+ @item -mforce-fp-as-gp @opindex mforce-fp-as-gp Prevent $fp being allocated during register allocation so that compiler diff -Nur gcc-4.9.4.orig/gcc/doc/tm.texi gcc-4.9.4/gcc/doc/tm.texi --- gcc-4.9.4.orig/gcc/doc/tm.texi 2014-11-16 16:50:33.000000000 +0100 +++ gcc-4.9.4/gcc/doc/tm.texi 2016-08-08 20:37:51.174489537 +0200 @@ -2044,8 +2044,8 @@ prologue and restoring it in the epilogue. This discourages it from using call-saved registers. If a machine wants to ensure that IRA allocates registers in the order given by REG_ALLOC_ORDER even if some -call-saved registers appear earlier than call-used ones, this macro -should be defined. +call-saved registers appear earlier than call-used ones, then define this + macro as a C expression to nonzero. Default is 0. @end defmac @defmac IRA_HARD_REGNO_ADD_COST_MULTIPLIER (@var{regno}) diff -Nur gcc-4.9.4.orig/gcc/doc/tm.texi.in gcc-4.9.4/gcc/doc/tm.texi.in --- gcc-4.9.4.orig/gcc/doc/tm.texi.in 2014-11-16 16:50:33.000000000 +0100 +++ gcc-4.9.4/gcc/doc/tm.texi.in 2016-08-08 20:37:51.174489537 +0200 @@ -1849,8 +1849,8 @@ prologue and restoring it in the epilogue. This discourages it from using call-saved registers. If a machine wants to ensure that IRA allocates registers in the order given by REG_ALLOC_ORDER even if some -call-saved registers appear earlier than call-used ones, this macro -should be defined. +call-saved registers appear earlier than call-used ones, then define this + macro as a C expression to nonzero. Default is 0. @end defmac @defmac IRA_HARD_REGNO_ADD_COST_MULTIPLIER (@var{regno}) diff -Nur gcc-4.9.4.orig/gcc/final.c gcc-4.9.4/gcc/final.c --- gcc-4.9.4.orig/gcc/final.c 2014-02-18 22:16:21.000000000 +0100 +++ gcc-4.9.4/gcc/final.c 2016-08-08 20:37:51.174489537 +0200 @@ -1004,6 +1004,7 @@ /* Allocate the rest of the arrays. */ insn_lengths = XNEWVEC (int, max_uid); insn_lengths_max_uid = max_uid; + memset (insn_lengths, 0, sizeof (int) * max_uid); /* Syntax errors can lead to labels being outside of the main insn stream. Initialize insn_addresses, so that we get reproducible results. */ INSN_ADDRESSES_ALLOC (max_uid); diff -Nur gcc-4.9.4.orig/gcc/gcc.c gcc-4.9.4/gcc/gcc.c --- gcc-4.9.4.orig/gcc/gcc.c 2015-06-26 19:47:23.000000000 +0200 +++ gcc-4.9.4/gcc/gcc.c 2016-08-08 20:37:52.410537389 +0200 @@ -801,6 +801,14 @@ # define SYSROOT_HEADERS_SUFFIX_SPEC "" #endif +#ifndef STARTFILE_CXX_SPEC +#define STARTFILE_CXX_SPEC STARTFILE_SPEC +#endif + +#ifndef ENDFILE_CXX_SPEC +#define ENDFILE_CXX_SPEC ENDFILE_SPEC +#endif + static const char *asm_debug = ASM_DEBUG_SPEC; static const char *cpp_spec = CPP_SPEC; static const char *cc1_spec = CC1_SPEC; @@ -827,6 +835,9 @@ static const char *sysroot_hdrs_suffix_spec = SYSROOT_HEADERS_SUFFIX_SPEC; static const char *self_spec = ""; +static const char *startfile_cxx_spec = STARTFILE_CXX_SPEC; +static const char *endfile_cxx_spec = ENDFILE_CXX_SPEC; + /* Standard options to cpp, cc1, and as, to reduce duplication in specs. 
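The gcc.c changes in this hunk add STARTFILE_CXX_SPEC and ENDFILE_CXX_SPEC, defaulting to the plain STARTFILE_SPEC/ENDFILE_SPEC, and the %S/%E spec functions select the C++ variants when the g++ driver reports lang_specific_is_c_plus_plus (). A hedged sketch of how a port might use this, with placeholder crt object names that are not part of this patch:

  /* Hypothetical target-header fragment.  With this patch, %S/%E expand
     these specs only when the g++ driver is in charge; the ordinary
     STARTFILE_SPEC/ENDFILE_SPEC still apply for the C driver.  The crt
     object names below are illustrative placeholders.  */
  #define STARTFILE_CXX_SPEC  "crt0.o%s crtbegin_cxx.o%s"
  #define ENDFILE_CXX_SPEC    "crtend_cxx.o%s"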
There should be no need to override these in target dependent files, but we need to copy them to the specs file so that newer versions @@ -1326,6 +1337,9 @@ INIT_STATIC_SPEC ("sysroot_suffix_spec", &sysroot_suffix_spec), INIT_STATIC_SPEC ("sysroot_hdrs_suffix_spec", &sysroot_hdrs_suffix_spec), INIT_STATIC_SPEC ("self_spec", &self_spec), + + INIT_STATIC_SPEC ("startfile_cxx", &startfile_cxx_spec), + INIT_STATIC_SPEC ("endfile_cxx", &endfile_cxx_spec), }; #ifdef EXTRA_SPECS /* additional specs needed */ @@ -5256,7 +5270,11 @@ break; case 'E': - value = do_spec_1 (endfile_spec, 0, NULL); + if (lang_specific_is_c_plus_plus ()) + value = do_spec_1 (endfile_cxx_spec, 0, NULL); + else + value = do_spec_1 (endfile_spec, 0, NULL); + if (value != 0) return value; break; @@ -5301,7 +5319,11 @@ break; case 'S': - value = do_spec_1 (startfile_spec, 0, NULL); + if (lang_specific_is_c_plus_plus ()) + value = do_spec_1 (startfile_cxx_spec, 0, NULL); + else + value = do_spec_1 (startfile_spec, 0, NULL); + if (value != 0) return value; break; @@ -7496,7 +7518,7 @@ { const char *r; - for (q = multilib_options; *q != '\0'; q++) + for (q = multilib_options; *q != '\0'; *q && q++) { while (*q == ' ') q++; diff -Nur gcc-4.9.4.orig/gcc/gcc.h gcc-4.9.4/gcc/gcc.h --- gcc-4.9.4.orig/gcc/gcc.h 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/gcc.h 2016-08-08 20:37:52.410537389 +0200 @@ -46,6 +46,9 @@ /* Called before linking. Returns 0 on success and -1 on failure. */ extern int lang_specific_pre_link (void); +/* Called before parsing the spec to tell which language driver is used. */ +extern int lang_specific_is_c_plus_plus (void); + extern int n_infiles; /* Number of extra output files that lang_specific_pre_link may generate. */ diff -Nur gcc-4.9.4.orig/gcc/genoutput.c gcc-4.9.4/gcc/genoutput.c --- gcc-4.9.4.orig/gcc/genoutput.c 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/genoutput.c 2016-08-08 20:37:53.534580908 +0200 @@ -210,7 +210,7 @@ /* This is a complete list (unlike the one in genpreds.c) of constraint letters and modifiers with machine-independent meaning. The only omission is digits, as these are handled specially. */ -static const char indep_constraints[] = ",=+%*?!#&<>EFVXgimnoprs"; +static const char indep_constraints[] = ",=+%*$?!#&<>EFVXgimnoprs"; static struct constraint_data * constraints_by_letter_table[1 << CHAR_BIT]; diff -Nur gcc-4.9.4.orig/gcc/hw-doloop.c gcc-4.9.4/gcc/hw-doloop.c --- gcc-4.9.4.orig/gcc/hw-doloop.c 2016-01-22 15:49:22.000000000 +0100 +++ gcc-4.9.4/gcc/hw-doloop.c 2016-08-08 20:37:53.534580908 +0200 @@ -57,6 +57,8 @@ loop->head == NULL ? 
-1 : loop->head->index, loop->depth, REGNO (loop->iter_reg)); + fprintf (dump_file, " outermost: [%d] ", loop->outermost->loop_no); + fprintf (dump_file, " blocks: [ "); for (ix = 0; loop->blocks.iterate (ix, &b); ix++) fprintf (dump_file, "%d ", b->index); @@ -84,6 +86,7 @@ { unsigned ix; basic_block bb; + regset set_this_insn = ALLOC_REG_SET (NULL); if (loop->bad) return; @@ -120,7 +123,6 @@ insn = NEXT_INSN (insn)) { df_ref *def_rec; - HARD_REG_SET set_this_insn; if (!NONDEBUG_INSN_P (insn)) continue; @@ -130,23 +132,45 @@ || asm_noperands (PATTERN (insn)) >= 0)) loop->has_asm = true; - CLEAR_HARD_REG_SET (set_this_insn); + CLEAR_REG_SET (set_this_insn); for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++) { rtx dreg = DF_REF_REG (*def_rec); + unsigned int regno, nregs; if (!REG_P (dreg)) continue; - add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg), - REGNO (dreg)); + regno = REGNO (dreg); + nregs = GET_MODE (dreg) / UNITS_PER_WORD; + bitmap_set_range (set_this_insn, regno, nregs); } if (insn == loop->loop_end) - CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg)); + CLEAR_REGNO_REG_SET (set_this_insn, REGNO (loop->iter_reg)); else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn))) loop->iter_reg_used = true; - IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn); + IOR_REG_SET (loop->regs_set_in_loop, set_this_insn); + } + } + FREE_REG_SET (set_this_insn); +} + +/* Get outermost loop for each loop. */ +static void +add_outermost (hwloop_info loop) +{ + int ix; + hwloop_info inner; + + if (!loop->outermost) + { + loop->outermost = loop; + + for (ix = 0; loop->loops.iterate (ix, &inner); ix++) + { + if (loop->loop_no != inner->loop_no) + inner->outermost = loop; } } } @@ -404,6 +428,7 @@ loop->loop_no = nloops++; loop->blocks.create (20); loop->block_bitmap = BITMAP_ALLOC (loop_stack); + loop->regs_set_in_loop = ALLOC_REG_SET (NULL); if (dump_file) { @@ -449,6 +474,10 @@ } } + /* Get outermost loop for each loop. */ + for (loop = loops; loop; loop = loop->next) + add_outermost (loop); + if (dump_file) dump_hwloops (loops); @@ -466,6 +495,7 @@ loop->loops.release (); loop->blocks.release (); BITMAP_FREE (loop->block_bitmap); + FREE_REG_SET (loop->regs_set_in_loop); XDELETE (loop); } } @@ -549,6 +579,32 @@ df_analyze (); } +/* Compute real depth for each loop, for example + if 3 neseting depth of loop, the depth form + outermost to innermost is 1, 2, 3. */ +static void +compute_real_depth (hwloop_info loop) +{ + int ix; + hwloop_info inner; + int inner_depth = 0; + + if (loop->computed_depth) + return; + + loop->computed_depth = 1; + + for (ix = 0; loop->loops.iterate (ix, &inner); ix++) + { + compute_real_depth (inner); + + if (inner_depth < inner->real_depth) + inner_depth = inner->real_depth; + } + + loop->real_depth = inner_depth + 1; +} + /* Call the OPT function for LOOP and all of its sub-loops. This is done in a depth-first search; innermost loops are visited first. OPTIMIZE and FAIL are the functions passed to reorg_loops by the @@ -585,7 +641,7 @@ inner_depth = inner->depth; /* The set of registers may be changed while optimizing the inner loop. */ - IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop); + IOR_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop); } loop->depth = inner_depth + 1; @@ -652,6 +708,10 @@ for (loop = loops; loop; loop = loop->next) scan_loop (loop); + /* Compute real depth for each loop. */ + for (loop = loops; loop; loop = loop->next) + compute_real_depth (loop); + /* Now apply the optimizations. 
*/ for (loop = loops; loop; loop = loop->next) optimize_loop (loop, hooks); diff -Nur gcc-4.9.4.orig/gcc/hw-doloop.h gcc-4.9.4/gcc/hw-doloop.h --- gcc-4.9.4.orig/gcc/hw-doloop.h 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/hw-doloop.h 2016-08-08 20:37:53.534580908 +0200 @@ -31,6 +31,9 @@ /* Next loop in the graph. */ hwloop_info next; + /* Outermost loop in the graph. */ + hwloop_info outermost; + /* Vector of blocks only within the loop, including those within inner loops. */ vec blocks; @@ -90,12 +93,19 @@ This value is valid when the target's optimize function is called. */ int depth; + /* The nesting depth of the loop. If 3 neseting depth of loop, + the depth form outermost to innermost is 1, 2, 3. */ + int real_depth; + /* True if we can't optimize this loop. */ bool bad; /* True if we have visited this loop during the optimization phase. */ bool visited; + /* True if we have computed this loop real depth. */ + bool computed_depth; + /* The following values are collected before calling the target's optimize function and are not valid earlier. */ @@ -115,7 +125,7 @@ /* Hard registers set at any point in the loop, except for the loop counter register's set in the doloop_end instruction. */ - HARD_REG_SET regs_set_in_loop; + regset regs_set_in_loop; }; /* A set of hooks to be defined by a target that wants to use the reorg_loops diff -Nur gcc-4.9.4.orig/gcc/ira.c gcc-4.9.4/gcc/ira.c --- gcc-4.9.4.orig/gcc/ira.c 2016-03-31 15:21:43.000000000 +0200 +++ gcc-4.9.4/gcc/ira.c 2016-08-08 20:37:53.534580908 +0200 @@ -5446,14 +5446,16 @@ #ifdef ENABLE_IRA_CHECKING print_redundant_copies (); #endif - - ira_spilled_reg_stack_slots_num = 0; - ira_spilled_reg_stack_slots - = ((struct ira_spilled_reg_stack_slot *) - ira_allocate (max_regno - * sizeof (struct ira_spilled_reg_stack_slot))); - memset (ira_spilled_reg_stack_slots, 0, - max_regno * sizeof (struct ira_spilled_reg_stack_slot)); + if (! ira_use_lra_p) + { + ira_spilled_reg_stack_slots_num = 0; + ira_spilled_reg_stack_slots + = ((struct ira_spilled_reg_stack_slot *) + ira_allocate (max_regno + * sizeof (struct ira_spilled_reg_stack_slot))); + memset (ira_spilled_reg_stack_slots, 0, + max_regno * sizeof (struct ira_spilled_reg_stack_slot)); + } } allocate_initial_values (); @@ -5489,9 +5491,6 @@ FOR_ALL_BB_FN (bb, cfun) bb->loop_father = NULL; current_loops = NULL; - - if (ira_conflicts_p) - ira_free (ira_spilled_reg_stack_slots); ira_destroy (); diff -Nur gcc-4.9.4.orig/gcc/ira-color.c gcc-4.9.4/gcc/ira-color.c --- gcc-4.9.4.orig/gcc/ira-color.c 2014-03-26 07:46:27.000000000 +0100 +++ gcc-4.9.4/gcc/ira-color.c 2016-08-08 20:37:53.534580908 +0200 @@ -1599,7 +1599,6 @@ } return j == nregs; } -#ifndef HONOR_REG_ALLOC_ORDER /* Return number of registers needed to be saved and restored at function prologue/epilogue if we allocate HARD_REGNO to hold value @@ -1618,7 +1617,6 @@ nregs++; return nregs; } -#endif /* Choose a hard register for allocno A. If RETRY_P is TRUE, it means that the function called from function @@ -1653,11 +1651,9 @@ enum reg_class aclass; enum machine_mode mode; static int costs[FIRST_PSEUDO_REGISTER], full_costs[FIRST_PSEUDO_REGISTER]; -#ifndef HONOR_REG_ALLOC_ORDER int saved_nregs; enum reg_class rclass; int add_cost; -#endif #ifdef STACK_REGS bool no_stack_reg_p; #endif @@ -1823,19 +1819,19 @@ continue; cost = costs[i]; full_cost = full_costs[i]; -#ifndef HONOR_REG_ALLOC_ORDER - if ((saved_nregs = calculate_saved_nregs (hard_regno, mode)) != 0) - /* We need to save/restore the hard register in - epilogue/prologue. 
Therefore we increase the cost. */ - { - rclass = REGNO_REG_CLASS (hard_regno); - add_cost = ((ira_memory_move_cost[mode][rclass][0] - + ira_memory_move_cost[mode][rclass][1]) - * saved_nregs / hard_regno_nregs[hard_regno][mode] - 1); - cost += add_cost; - full_cost += add_cost; - } -#endif + if (!HONOR_REG_ALLOC_ORDER) + if ((saved_nregs = calculate_saved_nregs (hard_regno, mode)) != 0) + /* We need to save/restore the hard register in + epilogue/prologue. Therefore we increase the cost. */ + { + rclass = REGNO_REG_CLASS (hard_regno); + add_cost = ((ira_memory_move_cost[mode][rclass][0] + + ira_memory_move_cost[mode][rclass][1]) + * saved_nregs / hard_regno_nregs[hard_regno][mode] - 1); + cost += add_cost; + full_cost += add_cost; + } + if (min_cost > cost) min_cost = cost; if (min_full_cost > full_cost) @@ -4068,6 +4064,8 @@ ira_allocno_iterator ai; ira_allocno_t *spilled_coalesced_allocnos; + ira_assert (! ira_use_lra_p); + /* Set up allocnos can be coalesced. */ coloring_allocno_bitmap = ira_allocate_bitmap (); for (i = 0; i < n; i++) @@ -4417,6 +4415,8 @@ bitmap_iterator bi; struct ira_spilled_reg_stack_slot *slot = NULL; + ira_assert (! ira_use_lra_p); + ira_assert (inherent_size == PSEUDO_REGNO_BYTES (regno) && inherent_size <= total_size && ALLOCNO_HARD_REGNO (allocno) < 0); @@ -4529,6 +4529,8 @@ int slot_num; ira_allocno_t allocno; + ira_assert (! ira_use_lra_p); + ira_assert (PSEUDO_REGNO_BYTES (regno) <= total_size); allocno = ira_regno_allocno_map[regno]; slot_num = -ALLOCNO_HARD_REGNO (allocno) - 2; diff -Nur gcc-4.9.4.orig/gcc/ira-costs.c gcc-4.9.4/gcc/ira-costs.c --- gcc-4.9.4.orig/gcc/ira-costs.c 2014-05-22 23:10:26.000000000 +0200 +++ gcc-4.9.4/gcc/ira-costs.c 2016-08-08 20:37:53.534580908 +0200 @@ -652,6 +652,11 @@ c = *++p; break; + case '$': + if (optimize_size) + alt_cost -= 2; + break; + case '?': alt_cost += 2; case '!': case '#': case '&': diff -Nur gcc-4.9.4.orig/gcc/ira-lives.c gcc-4.9.4/gcc/ira-lives.c --- gcc-4.9.4.orig/gcc/ira-lives.c 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/ira-lives.c 2016-08-08 20:37:53.534580908 +0200 @@ -770,6 +770,7 @@ case '%': case '!': case '?': + case '$': break; case 'i': if (CONSTANT_P (op) diff -Nur gcc-4.9.4.orig/gcc/lra.c gcc-4.9.4/gcc/lra.c --- gcc-4.9.4.orig/gcc/lra.c 2015-04-09 21:42:24.000000000 +0200 +++ gcc-4.9.4/gcc/lra.c 2016-08-08 20:37:53.538581063 +0200 @@ -815,7 +815,7 @@ switch (c) { - case '=': case '+': case '*': + case '=': case '+': case '*': case '$': case 'E': case 'F': case 'G': case 'H': case 's': case 'i': case 'n': case 'I': case 'J': case 'K': case 'L': diff -Nur gcc-4.9.4.orig/gcc/lra-constraints.c gcc-4.9.4/gcc/lra-constraints.c --- gcc-4.9.4.orig/gcc/lra-constraints.c 2015-11-26 23:13:36.000000000 +0100 +++ gcc-4.9.4/gcc/lra-constraints.c 2016-08-08 20:37:53.538581063 +0200 @@ -1698,7 +1698,7 @@ c = '\0'; break; - case '=': case '+': case '?': case '*': case '!': + case '=': case '+': case '?': case '*': case '!': case '$': case ' ': case '\t': break; @@ -4859,9 +4859,8 @@ reg_renumber[REGNO (new_reg)] = hard_regno; } save = emit_spill_move (true, new_reg, original_reg); - if (NEXT_INSN (save) != NULL_RTX) + if (NEXT_INSN (save) != NULL_RTX && !call_save_p) { - lra_assert (! call_save_p); if (lra_dump_file != NULL) { fprintf @@ -4875,9 +4874,8 @@ return false; } restore = emit_spill_move (false, new_reg, original_reg); - if (NEXT_INSN (restore) != NULL_RTX) + if (NEXT_INSN (restore) != NULL_RTX && !call_save_p) { - lra_assert (! 
call_save_p); if (lra_dump_file != NULL) { fprintf (lra_dump_file, diff -Nur gcc-4.9.4.orig/gcc/Makefile.in gcc-4.9.4/gcc/Makefile.in --- gcc-4.9.4.orig/gcc/Makefile.in 2014-10-16 15:50:42.000000000 +0200 +++ gcc-4.9.4/gcc/Makefile.in 2016-08-08 20:37:45.494269627 +0200 @@ -1402,6 +1402,7 @@ tree-scalar-evolution.o \ tree-sra.o \ tree-switch-conversion.o \ + tree-switch-shortcut.o \ tree-ssa-address.o \ tree-ssa-alias.o \ tree-ssa-ccp.o \ diff -Nur gcc-4.9.4.orig/gcc/params.def gcc-4.9.4/gcc/params.def --- gcc-4.9.4.orig/gcc/params.def 2014-10-16 15:51:45.000000000 +0200 +++ gcc-4.9.4/gcc/params.def 2016-08-08 20:37:53.538581063 +0200 @@ -64,7 +64,7 @@ DEFPARAM (PARAM_MAX_INLINE_INSNS_SINGLE, "max-inline-insns-single", "The maximum number of instructions in a single function eligible for inlining", - 400, 0, 0) + 450, 0, 0) /* The single function inlining limit for functions that are inlined by virtue of -finline-functions (-O3). @@ -76,7 +76,7 @@ DEFPARAM (PARAM_MAX_INLINE_INSNS_AUTO, "max-inline-insns-auto", "The maximum number of instructions when automatically inlining", - 40, 0, 0) + 90, 0, 0) DEFPARAM (PARAM_MAX_INLINE_INSNS_RECURSIVE, "max-inline-insns-recursive", @@ -1054,6 +1054,22 @@ "strength reduction", 50, 1, 999999) +/* Maximum number of instructions to duplicate when shortcutting a switch. */ +DEFPARAM (PARAM_MAX_SWITCH_INSNS, + "max-switch-insns", + "Maximum number of instructions to duplicate when " + "shortcutting a switch statement", + 100, 1, 999999) + +/* Maximum number of paths to duplicate when shortcutting a switch. */ +DEFPARAM (PARAM_MAX_SWITCH_PATHS, + "max-switch-paths", + "Maximum number of new paths to create when" + " shortcutting a switch statement", + 50, 1, 999999) + + + DEFPARAM (PARAM_ASAN_STACK, "asan-stack", "Enable asan stack protection", diff -Nur gcc-4.9.4.orig/gcc/passes.def gcc-4.9.4/gcc/passes.def --- gcc-4.9.4.orig/gcc/passes.def 2014-01-17 18:50:10.000000000 +0100 +++ gcc-4.9.4/gcc/passes.def 2016-08-08 20:37:53.538581063 +0200 @@ -152,6 +152,7 @@ NEXT_PASS (pass_call_cdce); NEXT_PASS (pass_cselim); NEXT_PASS (pass_tree_ifcombine); + NEXT_PASS (pass_switch_shortcut); NEXT_PASS (pass_phiopt); NEXT_PASS (pass_tail_recursion); NEXT_PASS (pass_ch); diff -Nur gcc-4.9.4.orig/gcc/recog.c gcc-4.9.4/gcc/recog.c --- gcc-4.9.4.orig/gcc/recog.c 2015-03-24 08:12:03.000000000 +0100 +++ gcc-4.9.4/gcc/recog.c 2016-08-08 20:37:53.538581063 +0200 @@ -1690,6 +1690,7 @@ case '#': case '&': case '?': + case '$': break; case '0': case '1': case '2': case '3': case '4': @@ -2298,6 +2299,7 @@ switch (c) { case '=': case '+': case '*': case '%': + case '$': case 'E': case 'F': case 'G': case 'H': case 's': case 'i': case 'n': case 'I': case 'J': case 'K': case 'L': @@ -2500,7 +2502,7 @@ c = '\0'; break; - case '?': case '!': case '*': case '%': + case '?': case '!': case '*': case '%': case '$': case '=': case '+': break; diff -Nur gcc-4.9.4.orig/gcc/reload1.c gcc-4.9.4/gcc/reload1.c --- gcc-4.9.4.orig/gcc/reload1.c 2014-02-26 22:57:40.000000000 +0100 +++ gcc-4.9.4/gcc/reload1.c 2016-08-08 20:37:53.542581218 +0200 @@ -1393,6 +1393,7 @@ case '>': case 'V': case 'o': case '&': case 'E': case 'F': case 's': case 'i': case 'n': case 'X': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': + case '$': case TARGET_MEM_CONSTRAINT: break; diff -Nur gcc-4.9.4.orig/gcc/reload.c gcc-4.9.4/gcc/reload.c --- gcc-4.9.4.orig/gcc/reload.c 2014-12-17 16:07:28.000000000 +0100 +++ gcc-4.9.4/gcc/reload.c 2016-08-08 20:37:53.542581218 +0200 @@ -3208,7 +3208,7 @@ 
c = '\0'; break; - case '=': case '+': case '*': + case '=': case '+': case '*': case '$': break; case '%': diff -Nur gcc-4.9.4.orig/gcc/stmt.c gcc-4.9.4/gcc/stmt.c --- gcc-4.9.4.orig/gcc/stmt.c 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/stmt.c 2016-08-08 20:37:53.542581218 +0200 @@ -290,6 +290,7 @@ break; case '?': case '!': case '*': case '&': case '#': + case '$': case 'E': case 'F': case 'G': case 'H': case 's': case 'i': case 'n': case 'I': case 'J': case 'K': case 'L': case 'M': @@ -389,7 +390,7 @@ break; case '<': case '>': - case '?': case '!': case '*': case '#': + case '?': case '!': case '*': case '#': case '$': case 'E': case 'F': case 'G': case 'H': case 's': case 'i': case 'n': case 'I': case 'J': case 'K': case 'L': case 'M': diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.c-torture/compile/limits-fndefn.c gcc-4.9.4/gcc/testsuite/gcc.c-torture/compile/limits-fndefn.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.c-torture/compile/limits-fndefn.c 2009-08-25 23:38:33.000000000 +0200 +++ gcc-4.9.4/gcc/testsuite/gcc.c-torture/compile/limits-fndefn.c 2016-08-08 20:37:53.546581373 +0200 @@ -1,4 +1,5 @@ /* { dg-skip-if "too complex for avr and picochip" { picochip-*-* avr-*-* } { "*" } { "" } } */ +/* { dg-skip-if "lto may cause internal compiler error on cygwin with gcc-4.9" { nds32*-*-* } { "*" } { "" } } */ /* { dg-timeout-factor 4.0 } */ #define LIM1(x) x##0, x##1, x##2, x##3, x##4, x##5, x##6, x##7, x##8, x##9, #define LIM2(x) LIM1(x##0) LIM1(x##1) LIM1(x##2) LIM1(x##3) LIM1(x##4) \ diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.c-torture/execute/20010122-1.x gcc-4.9.4/gcc/testsuite/gcc.c-torture/execute/20010122-1.x --- gcc-4.9.4.orig/gcc/testsuite/gcc.c-torture/execute/20010122-1.x 2002-07-17 19:54:16.000000000 +0200 +++ gcc-4.9.4/gcc/testsuite/gcc.c-torture/execute/20010122-1.x 2016-08-08 20:37:53.546581373 +0200 @@ -8,4 +8,12 @@ } } +# Please see Andes Bugzilla #10942 for the details. +if { [istarget "nds32*-*-*"] } { + # The __builtin_return_address(1) on nds32 target is able to + # return something useful as long as we always save $lp register + # in the stack. + set additional_flags "-malways-save-lp" +} + return 0 diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.c-torture/execute/920501-8.x gcc-4.9.4/gcc/testsuite/gcc.c-torture/execute/920501-8.x --- gcc-4.9.4.orig/gcc/testsuite/gcc.c-torture/execute/920501-8.x 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.c-torture/execute/920501-8.x 2016-08-08 20:37:53.546581373 +0200 @@ -0,0 +1,11 @@ +# Please see Andes Bugzilla #11005 for the details. +if { [istarget "nds32*-*-*"] } { + # The nds32 mculib toolchains require + # "-u_printf_float" and "-u_scanf_float" options + # to fully support printf and scanf functionality. + # These options are supposed to be harmless to newlib toolchain. + set additional_flags "-u_printf_float -u_scanf_float" +} + +return 0 + diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.c-torture/execute/930513-1.x gcc-4.9.4/gcc/testsuite/gcc.c-torture/execute/930513-1.x --- gcc-4.9.4.orig/gcc/testsuite/gcc.c-torture/execute/930513-1.x 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.c-torture/execute/930513-1.x 2016-08-08 20:37:53.546581373 +0200 @@ -0,0 +1,11 @@ +# Please see Andes Bugzilla #11005 for the details. +if { [istarget "nds32*-*-*"] } { + # The nds32 mculib toolchains require + # "-u_printf_float" and "-u_scanf_float" options + # to fully support printf and scanf functionality. + # These options are supposed to be harmless to newlib toolchain. 
+ set additional_flags "-u_printf_float -u_scanf_float" +} + +return 0 + diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.c-torture/execute/ieee/ieee.exp gcc-4.9.4/gcc/testsuite/gcc.c-torture/execute/ieee/ieee.exp --- gcc-4.9.4.orig/gcc/testsuite/gcc.c-torture/execute/ieee/ieee.exp 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.c-torture/execute/ieee/ieee.exp 2016-08-08 20:37:53.546581373 +0200 @@ -30,6 +30,10 @@ # Disable tests on machines with no hardware support for IEEE arithmetic. if { [istarget "vax-*-*"] || [ istarget "powerpc-*-*spe"] || [istarget "pdp11-*-*"] } { return } +# Since we cannot use dg-skip-if or dg-require-effective-target for individual +# test case under ieee category, we disable all ieee tests on nds32 fpu toolchains. +if { [istarget "nds32*-*-*"] && [check_effective_target_nds32_ext_fpu] } { return } + if $tracelevel then { strace $tracelevel } diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.c-torture/execute/struct-ret-1.x gcc-4.9.4/gcc/testsuite/gcc.c-torture/execute/struct-ret-1.x --- gcc-4.9.4.orig/gcc/testsuite/gcc.c-torture/execute/struct-ret-1.x 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.c-torture/execute/struct-ret-1.x 2016-08-08 20:37:53.546581373 +0200 @@ -0,0 +1,11 @@ +# Please see Andes Bugzilla #11005 for the details. +if { [istarget "nds32*-*-*"] } { + # The nds32 mculib toolchains require + # "-u_printf_float" and "-u_scanf_float" options + # to fully support printf and scanf functionality. + # These options are supposed to be harmless to newlib toolchain. + set additional_flags "-u_printf_float -u_scanf_float" +} + +return 0 + diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/constructor-1.c gcc-4.9.4/gcc/testsuite/gcc.dg/constructor-1.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/constructor-1.c 2011-10-31 15:26:38.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/constructor-1.c 2016-08-08 20:37:53.546581373 +0200 @@ -1,5 +1,6 @@ /* { dg-do run } */ /* { dg-options "-O2" } */ +/* { dg-options "-O2 -mctor-dtor" { target { nds32*-*-* } } } */ /* The ipa-split pass pulls the body of the if(!x) block into a separate function to make foo a better inlining diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/interchange-0.c gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/interchange-0.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/interchange-0.c 2010-04-22 21:50:23.000000000 +0200 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/interchange-0.c 2016-08-08 20:37:53.546581373 +0200 @@ -1,4 +1,5 @@ /* { dg-require-effective-target size32plus } */ +/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ #define DEBUG 0 diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/interchange-10.c gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/interchange-10.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/interchange-10.c 2010-02-07 20:49:26.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/interchange-10.c 2016-08-08 20:37:53.546581373 +0200 @@ -1,4 +1,6 @@ /* { dg-require-effective-target size32plus } */ +/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */ +/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ #define DEBUG 0 #if DEBUG diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/interchange-11.c gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/interchange-11.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/interchange-11.c 2011-01-25 07:45:54.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/interchange-11.c 2016-08-08 20:37:53.546581373 +0200 @@ -1,4 
+1,6 @@ /* { dg-require-effective-target size32plus } */ +/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */ +/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ #define DEBUG 0 #if DEBUG diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/interchange-15.c gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/interchange-15.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/interchange-15.c 2015-07-25 22:33:33.000000000 +0200 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/interchange-15.c 2016-08-08 20:37:53.546581373 +0200 @@ -1,4 +1,6 @@ /* { dg-require-effective-target size32plus } */ +/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */ +/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ #define DEBUG 0 #if DEBUG diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/interchange-1.c gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/interchange-1.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/interchange-1.c 2011-01-25 07:45:54.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/interchange-1.c 2016-08-08 20:37:53.546581373 +0200 @@ -1,4 +1,6 @@ /* { dg-require-effective-target size32plus } */ +/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */ +/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ /* Formerly known as ltrans-1.c */ diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/interchange-2.c gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/interchange-2.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/interchange-2.c 2010-02-07 20:49:26.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/interchange-2.c 2016-08-08 20:37:53.546581373 +0200 @@ -1,4 +1,6 @@ /* { dg-require-effective-target size32plus } */ +/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */ +/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ /* Formerly known as ltrans-2.c */ diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/interchange-3.c gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/interchange-3.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/interchange-3.c 2011-07-26 20:48:08.000000000 +0200 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/interchange-3.c 2016-08-08 20:37:53.546581373 +0200 @@ -1,4 +1,6 @@ /* { dg-require-effective-target size32plus } */ +/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */ +/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ /* Formerly known as ltrans-3.c */ diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/interchange-4.c gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/interchange-4.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/interchange-4.c 2010-02-07 20:49:26.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/interchange-4.c 2016-08-08 20:37:53.546581373 +0200 @@ -1,4 +1,6 @@ /* { dg-require-effective-target size32plus } */ +/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */ +/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ /* Formerly known as ltrans-4.c */ diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/interchange-5.c gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/interchange-5.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/interchange-5.c 2010-02-07 20:49:26.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/interchange-5.c 2016-08-08 20:37:53.546581373 +0200 @@ -1,4 +1,5 @@ /* { dg-require-effective-target size32plus } */ 
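/* Each graphite hunk in this group adds the same two directives for
   nds32-elf: the test is skipped unless the target reports a full address
   space (nds32_full_addr_space) and it is compiled with -mcmodel=large.
   The likely motivation is that these loop-interchange tests operate on
   large statically allocated arrays that exceed what the default nds32
   code model can comfortably address.  The snippet below is only a minimal
   sketch of that situation; the array name and dimensions are illustrative
   and are not copied from any individual graphite test.  */

#include <stdlib.h>

#define N 111
#define M 1111

/* A static matrix of this order (N * M * sizeof (double), close to 1 MB)
   is the kind of object that motivates -mcmodel=large in the hunks here.  */
static double A[N][M];

int
main (void)
{
  A[N - 1][M - 1] = 1.0;   /* Touch the array so it is really emitted.  */
  return A[N - 1][M - 1] == 1.0 ? EXIT_SUCCESS : EXIT_FAILURE;
}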
+/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ /* Formerly known as ltrans-5.c */ diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/interchange-mvt.c gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/interchange-mvt.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/interchange-mvt.c 2015-07-25 22:33:33.000000000 +0200 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/interchange-mvt.c 2016-08-08 20:37:53.546581373 +0200 @@ -1,4 +1,6 @@ /* { dg-require-effective-target size32plus } */ +/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */ +/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ #define DEBUG 0 #if DEBUG diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/pr46185.c gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/pr46185.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/graphite/pr46185.c 2010-12-07 02:29:10.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/graphite/pr46185.c 2016-08-08 20:37:53.546581373 +0200 @@ -1,5 +1,7 @@ /* { dg-do run } */ +/* { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } */ /* { dg-options "-O2 -floop-interchange -ffast-math -fno-ipa-cp" } */ +/* { dg-additional-options "-mcmodel=large" { target nds32*-*-elf* } } */ #define DEBUG 0 #if DEBUG diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/initpri1.c gcc-4.9.4/gcc/testsuite/gcc.dg/initpri1.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/initpri1.c 2007-02-25 19:47:05.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/initpri1.c 2016-08-08 20:37:53.546581373 +0200 @@ -1,4 +1,5 @@ /* { dg-do run { target init_priority } } */ +/* { dg-options "-mctor-dtor" { target { nds32*-*-* } } } */ extern void abort (); diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/initpri2.c gcc-4.9.4/gcc/testsuite/gcc.dg/initpri2.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/initpri2.c 2007-02-26 16:53:51.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/initpri2.c 2016-08-08 20:37:53.546581373 +0200 @@ -1,4 +1,5 @@ /* { dg-do compile { target init_priority } } */ +/* { dg-options "-mctor-dtor" { target { nds32*-*-* } } } */ /* Priorities must be in the range [0, 65535]. */ void c1() diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/initpri3.c gcc-4.9.4/gcc/testsuite/gcc.dg/initpri3.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/initpri3.c 2011-01-10 22:54:33.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/initpri3.c 2016-08-08 20:37:53.550581527 +0200 @@ -1,6 +1,7 @@ /* { dg-do run { target init_priority } } */ /* { dg-require-effective-target lto } */ /* { dg-options "-flto -O3" } */ +/* { dg-options "-flto -O3 -mctor-dtor" { target { nds32*-*-* } } } */ extern void abort (); diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/ipa/ipa-sra-1.c gcc-4.9.4/gcc/testsuite/gcc.dg/ipa/ipa-sra-1.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/ipa/ipa-sra-1.c 2010-09-10 01:38:23.000000000 +0200 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/ipa/ipa-sra-1.c 2016-08-08 20:37:53.550581527 +0200 @@ -1,5 +1,6 @@ /* { dg-do run } */ /* { dg-options "-O2 -fipa-sra -fdump-tree-eipa_sra-details" } */ +/* { dg-additional-options "-u_printf_float -u_scanf_float" { target nds32*-*-* } } */ struct bovid { diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/lower-subreg-1.c gcc-4.9.4/gcc/testsuite/gcc.dg/lower-subreg-1.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/lower-subreg-1.c 2013-08-09 22:48:00.000000000 +0200 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/lower-subreg-1.c 2016-08-08 20:37:53.550581527 +0200 @@ -1,4 +1,4 @@ -/* { dg-do compile { target { ! 
{ mips64 || { aarch64*-*-* arm*-*-* ia64-*-* sparc*-*-* spu-*-* tilegx-*-* } } } } } */ +/* { dg-do compile { target { ! { mips64 || { aarch64*-*-* arm*-*-* ia64-*-* sparc*-*-* spu-*-* tilegx-*-* nds32*-*-* } } } } } */ /* { dg-options "-O -fdump-rtl-subreg1" } */ /* { dg-skip-if "" { { i?86-*-* x86_64-*-* } && x32 } { "*" } { "" } } */ /* { dg-require-effective-target ilp32 } */ diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/lto/pr61526_0.c gcc-4.9.4/gcc/testsuite/gcc.dg/lto/pr61526_0.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/lto/pr61526_0.c 2014-06-17 11:08:02.000000000 +0200 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/lto/pr61526_0.c 2016-08-08 20:37:53.550581527 +0200 @@ -1,6 +1,7 @@ /* { dg-lto-do link } */ /* { dg-lto-options { { -fPIC -flto -flto-partition=1to1 } } } */ /* { dg-extra-ld-options { -shared } } */ +/* { dg-require-effective-target fpic } */ static void *master; void *foo () { return master; } diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/pr28796-2.c gcc-4.9.4/gcc/testsuite/gcc.dg/pr28796-2.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/pr28796-2.c 2012-03-14 18:08:03.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/pr28796-2.c 2016-08-08 20:37:53.550581527 +0200 @@ -2,6 +2,7 @@ /* { dg-options "-O2 -funsafe-math-optimizations -fno-finite-math-only -DUNSAFE" } */ /* { dg-add-options ieee } */ /* { dg-skip-if "No Inf/NaN support" { spu-*-* } } */ +/* { dg-skip-if "No Denormmalized support" { nds32_ext_fpu } } */ #include "tg-tests.h" diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/sibcall-3.c gcc-4.9.4/gcc/testsuite/gcc.dg/sibcall-3.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/sibcall-3.c 2013-12-03 11:58:05.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/sibcall-3.c 2016-08-08 20:37:53.550581527 +0200 @@ -5,7 +5,7 @@ Copyright (C) 2002 Free Software Foundation Inc. Contributed by Hans-Peter Nilsson */ -/* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* nds32*-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */ +/* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */ /* -mlongcall disables sibcall patterns. */ /* { dg-skip-if "" { powerpc*-*-* } { "-mlongcall" } { "" } } */ /* { dg-options "-O2 -foptimize-sibling-calls" } */ diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/sibcall-4.c gcc-4.9.4/gcc/testsuite/gcc.dg/sibcall-4.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/sibcall-4.c 2013-12-03 11:58:05.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/sibcall-4.c 2016-08-08 20:37:53.550581527 +0200 @@ -5,7 +5,7 @@ Copyright (C) 2002 Free Software Foundation Inc. Contributed by Hans-Peter Nilsson */ -/* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* nds32*-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */ +/* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* mcore-*-* mn10300-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */ /* -mlongcall disables sibcall patterns. 
*/ /* { dg-skip-if "" { powerpc*-*-* } { "-mlongcall" } { "" } } */ /* { dg-options "-O2 -foptimize-sibling-calls" } */ diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/stack-usage-1.c gcc-4.9.4/gcc/testsuite/gcc.dg/stack-usage-1.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/stack-usage-1.c 2013-12-31 08:05:35.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/stack-usage-1.c 2016-08-08 20:37:53.550581527 +0200 @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstack-usage" } */ +/* { dg-options "-fstack-usage -fno-omit-frame-pointer" { target { nds32*-*-* } } } */ /* This is aimed at testing basic support for -fstack-usage in the back-ends. See the SPARC back-end for example (grep flag_stack_usage_info in sparc.c). diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/torture/type-generic-1.c gcc-4.9.4/gcc/testsuite/gcc.dg/torture/type-generic-1.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/torture/type-generic-1.c 2011-07-16 14:07:17.000000000 +0200 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/torture/type-generic-1.c 2016-08-08 20:37:53.550581527 +0200 @@ -3,6 +3,7 @@ /* { dg-do run } */ /* { dg-skip-if "No Inf/NaN support" { spu-*-* } } */ +/* { dg-skip-if "No Denormmalized support" { nds32_ext_fpu } } */ /* { dg-options "-DUNSAFE" { target tic6x*-*-* } } */ /* { dg-add-options ieee } */ diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.dg/tree-ssa/vrp88.c gcc-4.9.4/gcc/testsuite/gcc.dg/tree-ssa/vrp88.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.dg/tree-ssa/vrp88.c 2013-04-29 14:52:17.000000000 +0200 +++ gcc-4.9.4/gcc/testsuite/gcc.dg/tree-ssa/vrp88.c 2016-08-08 20:37:53.550581527 +0200 @@ -33,7 +33,7 @@ } /* Verify that VRP simplified an "if" statement. */ -/* { dg-final { scan-tree-dump "Folded into: if.*" "vrp1"} } */ +/* { dg-final { scan-tree-dump "Folded into: if.*" "vrp1" { xfail *-*-* } } } */ /* { dg-final { cleanup-tree-dump "vrp1" } } */ diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/basic-main.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/basic-main.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/basic-main.c 2013-12-03 11:58:05.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/basic-main.c 2016-08-08 20:37:53.550581527 +0200 @@ -1,9 +1,10 @@ /* This is a basic main function test program. */ -/* { dg-do run } */ -/* { dg-options "-O0" } */ +/* { dg-do run } */ +/* { dg-options "-O0" } */ -int main(void) +int +main (void) { return 0; } diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-cctl.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-cctl.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-cctl.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-cctl.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,29 @@ +/* Verify that we generate cache control instruction with builtin function. 
*/ + +/* { dg-do compile } */ +/* { dg-options "-O0" } */ +/* { dg-final { scan-assembler "L1D_VA_INVAL" } } */ +/* { dg-final { scan-assembler "L1D_VA_INVAL" } } */ +/* { dg-final { scan-assembler "L1D_INVALALL" } } */ +/* { dg-final { scan-assembler "L1D_IX_WWD" } } */ +/* { dg-final { scan-assembler "L1D_IX_RWD" } } */ +/* { dg-final { scan-assembler "PFM_CTL" } } */ +/* { dg-final { scan-assembler "PFM_CTL" } } */ + +#include + +void +test (void) +{ + unsigned int va = 0; + + __nds32__cctlva_lck (NDS32_CCTL_L1D_VA_FILLCK, &va); + __nds32__cctlidx_wbinval (NDS32_CCTL_L1D_IX_WBINVAL, va); + __nds32__cctlva_wbinval_alvl (NDS32_CCTL_L1D_VA_INVAL, &va); + __nds32__cctlva_wbinval_one_lvl (NDS32_CCTL_L1D_VA_INVAL, &va); + __nds32__cctl_l1d_invalall (); + __nds32__cctlidx_write (NDS32_CCTL_L1D_IX_WWD, va, 1); + __nds32__cctlidx_read (NDS32_CCTL_L1D_IX_RWD, 1); + __nds32__mtusr (0, NDS32_USR_PFM_CTL); + __nds32__mfusr (NDS32_USR_PFM_CTL); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-dpref.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-dpref.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-dpref.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-dpref.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,24 @@ +/* Verify that we generate data prefetch instruction with builtin function. */ + +/* { dg-do compile } */ +/* { dg-options "-O0" } */ +/* { dg-final { scan-assembler "dpref\\tSRD" } } */ +/* { dg-final { scan-assembler "dpref\\tSRD" } } */ +/* { dg-final { scan-assembler "dpref\\tSRD" } } */ +/* { dg-final { scan-assembler "dpref\\tSRD" } } */ + +#include + +void +test (void) +{ + unsigned char dpref_q = 0; + unsigned short dpref_h = 0; + unsigned int dpref_w = 0; + unsigned long long dpref_dw = 0; + + __nds32__dpref_qw (&dpref_q, 0, NDS32_DPREF_SRD); + __nds32__dpref_hw (&dpref_h, 0, NDS32_DPREF_SRD); + __nds32__dpref_w (&dpref_w, 0, NDS32_DPREF_SRD); + __nds32__dpref_dw (&dpref_dw, 0, NDS32_DPREF_SRD); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpyd.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpyd.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpyd.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpyd.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,21 @@ +/* This is a test program for fcpysd instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_fpu_dp } */ + +#include +#include + +int +main () +{ + double da = -1.5; + double db = 1.3; + double dr = __nds32__fcpysd (da, db); + + if (dr != 1.5) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpynd.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpynd.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpynd.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpynd.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,21 @@ +/* This is a test program for fcpynsd instruction. 
*/ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_fpu_dp } */ + +#include +#include + +int +main () +{ + double da = -1.5; + double db = -1.3; + double dr = __nds32__fcpynsd (da, db); + + if (dr != 1.5) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpyns.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpyns.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpyns.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpyns.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,21 @@ +/* This is a test program for fcpynss instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_fpu_sp } */ + +#include +#include + +int +main () +{ + float a = -1.5; + float b = -1.3; + float r = __nds32__fcpynss (a, b); + + if (r != 1.5) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpys.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpys.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpys.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-fpu-fcpys.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,21 @@ +/* This is a test program for fcpyss instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_fpu_sp } */ + +#include +#include + +int +main () +{ + float a = -1.5; + float b = 1.3; + float r = __nds32__fcpyss (a, b); + + if (r != 1.5) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-fpu-fmfcfg.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-fpu-fmfcfg.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-fpu-fmfcfg.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-fpu-fmfcfg.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,23 @@ +/* This is a test program for fmfcfg instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_fpu } */ + +#include +#include + +int +main () +{ + unsigned int intrinsic_fmfcfg = -1; + unsigned int inline_assemble_fmfcfg = -2; + + intrinsic_fmfcfg = __nds32__fmfcfg (); + __asm volatile ("fmfcfg %0" : "=r" (inline_assemble_fmfcfg)); + + if (intrinsic_fmfcfg == inline_assemble_fmfcfg) + exit (0); + else + abort (); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-fpu-fpcsr.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-fpu-fpcsr.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-fpu-fpcsr.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-fpu-fpcsr.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,28 @@ +/* This is a test program for fmtcsr/fmfcsr instruction. 
*/ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_fpu } */ + +#include +#include + +int +main () +{ + unsigned int fpcsr; + + /* write fpcsr */ + fpcsr = 3; + __nds32__fmtcsr (fpcsr); + + /* read fpcsr */ + fpcsr = 0; + fpcsr = __nds32__fmfcsr (); + fpcsr = fpcsr & 0x00001fff; + + if (fpcsr == 3) + exit (0); + else + abort (); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-get-lp.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-get-lp.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-get-lp.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-get-lp.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,22 @@ +/* Verify the return address with builtin function. */ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ + +#include +#include + +int main() +{ + unsigned int intrinsic_lp = -1; + unsigned int inline_assemble_lp = -2; + + intrinsic_lp = __nds32__return_address (); + + __asm volatile ("mov55 %0, $lp" : "=r" (inline_assemble_lp)); + + if (intrinsic_lp != inline_assemble_lp) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-isb.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-isb.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-isb.c 2013-12-03 11:58:05.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-isb.c 2016-08-08 20:37:53.554581681 +0200 @@ -1,11 +1,13 @@ /* Verify that we generate isb instruction with builtin function. */ -/* { dg-do compile } */ -/* { dg-options "-O0" } */ -/* { dg-final { scan-assembler "\\tisb" } } */ +/* { dg-do compile } */ +/* { dg-options "-O0" } */ +/* { dg-final { scan-assembler "\\tisb" } } */ + +#include void test (void) { - __builtin_nds32_isb (); + __nds32__isb (); } diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-isync.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-isync.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-isync.c 2013-12-03 11:58:05.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-isync.c 2016-08-08 20:37:53.554581681 +0200 @@ -1,12 +1,14 @@ /* Verify that we generate isync instruction with builtin function. */ -/* { dg-do compile } */ -/* { dg-options "-O0" } */ -/* { dg-final { scan-assembler "\\tisync" } } */ +/* { dg-do compile } */ +/* { dg-options "-O0" } */ +/* { dg-final { scan-assembler "\\tisync" } } */ + +#include void test (void) { int *addr = (int *) 0x53000000; - __builtin_nds32_isync (addr); + __nds32__isync (addr); } diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-load-store.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-load-store.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-load-store.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-load-store.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,25 @@ +/* Verify that we generate llw/lwup/scw/swup instruction + with builtin function. 
*/ + +/* { dg-do compile } */ +/* { dg-require-effective-target nds32_no_v3m } */ +/* { dg-options "-O0" } */ +/* { dg-final { scan-assembler "\\tllw" } } */ +/* { dg-final { scan-assembler "\\tlwup" } } */ +/* { dg-final { scan-assembler "\\tscw" } } */ +/* { dg-final { scan-assembler "\\tswup" } } */ + +#include + +void +test (void) +{ + int a = 0; + int b = 0; + unsigned int cc = 0; + + __nds32__llw (&a); + cc = __nds32__lwup (&a); + __nds32__scw (&a, b); + __nds32__swup (&a, b); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-lto.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-lto.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-lto.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-lto.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,28 @@ +/* Verify that we use -flto option to generate instructions + with builtin function. */ + +/* { dg-do compile } */ +/* { dg-options "-O0 -flto" } */ +/* { dg-final { scan-assembler "\\tdsb" } } */ +/* { dg-final { scan-assembler "\\tisb" } } */ +/* { dg-final { scan-assembler "\\tmsync\\tall" } } */ +/* { dg-final { scan-assembler "\\tmsync\\tstore" } } */ +/* { dg-final { scan-assembler "\\tnop" } } */ +/* { dg-final { scan-assembler "\\tstandby\\tno_wake_grant" } } */ +/* { dg-final { scan-assembler "\\tstandby\\twake_grant" } } */ +/* { dg-final { scan-assembler "\\tstandby\\twait_done" } } */ + +#include + +void +test (void) +{ + __nds32__dsb (); + __nds32__isb (); + __nds32__msync_all (); + __nds32__msync_store (); + __nds32__nop (); + __nds32__standby_no_wake_grant (); + __nds32__standby_wake_grant (); + __nds32__standby_wait_done (); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-machine-sva.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-machine-sva.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-machine-sva.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-machine-sva.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,16 @@ +/* Verify that we generate sva instruction with builtin function. */ + +/* { dg-do compile } */ +/* { dg-options "-O0" } */ +/* { dg-final { scan-assembler "\\tsva" } } */ + +#include + +void +test (void) +{ + int a, b; + char c; + + c = __nds32__sva (a, b); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-machine-svs.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-machine-svs.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-machine-svs.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-machine-svs.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,16 @@ +/* Verify that we generate svs instruction with builtin function. */ + +/* { dg-do compile } */ +/* { dg-options "-O0" } */ +/* { dg-final { scan-assembler "\\tsvs" } } */ + +#include + +void +test (void) +{ + int a, b; + char c; + + c = __nds32__svs (a, b); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-mfsr-mtsr.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-mfsr-mtsr.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-mfsr-mtsr.c 2013-12-03 11:58:05.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-mfsr-mtsr.c 2016-08-08 20:37:53.554581681 +0200 @@ -1,9 +1,9 @@ /* Verify that we generate mfsr/mtsr instruction with builtin function. 
*/ -/* { dg-do compile } */ -/* { dg-options "-O0" } */ -/* { dg-final { scan-assembler "\\tmfsr" } } */ -/* { dg-final { scan-assembler "\\tmtsr" } } */ +/* { dg-do compile } */ +/* { dg-options "-O0" } */ +/* { dg-final { scan-assembler "\\tmfsr" } } */ +/* { dg-final { scan-assembler "\\tmtsr" } } */ #include @@ -12,6 +12,6 @@ { int ipsw_value; - ipsw_value = __builtin_nds32_mfsr (__NDS32_REG_IPSW__); - __builtin_nds32_mtsr (ipsw_value, __NDS32_REG_IPSW__); + ipsw_value = __nds32__mfsr (__NDS32_REG_IPSW__); + __nds32__mtsr (ipsw_value, __NDS32_REG_IPSW__); } diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-mfusr-mtusr.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-mfusr-mtusr.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-mfusr-mtusr.c 2013-12-03 11:58:05.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-mfusr-mtusr.c 2016-08-08 20:37:53.554581681 +0200 @@ -1,9 +1,9 @@ /* Verify that we generate mfusr/mtusr instruction with builtin function. */ -/* { dg-do compile } */ -/* { dg-options "-O0" } */ -/* { dg-final { scan-assembler "\\tmfusr" } } */ -/* { dg-final { scan-assembler "\\tmtusr" } } */ +/* { dg-do compile } */ +/* { dg-options "-O0" } */ +/* { dg-final { scan-assembler "\\tmfusr" } } */ +/* { dg-final { scan-assembler "\\tmtusr" } } */ #include @@ -12,6 +12,6 @@ { int itype_value; - itype_value = __builtin_nds32_mfusr (__NDS32_REG_ITYPE__); - __builtin_nds32_mtusr (itype_value, __NDS32_REG_ITYPE__); + itype_value = __nds32__mfusr (__NDS32_REG_ITYPE__); + __nds32__mtusr (itype_value, __NDS32_REG_ITYPE__); } diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-misc.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-misc.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-misc.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-misc.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,39 @@ +/* Verify that we generate other instructions with builtin function. 
*/ + +/* { dg-do compile } */ +/* { dg-options "-O0" } */ +/* { dg-final { scan-assembler "\\tbreak" } } */ +/* { dg-final { scan-assembler "\\tdsb" } } */ +/* { dg-final { scan-assembler "\\tisb" } } */ +/* { dg-final { scan-assembler "\\tisync" } } */ +/* { dg-final { scan-assembler "\\tmsync\\tall" } } */ +/* { dg-final { scan-assembler "\\tmsync\\tstore" } } */ +/* { dg-final { scan-assembler "\\tnop" } } */ +/* { dg-final { scan-assembler "\\tstandby\\tno_wake_grant" } } */ +/* { dg-final { scan-assembler "\\tstandby\\twake_grant" } } */ +/* { dg-final { scan-assembler "\\tstandby\\twait_done" } } */ +/* { dg-final { scan-assembler "\\tteqz" } } */ +/* { dg-final { scan-assembler "\\ttnez" } } */ +/* { dg-final { scan-assembler "\\ttrap" } } */ + +#include + +void +test (void) +{ + int a = 0; + + __nds32__break (2); + __nds32__dsb (); + __nds32__isb (); + __nds32__isync (&a); + __nds32__msync_all (); + __nds32__msync_store (); + __nds32__nop (); + __nds32__standby_no_wake_grant (); + __nds32__standby_wake_grant (); + __nds32__standby_wait_done (); + __nds32__teqz (a, 2); + __nds32__tnez (a, 2); + __nds32__trap (2); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-mtsr-dsb.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-mtsr-dsb.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-mtsr-dsb.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-mtsr-dsb.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,14 @@ +/* Verify that we generate mtsr and dsb instruction with builtin function. */ + +/* { dg-do compile } */ +/* { dg-options "-O0" } */ +/* { dg-final { scan-assembler "\\tmtsr" } } */ +/* { dg-final { scan-assembler "\\tdsb" } } */ + +#include + +void +main (void) +{ + __nds32__mtsr_dsb (1, NDS32_SR_ILMB); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-mtsr-isb.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-mtsr-isb.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-mtsr-isb.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-mtsr-isb.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,14 @@ +/* Verify that we generate mtsr and isb instruction with builtin function. */ + +/* { dg-do compile } */ +/* { dg-options "-O0" } */ +/* { dg-final { scan-assembler "\\tmtsr" } } */ +/* { dg-final { scan-assembler "\\tisb" } } */ + +#include + +void +main (void) +{ + __nds32__mtsr_isb (1, NDS32_SR_ILMB); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE1-abs.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE1-abs.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE1-abs.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE1-abs.c 2016-08-08 20:37:53.550581527 +0200 @@ -0,0 +1,20 @@ +/* This is a test program for abs instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_perf } */ + +#include +#include + +int +main () +{ + int a = -4; + int abs = __nds32__abs (a); + + if (abs != 4) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE1-ave.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE1-ave.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE1-ave.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE1-ave.c 2016-08-08 20:37:53.550581527 +0200 @@ -0,0 +1,21 @@ +/* This is a test program for ave instruction. 
*/ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_perf } */ + +#include +#include + +int +main () +{ + int a = 4; + int b = 2; + int ave = __nds32__ave (a, b); + + if (ave != 3) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE1-bclr.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE1-bclr.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE1-bclr.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE1-bclr.c 2016-08-08 20:37:53.550581527 +0200 @@ -0,0 +1,20 @@ +/* This is a test program for bclr instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_perf } */ + +#include +#include + +int +main () +{ + int a = 1; + int c = __nds32__bclr (a, 0); + + if (c != 0) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE1-bset.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE1-bset.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE1-bset.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE1-bset.c 2016-08-08 20:37:53.550581527 +0200 @@ -0,0 +1,20 @@ +/* This is a test program for bset instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_perf } */ + +#include +#include + +int +main () +{ + int c = 0; + c = __nds32__bset (c, 0); + + if (c != 1) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE1-btgl.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE1-btgl.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE1-btgl.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE1-btgl.c 2016-08-08 20:37:53.550581527 +0200 @@ -0,0 +1,20 @@ +/* This is a test program for btgl instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_perf } */ + +#include +#include + +int +main () +{ + int a = 1; + int c = __nds32__btgl (1, 0); + + if (c != 0) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE1-btst.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE1-btst.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE1-btst.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE1-btst.c 2016-08-08 20:37:53.550581527 +0200 @@ -0,0 +1,20 @@ +/* This is a test program for btst instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_perf } */ + +#include +#include + +int +main () +{ + int c = 1; + c = __nds32__btst (c, 0); + + if (c != 1) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE1-clip.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE1-clip.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE1-clip.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE1-clip.c 2016-08-08 20:37:53.550581527 +0200 @@ -0,0 +1,20 @@ +/* This is a test program for clip instruction. 
*/ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_perf } */ + +#include +#include + +int +main () +{ + int c = 33; + c = __nds32__clip (c, 5); + + if (c != 31) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE1-clips.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE1-clips.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE1-clips.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE1-clips.c 2016-08-08 20:37:53.550581527 +0200 @@ -0,0 +1,20 @@ +/* This is a test program for clips instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_perf } */ + +#include +#include + +int +main () +{ + int a = -33; + int c = __nds32__clips (a, 5); + + if (c != -32) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE1-clo.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE1-clo.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE1-clo.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE1-clo.c 2016-08-08 20:37:53.550581527 +0200 @@ -0,0 +1,20 @@ +/* This is a test program for clo instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_perf } */ + +#include +#include + +int +main () +{ + int c = 0xFFFF0000; + c = __nds32__clo (c); + + if (c != 16) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE1-clz.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE1-clz.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE1-clz.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE1-clz.c 2016-08-08 20:37:53.550581527 +0200 @@ -0,0 +1,20 @@ +/* This is a test program for clz instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_perf } */ + +#include +#include + +int +main () +{ + int c = 0x0000FFFF; + c = __nds32__clz (c); + + if (c != 16) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE2-bse.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE2-bse.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE2-bse.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE2-bse.c 2016-08-08 20:37:53.550581527 +0200 @@ -0,0 +1,28 @@ +/* This is a test program for bse instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_perf2 } */ + +#include +#include + +int +main () +{ + unsigned int a = 0xF0F0F0F0; + unsigned int b = 0x00000300; + unsigned int r = 0; + + unsigned int verify_b = 0x00000300; + unsigned int verify_r = 0; + + __nds32__bse (&r, a, &b); + a = 0xF0F0F0F0; + asm volatile ("bse %0, %2, %1": "+&r" (verify_r), "+&r" (verify_b) : "r" (a)); + + if ((verify_b == b) && (verify_r == r)) + exit (0); + else + abort (); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE2-bsp.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE2-bsp.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE2-bsp.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE2-bsp.c 2016-08-08 20:37:53.550581527 +0200 @@ -0,0 +1,26 @@ +/* This is a test program for bsp instruction. 
*/ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_perf2 } */ + +#include +#include + +int +main () +{ + unsigned int a = 0x0000000F; + unsigned int b = 0x00000300; + unsigned int r = 0; + unsigned int verify_b = 0x00000300; + unsigned int verify_r = 0; + + __nds32__bsp (&r, a, &b); + asm volatile ("bsp %0, %2, %1": "+&r" (verify_r), "+&r" (verify_b) : "r" (a)); + + if ((verify_b == b) && (verify_r == r)) + exit (0); + else + abort (); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE2-pbsada.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE2-pbsada.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE2-pbsada.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE2-pbsada.c 2016-08-08 20:37:53.550581527 +0200 @@ -0,0 +1,23 @@ +/* This is a test program for pbsada instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_perf2 } */ + +#include +#include + +int +main () +{ + unsigned int a = 0x09070605; + unsigned int b = 0x04020301; + unsigned int r = 1; + + r = __nds32__pbsada(r, a, b); + + if (r != 18) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE2-pbsad.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE2-pbsad.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-PE2-pbsad.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-PE2-pbsad.c 2016-08-08 20:37:53.550581527 +0200 @@ -0,0 +1,21 @@ +/* This is a test program for pbsad instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_perf2 } */ + +#include +#include + +int +main () +{ + unsigned int a = 0x09070605; + unsigned int b = 0x04020301; + unsigned int r = __nds32__pbsad (a, b); + + if (r != 17) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-rotr.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-rotr.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-rotr.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-rotr.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,19 @@ +/* This is a test program for rotr instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O0" } */ + +#include +#include + +int +main () +{ + unsigned int a = 1; + a = __nds32__rotr (a, 30); + + if (a != 4) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-setgie-dis.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-setgie-dis.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-setgie-dis.c 2013-12-03 11:58:05.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-setgie-dis.c 2016-08-08 20:37:53.554581681 +0200 @@ -1,11 +1,13 @@ /* Verify that we generate setgie.d instruction with builtin function. 
*/ -/* { dg-do compile } */ -/* { dg-options "-O0" } */ -/* { dg-final { scan-assembler "\\tsetgie.d" } } */ +/* { dg-do compile } */ +/* { dg-options "-O0" } */ +/* { dg-final { scan-assembler "\\tsetgie.d" } } */ + +#include void test (void) { - __builtin_nds32_setgie_dis (); + __nds32__setgie_dis (); } diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-setgie-en.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-setgie-en.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-setgie-en.c 2013-12-03 11:58:05.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-setgie-en.c 2016-08-08 20:37:53.554581681 +0200 @@ -1,11 +1,13 @@ /* Verify that we generate setgie.e instruction with builtin function. */ -/* { dg-do compile } */ -/* { dg-options "-O0" } */ -/* { dg-final { scan-assembler "\\tsetgie.e" } } */ +/* { dg-do compile */ +/* { dg-options "-O0" } */ +/* { dg-final { scan-assembler "\\tsetgie.e" } } */ + +#include void test (void) { - __builtin_nds32_setgie_en (); + __nds32__setgie_en (); } diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-setgie_mtsr_mfsr.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-setgie_mtsr_mfsr.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-setgie_mtsr_mfsr.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-setgie_mtsr_mfsr.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,36 @@ +/* This is a test program for checking gie with + mtsr/mfsr instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O0" } */ + +#include +#include + +int +main () +{ + unsigned int psw; + unsigned int gie; + unsigned int pfm_ctl; + + __nds32__setgie_en (); + __nds32__dsb(); /* This is needed for waiting pipeline. */ + psw = __nds32__mfsr (NDS32_SR_PSW); + + gie = psw & 0x00000001; + + if (gie != 1) + abort (); + + psw = psw & 0xFFFFFFFE; + __nds32__mtsr (psw,NDS32_SR_PSW); + __nds32__dsb(); /* This is needed for waiting pipeline. */ + psw = __nds32__mfsr (NDS32_SR_PSW); + gie = psw & 0x00000001; + + if (gie != 0) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-sp.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-sp.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-sp.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-sp.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,33 @@ +/* This is a test program for sp intrinsic usage. + Because we want to use frame pointer to access local variable, + we need to use -fno-omit-frame-pointer to make sure the existence + of frame pointer. */ + +/* { dg-do run } */ +/* { dg-options "-O0 -fno-omit-frame-pointer" } */ + +#include +#include + +int +main () +{ + unsigned int old_sp, new_sp; + + old_sp = __nds32__get_current_sp (); + new_sp = old_sp - 4; + __nds32__set_current_sp (new_sp); + new_sp = __nds32__get_current_sp (); + + if (new_sp != (old_sp - 4)) + abort (); + + new_sp = new_sp + 4; + __nds32__set_current_sp (new_sp); + new_sp = __nds32__get_current_sp (); + + if (new_sp != old_sp) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-string-ffb.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-string-ffb.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-string-ffb.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-string-ffb.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,28 @@ +/* This is a test program for ffb instruction. 
*/ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_string } */ + +#include +#include + +int +main () +{ + unsigned int a = 0x1b2a3d4c; + unsigned int b = 0x0000003d; + int r; + + r = __nds32__ffb (a, b); + +#ifdef __NDS32_EL__ + if (r != -3) + abort (); +#else + if (r != -2) + abort (); +#endif + + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-string-ffmism.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-string-ffmism.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-string-ffmism.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-string-ffmism.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,28 @@ +/* This is a test program for ffmism instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_string } */ + +#include +#include + +int +main () +{ + unsigned int a = 0x1b2a3d4c; + unsigned int b = 0x112a334c; + int r; + + r = __nds32__ffmism (a, b); + +#ifdef __NDS32_EL__ + if (r != -3) + abort (); +#else + if (r != -4) + abort (); +#endif + + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-string-flmism.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-string-flmism.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-string-flmism.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-string-flmism.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,28 @@ +/* This is a test program for flmism instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-require-effective-target nds32_ext_string } */ + +#include +#include + +int +main () +{ + unsigned int a = 0x1b2a3d4c; + unsigned int b = 0x112a334c; + int r; + + r = __nds32__flmism (a, b); + +#ifdef __NDS32_EL__ + if (r != -1) + abort (); +#else + if (r != -2) + abort (); +#endif + + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-add16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-add16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-add16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-add16.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-mext-dsp" } */ +/* { dg-final { scan-assembler "kadd16" } } */ +/* { dg-final { scan-assembler "kadd16" } } */ +/* { dg-final { scan-assembler "ukadd16" } } */ +/* { dg-final { scan-assembler "ukadd16" } } */ + +#include + +void +test (void) +{ + unsigned int r, a, b; + int16x2_t vr, va, vb; + uint16x2_t v_ur, v_ua, v_ub; + + r = __nds32__kadd16 (a, b); + vr = __nds32__v_kadd16 (va, vb); + + r = __nds32__ukadd16 (a, b); + v_ur = __nds32__v_ukadd16 (v_ua, v_ub); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-add64.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-add64.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-add64.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-add64.c 2016-08-08 20:37:53.554581681 +0200 @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-mext-dsp" } */ +/* { dg-final { scan-assembler "kadd64" } } */ +/* { dg-final { scan-assembler "ukadd64" } } */ + +#include + +void +test (void) +{ + long long r, a, b; + unsigned long long ur, ua, ub; + + r = __nds32__kadd64 (a, b); + ur = __nds32__ukadd64 (ua, ub); + +} diff -Nur 
gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-add8.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-add8.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-add8.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-add8.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-mext-dsp" } */ +/* { dg-final { scan-assembler "kadd8" } } */ +/* { dg-final { scan-assembler "kadd8" } } */ +/* { dg-final { scan-assembler "ukadd8" } } */ +/* { dg-final { scan-assembler "ukadd8" } } */ + +#include + +void +test (void) +{ + unsigned int r, a, b; + int8x4_t vr, va, vb; + uint8x4_t v_ur, v_ua, v_ub; + + r = __nds32__kadd8 (a, b); + vr = __nds32__v_kadd8 (va, vb); + + r = __nds32__ukadd8 (a, b); + v_ur = __nds32__v_ukadd8 (v_ua, v_ub); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-cras16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-cras16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-cras16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-cras16.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-mext-dsp" } */ +/* { dg-final { scan-assembler "kcras16" } } */ +/* { dg-final { scan-assembler "kcras16" } } */ +/* { dg-final { scan-assembler "ukcras16" } } */ +/* { dg-final { scan-assembler "ukcras16" } } */ + +#include + +void +test (void) +{ + unsigned int r, a, b; + int16x2_t vr, va, vb; + uint16x2_t v_ur, v_ua, v_ub; + + r = __nds32__kcras16 (a, b); + vr = __nds32__v_kcras16 (va, vb); + + r = __nds32__ukcras16 (a, b); + v_ur = __nds32__v_ukcras16 (v_ua, v_ub); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-crsa16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-crsa16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-crsa16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-crsa16.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-mext-dsp" } */ +/* { dg-final { scan-assembler "kcrsa16" } } */ +/* { dg-final { scan-assembler "kcrsa16" } } */ +/* { dg-final { scan-assembler "ukcrsa16" } } */ +/* { dg-final { scan-assembler "ukcrsa16" } } */ + +#include + +void +test (void) +{ + unsigned int r, a, b; + int16x2_t vr, va, vb; + uint16x2_t v_ur, v_ua, v_ub; + + r = __nds32__kcrsa16 (a, b); + vr = __nds32__v_kcrsa16 (va, vb); + + r = __nds32__ukcrsa16 (a, b); + v_ur = __nds32__v_ukcrsa16 (v_ua, v_ub); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-kabs8.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-kabs8.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-kabs8.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-kabs8.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mext-dsp" } */ +/* { dg-final { scan-assembler "kabs8" } } */ +/* { dg-final { scan-assembler "kabs8" } } */ + +#include + +void +test (void) +{ + unsigned int r, a; + int8x4_t vr, va; + + r = __nds32__kabs8 (a); + vr = __nds32__v_kabs8 (va); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-ksll16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-ksll16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-ksll16.c 1970-01-01 01:00:00.000000000 
+0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-ksll16.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-mext-dsp" } */ +/* { dg-final { scan-assembler "ksll16" } } */ +/* { dg-final { scan-assembler "ksll16" } } */ +/* { dg-final { scan-assembler "kslli16" } } */ +/* { dg-final { scan-assembler "kslli16" } } */ + +#include + +void +test (void) +{ + unsigned int r, a, b; + int16x2_t vr, va; + + r = __nds32__ksll16 (a, b); + vr = __nds32__v_ksll16 (va, b); + + r = __nds32__ksll16 (a, 0); + vr = __nds32__v_ksll16 (va, 0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-ksll.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-ksll.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-ksll.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-ksll.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mext-dsp" } */ +/* { dg-final { scan-assembler "ksll" } } */ +/* { dg-final { scan-assembler "kslli" } } */ + +#include + +void +test (void) +{ + int r, a; + unsigned int b; + + r = __nds32__ksll (a, b); + r = __nds32__ksll (a, 0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-kslrawu.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-kslrawu.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-kslrawu.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-kslrawu.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-mext-dsp" } */ +/* { dg-final { scan-assembler "kslraw.u" } } */ + +#include + +void +test (void) +{ + int r, a; + unsigned int b; + + r = __nds32__kslraw_u (a, b); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-mar64.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-mar64.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-mar64.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-mar64.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mext-dsp" } */ +/* { dg-final { scan-assembler "kmar64" } } */ +/* { dg-final { scan-assembler "ukmar64" } } */ + +#include + +void +test (void) +{ + long long r, a, b; + unsigned long long ur, ua, ub; + + r = __nds32__kmar64 (r, a, b); + ur = __nds32__ukmar64 (ur, ua, ub); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-misc16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-misc16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-misc16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-misc16.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-mext-dsp" } */ +/* { dg-final { scan-assembler "sclip16" } } */ +/* { dg-final { scan-assembler "sclip16" } } */ +/* { dg-final { scan-assembler "uclip16" } } */ +/* { dg-final { scan-assembler "uclip16" } } */ +/* { dg-final { scan-assembler "khm16" } } */ +/* { dg-final { scan-assembler "khm16" } } */ +/* { dg-final { scan-assembler "khmx16" } } */ +/* { dg-final { scan-assembler "khmx16" } } */ +/* { dg-final { scan-assembler "kabs16" } } */ +/* { dg-final { scan-assembler "kabs16" } } */ + +#include + +void +test (void) +{ + unsigned int r, a, b; + int16x2_t vr, va, vb; + + r = __nds32__sclip16 (a, 0); + vr 
= __nds32__v_sclip16 (va, 0); + + r = __nds32__uclip16 (a, 0); + vr = __nds32__v_uclip16 (va, 0); + + r = __nds32__khm16 (a, b); + vr = __nds32__v_khm16 (va, vb); + + r = __nds32__khmx16 (a, b); + vr = __nds32__v_khmx16 (va, vb); + + r = __nds32__kabs16 (a); + vr = __nds32__v_kabs16 (va); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-msr64.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-msr64.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-msr64.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-msr64.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mext-dsp" } */ +/* { dg-final { scan-assembler "kmsr64" } } */ +/* { dg-final { scan-assembler "ukmsr64" } } */ + +#include + +void +test (void) +{ + long long r, a, b; + unsigned long long ur, ua, ub; + + r = __nds32__kmsr64 (r, a, b); + ur = __nds32__ukmsr64 (ur, ua, ub); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-msw16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-msw16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-msw16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-msw16.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-mext-dsp" } */ +/* { dg-final { scan-assembler "kmmawb" } } */ +/* { dg-final { scan-assembler "kmmawb" } } */ +/* { dg-final { scan-assembler "kmmawb.u" } } */ +/* { dg-final { scan-assembler "kmmawb.u" } } */ +/* { dg-final { scan-assembler "kmmawt" } } */ +/* { dg-final { scan-assembler "kmmawt" } } */ +/* { dg-final { scan-assembler "kmmawt.u" } } */ +/* { dg-final { scan-assembler "kmmawt.u" } } */ + +#include + +void +test (void) +{ + int r, a; + unsigned int b; + int16x2_t vb; + + r = __nds32__kmmawb (r, a, b); + r = __nds32__v_kmmawb (r, a, vb); + + r = __nds32__kmmawb_u (r, a, b); + r = __nds32__v_kmmawb_u (r, a, vb); + + r = __nds32__kmmawt (r, a, b); + r = __nds32__v_kmmawt (r, a, vb); + + r = __nds32__kmmawt_u (r, a, b); + r = __nds32__v_kmmawt_u (r, a, vb); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-msw32.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-msw32.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-msw32.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-msw32.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mext-dsp" } */ +/* { dg-final { scan-assembler "kmmac" } } */ +/* { dg-final { scan-assembler "kmmac.u" } } */ +/* { dg-final { scan-assembler "kmmsb" } } */ +/* { dg-final { scan-assembler "kmmsb.u" } } */ +/* { dg-final { scan-assembler "kwmmul" } } */ +/* { dg-final { scan-assembler "kwmmul.u" } } */ + +#include + +void +test (void) +{ + int r, a, b; + r = __nds32__kmmac (r, a, b); + r = __nds32__kmmac_u (r, a, b); + + r = __nds32__kmmsb (r, a, b); + r = __nds32__kmmsb_u (r, a, b); + + r = __nds32__kwmmul (a, b); + r = __nds32__kwmmul_u (a, b); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-smul16x32.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-smul16x32.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-smul16x32.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-smul16x32.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,72 @@ +/* { dg-do 
compile } */ +/* { dg-options "-mext-dsp" } */ +/* { dg-final { scan-assembler "kmda" } } */ +/* { dg-final { scan-assembler "kmda" } } */ +/* { dg-final { scan-assembler "kmxda" } } */ +/* { dg-final { scan-assembler "kmxda" } } */ +/* { dg-final { scan-assembler "kmabb" } } */ +/* { dg-final { scan-assembler "kmabb" } } */ +/* { dg-final { scan-assembler "kmabt" } } */ +/* { dg-final { scan-assembler "kmabt" } } */ +/* { dg-final { scan-assembler "kmatt" } } */ +/* { dg-final { scan-assembler "kmatt" } } */ +/* { dg-final { scan-assembler "kmada" } } */ +/* { dg-final { scan-assembler "kmada" } } */ +/* { dg-final { scan-assembler "kmaxda" } } */ +/* { dg-final { scan-assembler "kmaxda" } } */ +/* { dg-final { scan-assembler "kmads" } } */ +/* { dg-final { scan-assembler "kmads" } } */ +/* { dg-final { scan-assembler "kmadrs" } } */ +/* { dg-final { scan-assembler "kmadrs" } } */ +/* { dg-final { scan-assembler "kmaxds" } } */ +/* { dg-final { scan-assembler "kmaxds" } } */ +/* { dg-final { scan-assembler "kmsda" } } */ +/* { dg-final { scan-assembler "kmsda" } } */ +/* { dg-final { scan-assembler "kmsxda" } } */ +/* { dg-final { scan-assembler "kmsxda" } } */ + +#include + +void +test (void) +{ + int r; + unsigned int a, b; + int16x2_t va, vb; + + r = __nds32__kmda (a, b); + r = __nds32__v_kmda (va, vb); + + r = __nds32__kmxda (a, b); + r = __nds32__v_kmxda (va, vb); + + r = __nds32__kmabb (r, a, b); + r = __nds32__v_kmabb (r, va, vb); + + r = __nds32__kmabt (r, a, b); + r = __nds32__v_kmabt (r, va, vb); + + r = __nds32__kmatt (r, a, b); + r = __nds32__v_kmatt (r, va, vb); + + r = __nds32__kmada (r, a, b); + r = __nds32__v_kmada (r, va, vb); + + r = __nds32__kmaxda (r, a, b); + r = __nds32__v_kmaxda (r, va, vb); + + r = __nds32__kmads (r, a, b); + r = __nds32__v_kmads (r, va, vb); + + r = __nds32__kmadrs (r, a, b); + r = __nds32__v_kmadrs (r, va, vb); + + r = __nds32__kmaxds (r, a, b); + r = __nds32__v_kmaxds (r, va, vb); + + r = __nds32__kmsda (r, a, b); + r = __nds32__v_kmsda (r, va, vb); + + r = __nds32__kmsxda (r, a, b); + r = __nds32__v_kmsxda (r, va, vb); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-sub16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-sub16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-sub16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-sub16.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-mext-dsp" } */ +/* { dg-final { scan-assembler "ksub16" } } */ +/* { dg-final { scan-assembler "ksub16" } } */ +/* { dg-final { scan-assembler "uksub16" } } */ +/* { dg-final { scan-assembler "uksub16" } } */ + +#include + +void +test (void) +{ + unsigned int r, a, b; + int16x2_t vr, va, vb; + uint16x2_t v_ur, v_ua, v_ub; + + r = __nds32__ksub16 (a, b); + vr = __nds32__v_ksub16 (va, vb); + + r = __nds32__uksub16 (a, b); + v_ur = __nds32__v_uksub16 (v_ua, v_ub); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-sub64.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-sub64.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-sub64.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-sub64.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-mext-dsp" } */ +/* { dg-final { scan-assembler "ksub64" } } */ +/* { dg-final { scan-assembler "uksub64" } } */ + +#include + +void +test (void) +{ + long 
long r, a, b; + unsigned long long ur, ua, ub; + + r = __nds32__ksub64 (a, b); + ur = __nds32__uksub64 (ua, ub); + +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-sub8.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-sub8.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-stura-sub8.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-stura-sub8.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-mext-dsp" } */ +/* { dg-final { scan-assembler "ksub8" } } */ +/* { dg-final { scan-assembler "ksub8" } } */ +/* { dg-final { scan-assembler "uksub8" } } */ +/* { dg-final { scan-assembler "uksub8" } } */ + +#include + +void +test (void) +{ + unsigned int r, a, b; + int8x4_t vr, va, vb; + uint8x4_t v_ur, v_ua, v_ub; + + r = __nds32__ksub8 (a, b); + vr = __nds32__v_ksub8 (va, vb); + + r = __nds32__uksub8 (a, b); + v_ur = __nds32__v_uksub8 (v_ua, v_ub); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-unaligned_dw.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-unaligned_dw.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-unaligned_dw.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-unaligned_dw.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,31 @@ +/* This is a test program for unaligned double word access. */ + +/* { dg-do run } */ +/* { dg-options "-O0 -std=c99" } */ + +#include +#include + +int +main () +{ + unsigned char data[] = {0x55, 0x66, 0x77, 0x88, 0xAA, + 0xBB, 0xCC, 0xDD, 0xEE, 0xFF}; + unsigned long long* long_long_data = (unsigned long long*) & data[1]; + unsigned long long test_long_long = 0x1122334455667788LL; + +#ifdef __NDS32_EL__ + if (__nds32__get_unaligned_dw (long_long_data) != 0xEEDDCCBBAA887766LL) + abort (); +#else + if (__nds32__get_unaligned_dw (long_long_data) != 0x667788AABBCCDDEELL) + abort (); +#endif + + __nds32__put_unaligned_dw (long_long_data, test_long_long); + + if (__nds32__get_unaligned_dw (long_long_data) != 0x1122334455667788LL) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-unaligned_hw.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-unaligned_hw.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-unaligned_hw.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-unaligned_hw.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,30 @@ +/* This is a test program for unaligned half word access. */ + +/* { dg-do run } */ +/* { dg-options "-O0" } */ + +#include +#include + +int +main () +{ + unsigned char data[] = {0x55,0x66,0x77,0x88}; + unsigned short* short_data = (unsigned short*)& data[1]; + unsigned short test_short = 0x5566; + +#ifdef __NDS32_EL__ + if (__nds32__get_unaligned_hw (short_data) != 0x7766) + abort (); +#else + if (__nds32__get_unaligned_hw (short_data) != 0x6677) + abort (); +#endif + + __nds32__put_unaligned_hw (short_data, test_short); + + if (__nds32__get_unaligned_hw (short_data) != 0x5566) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-unaligned_w.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-unaligned_w.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-unaligned_w.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-unaligned_w.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,30 @@ +/* This is a test program for unaligned word access. 
*/ + +/* { dg-do run } */ +/* { dg-options "-O0 -std=c99" } */ + +#include +#include + +int +main () +{ + unsigned char data[] = {0x55,0x66,0x77,0x88,0xAA,0xBB,0xCC,0xDD}; + unsigned int* int_data = (unsigned int*)& data[1]; + unsigned int test_int = 0x55667788; + +#ifdef __NDS32_EL__ + if (__nds32__get_unaligned_w (int_data) != 0xAA887766) + abort (); +#else + if (__nds32__get_unaligned_w (int_data) != 0x667788AA) + abort (); +#endif + + __nds32__put_unaligned_w (int_data, test_int); + + if (__nds32__get_unaligned_w (int_data) != 0x55667788) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-wsbh.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-wsbh.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/builtin-wsbh.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/builtin-wsbh.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,21 @@ +/* This is a test program for wsbh instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O0" } */ + +#include +#include + +int +main () +{ + unsigned int a = 0x03020100; + unsigned int b; + + b = __nds32__wsbh (a); + + if (b != 0x02030001) + abort (); + else + exit (0); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-add16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-add16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-add16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-add16.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,49 @@ +/* This is a test program for add16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int add16 (unsigned int ra, unsigned int rb) +{ + return __nds32__add16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_uadd16 (uint16x2_t ra, uint16x2_t rb) +{ + return __nds32__v_uadd16 (ra, rb); +} + +static __attribute__ ((noinline)) +int16x2_t v_sadd16 (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_sadd16 (ra, rb); +} + +int +main () +{ + unsigned int a = add16 (0x0001f000, 0x00011000); + uint16x2_t v_ua = v_uadd16 ((uint16x2_t) {0xf000, 0xf000}, + (uint16x2_t) {0x1000, 0x2000}); + int16x2_t v_sa = v_sadd16 ((int16x2_t) {0xf777, 0xf111}, + (int16x2_t) {0x1000, 0x2000}); + + if (a != 0x00020000) + abort (); + else if (v_ua[0] != 0x0000 + || v_ua[1] != 0x1000) + abort (); + else if (v_sa[0] != 0x0777 + || v_sa[1] != 0x1111) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-add64.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-add64.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-add64.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-add64.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,36 @@ +/* This is a test program for add64 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +long long sadd64 (long long ra, long long rb) +{ + return __nds32__sadd64 (ra, rb); +} + +static __attribute__ ((noinline)) +unsigned long long uadd64 (unsigned long long ra, unsigned long long rb) +{ + return __nds32__uadd64 (ra, rb); +} + +int +main () +{ + long long sa = sadd64 (0x1122334400000000ll, 0x55667788ll); + unsigned long long ua = uadd64 (0xffff00000000ull, 0x55667788ull); + + if (sa != 0x1122334455667788ll) + abort (); + else if (ua != 0xffff55667788ull) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-add8.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-add8.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-add8.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-add8.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,53 @@ +/* This is a test program for add8 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int add8 (unsigned int ra, unsigned int rb) +{ + return __nds32__add8 (ra, rb); +} + +static __attribute__ ((noinline)) +uint8x4_t v_uadd8 (uint8x4_t ra, uint8x4_t rb) +{ + return __nds32__v_uadd8 (ra, rb); +} + +static __attribute__ ((noinline)) +int8x4_t v_sadd8 (int8x4_t ra, int8x4_t rb) +{ + return __nds32__v_sadd8 (ra, rb); +} + +int +main () +{ + unsigned int a = add8 (0x11223344, 0x55667788); + uint8x4_t v_ua = v_uadd8 ((uint8x4_t) {0xff, 0xee, 0xdd, 0xcc}, + (uint8x4_t) {0x1, 0xee, 0xdd, 0xcc}); + int8x4_t v_sa = v_sadd8 ((int8x4_t) {0x80, 0x7f, 0xbb, 0xaa}, + (int8x4_t) {0x80, 0x7f, 0xbb, 0xaa}); + + if (a != 0x6688aacc) + abort (); + else if (v_ua[0] != 0 + || v_ua[1] != 0xdc + || v_ua[2] != 0xba + || v_ua[3] != 0x98) + abort (); + else if (v_sa[0] != 0 + || v_sa[1] != (char) 0xfe + || v_sa[2] != 0x76 + || v_sa[3] != 0x54) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-bitrev.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-bitrev.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-bitrev.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-bitrev.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for bitrev instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int bitrev (unsigned int ra, unsigned int rb) +{ + return __nds32__bitrev (ra, rb); +} + +int +main () +{ + unsigned int a = bitrev (0xd, 1); + + if (a != 0x2) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-bpick.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-bpick.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-bpick.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-bpick.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for bpick instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int bpick (unsigned int ra, unsigned int rb, unsigned int rc) +{ + return __nds32__bpick (ra, rb, rc); +} + +int +main () +{ + unsigned int a = bpick (0x11223344, 0x11332244, 0); + + if (a != 0x11332244) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-cmpeq16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-cmpeq16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-cmpeq16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-cmpeq16.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,49 @@ +/* This is a test program for cmpeq16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int cmpeq16 (unsigned int ra, unsigned int rb) +{ + return __nds32__cmpeq16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_scmpeq16 (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_scmpeq16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_ucmpeq16 (uint16x2_t ra, uint16x2_t rb) +{ + return __nds32__v_ucmpeq16 (ra, rb); +} + +int +main () +{ + unsigned int a = cmpeq16 (0xffff0000, 0xffff0001); + uint16x2_t v_sa = v_scmpeq16 ((int16x2_t) {0x7fff, 0x8000}, + (int16x2_t) {0x8000, 0x8000}); + uint16x2_t v_ua = v_ucmpeq16 ((uint16x2_t) {0x7fff, 0x8000}, + (uint16x2_t) {0x8000, 0x8000}); + + if (a != 0xffff0000) + abort (); + else if (v_sa[0] != 0 + || v_sa[1] != 0xffff) + abort (); + else if (v_ua[0] != 0 + || v_ua[1] != 0xffff) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-cmpeq8.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-cmpeq8.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-cmpeq8.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-cmpeq8.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,53 @@ +/* This is a test program for cmpeq8 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int cmpeq8 (unsigned int ra, unsigned int rb) +{ + return __nds32__cmpeq8 (ra, rb); +} + +static __attribute__ ((noinline)) +uint8x4_t v_scmpeq8 (int8x4_t ra, int8x4_t rb) +{ + return __nds32__v_scmpeq8 (ra, rb); +} + +static __attribute__ ((noinline)) +uint8x4_t v_ucmpeq8 (uint8x4_t ra, uint8x4_t rb) +{ + return __nds32__v_ucmpeq8 (ra, rb); +} + +int +main () +{ + unsigned int a = cmpeq8 (0xffff0000, 0xffff0101); + uint8x4_t v_sa = v_scmpeq8 ((int8x4_t) { 0x7f, 0x7f, 0x01, 0x01}, + (int8x4_t) { 0x7f, 0x7f, 0x00, 0x00}); + uint8x4_t v_ua = v_ucmpeq8 ((uint8x4_t) { 0x7f, 0x7f, 0x01, 0x01}, + (uint8x4_t) { 0x7f, 0x7f, 0x00, 0x00}); + + if (a != 0xffff0000) + abort (); + else if (v_sa[0] != 0xff + || v_sa[1] != 0xff + || v_sa[2] != 0 + || v_sa[3] != 0) + abort (); + else if (v_ua[0] != 0xff + || v_ua[1] != 0xff + || v_ua[2] != 0 + || v_ua[3] != 0) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-cras16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-cras16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-cras16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-cras16.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,58 @@ +/* This is a test program for cras16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int cras16 (unsigned int ra, unsigned int rb) +{ + return __nds32__cras16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_ucras16 (uint16x2_t ra, uint16x2_t rb) +{ + return __nds32__v_ucras16 (ra, rb); +} + +static __attribute__ ((noinline)) +int16x2_t v_scras16 (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_scras16 (ra, rb); +} + +int +main () +{ + +#ifdef __NDS32_EL__ + uint16x2_t v_ua_p = {1, 0}; + int16x2_t v_sa_p = {0x1000, 0x111}; +#else + uint16x2_t v_ua_p = {0x2469, 0xe000}; + int16x2_t v_sa_p = {0x3000, 0xe111}; +#endif + + unsigned int a = cras16 (0x0001f000, 0x0001f000); + uint16x2_t v_ua = v_ucras16 ((uint16x2_t) {0x1235, 0xf000}, + (uint16x2_t) {0x1000, 0x1234}); + int16x2_t v_sa = v_scras16 ((int16x2_t) {0x2000, 0xf111}, + (int16x2_t) {0x1000, 0x1000}); + + if (a != 0xf001efff) + abort (); + else if (v_ua[0] != v_ua_p[0] + || v_ua[1] != v_ua_p[1]) + abort (); + else if (v_sa[0] != v_sa_p[0] + || v_sa[1] != v_sa_p[1]) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-crsa16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-crsa16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-crsa16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-crsa16.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,57 @@ +/* This is a test program for crsa16 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int crsa16 (unsigned int ra, unsigned int rb) +{ + return __nds32__crsa16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_ucrsa16 (uint16x2_t ra, uint16x2_t rb) +{ + return __nds32__v_ucrsa16 (ra, rb); +} + +static __attribute__ ((noinline)) +int16x2_t v_scrsa16 (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_scrsa16 (ra, rb); +} + +int +main () +{ +#ifdef __NDS32_EL__ + uint16x2_t v_ua_p = {0x2469, 0xe000}; + int16x2_t v_sa_p = {0x3000, 0x110}; +#else + uint16x2_t v_ua_p = {1, 0}; + int16x2_t v_sa_p = {0x1000, 0x112}; +#endif + + unsigned int a = crsa16 (0x0001f000, 0x0001f000); + uint16x2_t v_ua = v_ucrsa16 ((uint16x2_t) {0x1235, 0xf000}, + (uint16x2_t) {0x1000, 0x1234}); + int16x2_t v_sa = v_scrsa16 ((int16x2_t) {0x2000, 0x0111}, + (int16x2_t) {0x0001, 0x1000}); + + if (a != 0x1001f001) + abort (); + else if (v_ua[0] != v_ua_p[0] + || v_ua[1] != v_ua_p[1]) + abort (); + else if (v_sa[0] != v_sa_p[0] + || v_sa[1] != v_sa_p[1]) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-insb.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-insb.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-insb.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-insb.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for insb instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int insb (unsigned int ra, unsigned int rb) +{ + return __nds32__insb (ra, rb, 1); +} + +int +main () +{ + unsigned int a = insb (0x11220044, 0x33); + + if (a != 0x11223344) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-pkbb16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-pkbb16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-pkbb16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-pkbb16.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,44 @@ +/* This is a test program for pkbb16 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int pkbb16 (unsigned int ra, unsigned int rb) +{ + return __nds32__pkbb16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_pkbb16 (uint16x2_t ra, uint16x2_t rb) +{ + return __nds32__v_pkbb16 (ra, rb); +} + +int +main () +{ +#ifdef __NDS32_EL__ + uint16x2_t va_p = {0xcccc, 0xaaaa}; +#else + uint16x2_t va_p = {0xbbbb, 0xdddd}; +#endif + + unsigned int a = pkbb16 (0x11223344, 0x55667788); + uint16x2_t va = v_pkbb16 ((uint16x2_t) {0xaaaa, 0xbbbb}, + (uint16x2_t) {0xcccc, 0xdddd}); + + if (a != 0x33447788) + abort (); + else if (va[0] != va_p[0] + || va[1] != va_p[1]) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-pkbt16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-pkbt16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-pkbt16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-pkbt16.c 2016-08-08 20:37:53.558581836 +0200 @@ -0,0 +1,44 @@ +/* This is a test program for pkbt16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int pkbt16 (unsigned int ra, unsigned int rb) +{ + return __nds32__pkbt16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_pkbt16 (uint16x2_t ra, uint16x2_t rb) +{ + return __nds32__v_pkbt16 (ra, rb); +} + +int +main () +{ +#ifdef __NDS32_EL__ + uint16x2_t va_p = {0xdddd, 0xaaaa}; +#else + uint16x2_t va_p = {0xbbbb, 0xcccc}; +#endif + + unsigned int a = pkbt16 (0x11223344, 0x55667788); + uint16x2_t va = v_pkbt16 ((uint16x2_t) {0xaaaa, 0xbbbb}, + (uint16x2_t) {0xcccc, 0xdddd}); + + if (a != 0x33445566) + abort (); + else if (va[0] != va_p[0] + || va[1] != va_p[1]) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-pktb16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-pktb16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-pktb16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-pktb16.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,44 @@ +/* This is a test program for pktb16 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int pktb16 (unsigned int ra, unsigned int rb) +{ + return __nds32__pktb16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_pktb16 (uint16x2_t ra, uint16x2_t rb) +{ + return __nds32__v_pktb16 (ra, rb); +} + +int +main () +{ +#ifdef __NDS32_EL__ + uint16x2_t va_p = {0xcccc, 0xbbbb}; +#else + uint16x2_t va_p = {0xaaaa, 0xdddd}; +#endif + + unsigned int a = pktb16 (0x11223344, 0x55667788); + uint16x2_t va = v_pktb16 ((uint16x2_t) {0xaaaa, 0xbbbb}, + (uint16x2_t) {0xcccc, 0xdddd}); + + if (a != 0x11227788) + abort (); + else if (va[0] != va_p[0] + || va[1] != va_p[1]) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-pktt16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-pktt16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-pktt16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-pktt16.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,44 @@ +/* This is a test program for pktt16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int pktt16 (unsigned int ra, unsigned int rb) +{ + return __nds32__pktt16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_pktt16 (uint16x2_t ra, uint16x2_t rb) +{ + return __nds32__v_pktt16 (ra, rb); +} + +int +main () +{ +#ifdef __NDS32_EL__ + uint16x2_t va_p = {0xdddd, 0xbbbb}; +#else + uint16x2_t va_p = {0xaaaa, 0xcccc}; +#endif + + unsigned int a = pktt16 (0x11223344, 0x55667788); + uint16x2_t va = v_pktt16 ((uint16x2_t) {0xaaaa, 0xbbbb}, + (uint16x2_t) {0xcccc, 0xdddd}); + + if (a != 0x11225566) + abort (); + else if (va[0] != va_p[0] + || va[1] != va_p[1]) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-radd16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-radd16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-radd16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-radd16.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,38 @@ +/* This is a test program for radd16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int radd16 (unsigned int ra, unsigned int rb) +{ + return __nds32__radd16 (ra, rb); +} + +static __attribute__ ((noinline)) +int16x2_t v_radd16 (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_radd16 (ra, rb); +} + +int +main () +{ + unsigned int a = radd16 (0x7fff7fff, 0x7fff7fff); + int16x2_t va = v_radd16 ((int16x2_t) {0x8000, 0x4000}, + (int16x2_t) {0x8000, 0x8000}); + + if (a != 0x7fff7fff) + abort (); + else if (va[0] != (short) 0x8000 + || va[1] != (short) 0xe000) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-radd64.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-radd64.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-radd64.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-radd64.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for radd64 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +long long radd64 (long long ra, long long rb) +{ + return __nds32__radd64 (ra, rb); +} + +int +main () +{ + long long a = radd64 (0xf000000000000000ll, 0xf000000000000000ll); + + if (a != 0xf000000000000000ll) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-radd8.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-radd8.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-radd8.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-radd8.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,40 @@ +/* This is a test program for radd8 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int radd8 (unsigned int ra, unsigned int rb) +{ + return __nds32__radd8 (ra, rb); +} + +static __attribute__ ((noinline)) +int8x4_t v_radd8 (int8x4_t ra, int8x4_t rb) +{ + return __nds32__v_radd8 (ra, rb); +} + +int +main () +{ + unsigned int a = radd8 (0x11223344, 0x55667788); + int8x4_t va = v_radd8 ((int8x4_t) {0x7f, 0x80, 0x80, 0xaa}, + (int8x4_t) {0x7f, 0x80, 0x40, 0xaa}); + + if (a != 0x334455e6) + abort (); + else if (va[0] != 0x7f + || va[1] != (char) 0x80 + || va[2] != (char) 0xe0 + || va[3] != (char) 0xaa) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-raddw.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-raddw.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-raddw.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-raddw.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for raddw instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +int raddw (int ra, int rb) +{ + return __nds32__raddw (ra, rb); +} + +int +main () +{ + int a = raddw (0x80000000, 0x80000000); + + if (a != 0x80000000) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rcras16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rcras16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rcras16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rcras16.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,44 @@ +/* This is a test program for rcras16 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int rcras16 (unsigned int ra, unsigned int rb) +{ + return __nds32__rcras16 (ra, rb); +} + +static __attribute__ ((noinline)) +int16x2_t v_rcras16 (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_rcras16 (ra, rb); +} + +int +main () +{ +#ifdef __NDS32_EL__ + int16x2_t va_p = {0x7fff, 0x8000}; +#else + int16x2_t va_p = {0xffff, 0}; +#endif + + unsigned int a = rcras16 (0x0fff0000, 0x00000fff); + int16x2_t va = v_rcras16 ((int16x2_t) {0x7fff, 0x8000}, + (int16x2_t) {0x8000, 0x8000}); + + if (a != 0x0fff0000) + abort (); + else if (va[0] != va_p[0] + || va[1] != va_p[1]) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rcrsa16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rcrsa16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rcrsa16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rcrsa16.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,44 @@ +/* This is a test program for rcrsa16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int rcrsa16 (unsigned int ra, unsigned int rb) +{ + return __nds32__rcrsa16 (ra, rb); +} + +static __attribute__ ((noinline)) +int16x2_t v_rcrsa16 (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_rcrsa16 (ra, rb); +} + +int +main () +{ +#ifdef __NDS32_EL__ + int16x2_t va_p = {0x8000, 0x8000}; +#else + int16x2_t va_p = {0, 0xffff}; +#endif + + unsigned int a = rcrsa16 (0x7fff7fff, 0x7fff8000); + int16x2_t va = v_rcrsa16 ((int16x2_t) {0x8000, 0x8000}, + (int16x2_t) {0x7fff, 0x8000}); + + if (a != 0x7fff7fff) + abort (); + else if (va[0] != va_p [0] + || va[1] != va_p [1]) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rsub16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rsub16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rsub16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rsub16.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,38 @@ +/* This is a test program for rsub16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int rsub16 (unsigned int ra, unsigned int rb) +{ + return __nds32__rsub16 (ra, rb); +} + +static __attribute__ ((noinline)) +int16x2_t v_rsub16 (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_rsub16 (ra, rb); +} + +int +main () +{ + unsigned int a = rsub16 (0x7fff7fff, 0x80008000); + int16x2_t va = v_rsub16 ((int16x2_t) {0x8000, 0x8000}, + (int16x2_t) {0x7fff, 0x4000}); + + if (a != 0x7fff7fff) + abort (); + else if (va[0] != (short) 0x8000 + || va[1] != (short) 0xa000) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rsub64.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rsub64.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rsub64.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rsub64.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for rsub64 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +long long rsub64 (long long ra, long long rb) +{ + return __nds32__rsub64 (ra, rb); +} + +int +main () +{ + long long a = rsub64 (0xe, 0xf); + + if (a != 0xffffffffffffffff) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rsub8.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rsub8.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rsub8.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rsub8.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,40 @@ +/* This is a test program for rsub8 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int rsub8 (unsigned int ra, unsigned int rb) +{ + return __nds32__rsub8 (ra, rb); +} + +static __attribute__ ((noinline)) +int8x4_t v_rsub8 (int8x4_t ra, int8x4_t rb) +{ + return __nds32__v_rsub8 (ra, rb); +} + +int +main () +{ + unsigned int a = rsub8 (0x55667788, 0x11223344); + int8x4_t va = v_rsub8 ((int8x4_t) {0x7f, 0x80, 0x80, 0xaa}, + (int8x4_t) {0x80, 0x7f, 0x40, 0xaa}); + + if (a != 0x222222a2) + abort (); + else if (va[0] != 0x7f + || va[1] != (char) 0x80 + || va[2] != (char) 0xa0 + || va[3] != 0) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rsubw.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rsubw.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rsubw.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-rsubw.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for rsubw instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +int rsubw (int ra, int rb) +{ + return __nds32__rsubw (ra, rb); +} + +int +main () +{ + int a = rsubw (0x80000000, 0x7fffffff); + + if (a != 0x80000000) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-scmple16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-scmple16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-scmple16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-scmple16.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for scmple16 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int scmple16 (unsigned int ra, unsigned int rb) +{ + return __nds32__scmple16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_scmple16 (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_scmple16 (ra, rb); +} + +int +main () +{ + unsigned int a = scmple16 (0xfffe0001, 0xffff0000); + uint16x2_t va = v_scmple16 ((int16x2_t) {0x7fff, 0x7ffe}, + (int16x2_t) {0x7ffe, 0x7fff}); + if (a != 0xffff0000) + abort (); + else if (va[0] != 0 + || va[1] != 0xffff) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-scmple8.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-scmple8.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-scmple8.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-scmple8.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,40 @@ +/* This is a test program for scmple8 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int scmple8 (unsigned int ra, unsigned int rb) +{ + return __nds32__scmple8 (ra, rb); +} + +static __attribute__ ((noinline)) +uint8x4_t v_scmple8 (int8x4_t ra, int8x4_t rb) +{ + return __nds32__v_scmple8 (ra, rb); +} + +int +main () +{ + unsigned int a = scmple8 (0xfefe0101, 0xffff0000); + uint8x4_t va = v_scmple8 ((int8x4_t) {0x7e, 0x7e, 0x01, 0x01}, + (int8x4_t) {0x7f, 0x7f, 0x00, 0x00}); + + if (a != 0xffff0000) + abort (); + else if (va[0] != 0xff + || va[1] != 0xff + || va[2] != 0 + || va[3] != 0) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-scmplt16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-scmplt16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-scmplt16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-scmplt16.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,38 @@ +/* This is a test program for scmplt16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int scmplt16 (unsigned int ra, unsigned int rb) +{ + return __nds32__scmplt16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_scmplt16 (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_scmplt16 (ra, rb); +} + +int +main () +{ + unsigned int a = scmplt16 (0xfffe0001, 0xffff0000); + uint16x2_t va = v_scmplt16 ((int16x2_t) {0x7fff, 0x7ffe}, + (int16x2_t) {0x7ffe, 0x7fff}); + + if (a != 0xffff0000) + abort (); + else if (va[0] != 0 + || va[1] != 0xffff) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-scmplt8.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-scmplt8.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-scmplt8.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-scmplt8.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,40 @@ +/* This is a test program for scmplt8 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int scmplt8 (unsigned int ra, unsigned int rb) +{ + return __nds32__scmplt8 (ra, rb); +} + +static __attribute__ ((noinline)) +uint8x4_t v_scmplt8 (int8x4_t ra, int8x4_t rb) +{ + return __nds32__v_scmplt8 (ra, rb); +} + +int +main () +{ + unsigned int a = scmplt8 (0xfefe0101, 0xffff0000); + uint8x4_t va = v_scmplt8 ((int8x4_t) {0x7e, 0x7e, 0x01, 0x01}, + (int8x4_t) {0x7f, 0x7f, 0x00, 0x00}); + + if (a != 0xffff0000) + abort (); + else if (va[0] != 0xff + || va[1] != 0xff + || va[2] != 0 + || va[3] != 0) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sll16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sll16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sll16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sll16.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for sll16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int sll16 (unsigned int ra, unsigned int rb) +{ + return __nds32__sll16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_sll16 (uint16x2_t ra, unsigned int rb) +{ + return __nds32__v_sll16 (ra, rb); +} + +int +main () +{ + unsigned int a = sll16 (0x0f00f000, 4); + uint16x2_t va = v_sll16 ((uint16x2_t) {0x7fff, 0x8000}, 4); + + if (a != 0xf0000000) + abort (); + else if (va[0] != 0xfff0 + || va[1] != 0) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalbb.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalbb.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalbb.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalbb.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,45 @@ +/* This is a test program for smalbb instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +long long smalbb (long long t, unsigned int a, unsigned int b) +{ + return __nds32__smalbb (t, a, b); +} + +static __attribute__ ((noinline)) +long long v_smalbb (long long t, int16x2_t a, int16x2_t b) +{ + return __nds32__v_smalbb (t, a, b); +} + + +int +main () +{ +#ifdef __NDS32_EL__ + long long a_p = 0x12345679075ca9d3ll; + long long va_p = 0x12345679075ca9d3ll; +#else + long long a_p = 0x12345679075ca9d3ll; + long long va_p = 0x12345678ffffffffll; +#endif + + long long a = smalbb (0x12345678ffffffffll,0x00006789, 0x00001234); + long long va = v_smalbb (0x12345678ffffffffll, (int16x2_t) {0x6789, 0}, + (int16x2_t) {0x1234, 0}); + if (a != a_p) + abort (); + else if (va != va_p) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalbt.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalbt.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalbt.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalbt.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,45 @@ +/* This is a test program for smalbt instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +long long smalbt (long long t, unsigned int a, unsigned int b) +{ + return __nds32__smalbt (t, a, b); +} + +static __attribute__ ((noinline)) +long long v_smalbt (long long t, int16x2_t a, int16x2_t b) +{ + return __nds32__v_smalbt (t, a, b); +} + + +int +main () +{ +#ifdef __NDS32_EL__ + long long a_p = 0x12345679075ca9d3ll; + long long va_p = 0x12345679075ca9d3ll; +#else + long long a_p = 0x12345679075ca9d3ll; + long long va_p = 0x12345678ffffffffll; +#endif + + long long a = smalbt (0x12345678ffffffffll, 0x00006789, 0x12340000); + long long va = v_smalbt (0x12345678ffffffffll, (int16x2_t) {0x6789, 0}, + (int16x2_t) {0, 0x1234}); + if (a != a_p) + abort (); + else if (va != va_p) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smal.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smal.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smal.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smal.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,36 @@ +/* This is a test program for smal instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +long long smal (long long ra, unsigned int rb) +{ + return __nds32__smal (ra, rb); +} + +static __attribute__ ((noinline)) +long long v_smal (long long ra, int16x2_t rb) +{ + return __nds32__v_smal (ra, rb); +} + +int +main () +{ + long long a = smal (0xfffff0000ll, 0x0001ffff); + long long va = v_smal (0xffffff0000ll, + (int16x2_t) {0x0002, 0xffff}); + if (a != 0xffffeffffll) + abort (); + else if (va != 0xfffffefffell) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalda.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalda.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalda.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalda.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,38 @@ +/* This is a test program for smalda instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +long long smalda (long long t, unsigned int a, unsigned int b) +{ + return __nds32__smalda (t, a, b); +} + +static __attribute__ ((noinline)) +long long v_smalda (long long t, int16x2_t a, int16x2_t b) +{ + return __nds32__v_smalda (t, a, b); +} + + +int +main () +{ + long long a = smalda (0x12345678ffffffffll, 0x67890000, 0x12340000); + long long va = v_smalda (0x12345678ffffffffll, (int16x2_t) {0, 0x6789}, + (int16x2_t) {0, 0x1234}); + + if (a != 0x12345679075CA9D3ll) + abort (); + else if (va != 0x12345679075CA9D3ll) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smaldrs.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smaldrs.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smaldrs.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smaldrs.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,46 @@ +/* This is a test program for smaldrs instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +long long smaldrs (long long t, unsigned int a, unsigned int b) +{ + return __nds32__smaldrs (t, a, b); +} + +static __attribute__ ((noinline)) +long long v_smaldrs (long long t, int16x2_t a, int16x2_t b) +{ + return __nds32__v_smaldrs (t, a, b); +} + + +int +main () +{ +#ifdef __NDS32_EL__ + long long a_p = 0x12345678ffffaaaall; + long long va_p = 0x12345678ffffaaaall; +#else + long long a_p = 0x12345678ffffaaaall; + long long va_p = 0x1234567900005554ll; +#endif + + long long a = smaldrs (0x12345678ffffffffll, 0x67890001, 0x00011234); + long long va = v_smaldrs (0x12345678ffffffffll, (int16x2_t) {0x0001, 0x6789}, + (int16x2_t) {0x1234, 0x0001}); + + if (a != a_p) + abort (); + else if (va != va_p) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalds.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalds.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalds.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalds.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,46 @@ +/* This is a test program for smalds instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +long long smalds (long long t, unsigned int a, unsigned int b) +{ + return __nds32__smalds (t, a, b); +} + +static __attribute__ ((noinline)) +long long v_smalds (long long t, int16x2_t a, int16x2_t b) +{ + return __nds32__v_smalds (t, a, b); +} + + +int +main () +{ +#ifdef __NDS32_EL__ + long long a_p = 0x12345678ffffaaaall; + long long va_p = 0x12345678ffffaaaall; +#else + long long a_p = 0x12345678ffffaaaall; + long long va_p = 0x1234567900005554ll; +#endif + + long long a = smalds (0x12345678ffffffffll, 0x12340001, 0x00016789); + long long va = v_smalds (0x12345678ffffffffll, (int16x2_t) {0x0001, 0x1234}, + (int16x2_t) {0x6789, 0x0001}); + + if (a != a_p) + abort (); + else if (va != va_p) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smaltt.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smaltt.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smaltt.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smaltt.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,46 @@ +/* This is a test program for smaltt instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +long long smaltt (long long t, unsigned int a, unsigned int b) +{ + return __nds32__smaltt (t, a, b); +} + +static __attribute__ ((noinline)) +long long v_smaltt (long long t, int16x2_t a, int16x2_t b) +{ + return __nds32__v_smaltt (t, a, b); +} + + +int +main () +{ +#ifdef __NDS32_EL__ + long long a_p = 0x12345679075ca9d3ll; + long long va_p = 0x12345679075ca9d3ll; +#else + long long a_p = 0x12345679075ca9d3ll; + long long va_p = 0x12345678ffffffffll; +#endif + + long long a = smaltt (0x12345678ffffffffll, 0x67890000, 0x12340000); + long long va = v_smaltt (0x12345678ffffffffll, (int16x2_t) {0, 0x6789}, + (int16x2_t) {0, 0x1234}); + + if (a != a_p) + abort (); + else if (va != va_p) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalxda.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalxda.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalxda.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalxda.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,38 @@ +/* This is a test program for smalxda instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +long long smalxda (long long t, unsigned int a, unsigned int b) +{ + return __nds32__smalxda (t, a, b); +} + +static __attribute__ ((noinline)) +long long v_smalxda (long long t, int16x2_t a, int16x2_t b) +{ + return __nds32__v_smalxda (t, a, b); +} + + +int +main () +{ + long long a = smalxda (0x12345678ffffffffll, 0x67890000, 0x00001234); + long long va = v_smalxda (0x12345678ffffffffll, (int16x2_t) {0, 0x6789}, + (int16x2_t) {0x1234, 0}); + + if (a != 0x12345679075CA9D3) + abort (); + else if (va != 0x12345679075CA9D3) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalxds.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalxds.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalxds.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smalxds.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,46 @@ +/* This is a test program for smalxds instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +long long smalxds (long long t, unsigned int a, unsigned int b) +{ + return __nds32__smalxds (t, a, b); +} + +static __attribute__ ((noinline)) +long long v_smalxds (long long t, int16x2_t a, int16x2_t b) +{ + return __nds32__v_smalxds (t, a, b); +} + + +int +main () +{ +#ifdef __NDS32_EL__ + long long a_p = 0x12345678ffffaaaall; + long long va_p = 0x12345678ffffaaaall; +#else + long long a_p = 0x12345678ffffaaaall; + long long va_p = 0x1234567900005554ll; +#endif + + long long a = smalxds (0x12345678ffffffffll, 0x12340001, 0x67890001); + long long va = v_smalxds (0x12345678ffffffffll, (int16x2_t) {0x0001, 0x1234}, + (int16x2_t) {0x0001, 0x6789}); + + if (a != a_p) + abort (); + else if (va != va_p) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smar64.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smar64.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smar64.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smar64.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for smar64 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +long long smar64 (long long t, int a, int b) +{ + return __nds32__smar64 (t, a, b); +} + +int +main () +{ + long long a = smar64 (0xf000000000000000ll, 0x12345678, 0x23); + + if (a != 0xf00000027d27d268ll) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smax16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smax16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smax16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smax16.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for smax16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int smax16 (unsigned int ra, unsigned int rb) +{ + return __nds32__smax16 (ra, rb); +} + +static __attribute__ ((noinline)) +int16x2_t v_smax16 (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_smax16 (ra, rb); +} + +int +main () +{ + unsigned int a = smax16 (0xfffe0001, 0xffff0000); + int16x2_t va = v_smax16 ((int16x2_t) {0x7fff, 0}, + (int16x2_t) {0x7ffe, 1}); + if (a != 0xffff0001) + abort (); + else if (va[0] != 0x7fff + || va[1] != 1) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smax8.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smax8.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smax8.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smax8.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,41 @@ +/* This is a test program for smax8 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int smax8 (unsigned int ra, unsigned int rb) +{ + return __nds32__smax8 (ra, rb); +} + +static __attribute__ ((noinline)) +int8x4_t v_smax8 (int8x4_t ra, int8x4_t rb) +{ + return __nds32__v_smax8 (ra, rb); +} + + +int +main () +{ + unsigned int a = smax8 (0xffff0000, 0xfefe0001); + int8x4_t va = v_smax8 ((int8x4_t) {0x7f, 0x7f, 0x01, 0x01}, + (int8x4_t) {0x7e, 0x7e, 0x00, 0x00}); + + if (a != 0xffff0001) + abort (); + else if (va[0] != 0x7f + || va[1] != 0x7f + || va[2] != 1 + || va[3] != 1) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smbb.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smbb.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smbb.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smbb.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,44 @@ +/* This is a test program for smbb instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +int smbb (unsigned int ra, unsigned int rb) +{ + return __nds32__smbb (ra, rb); +} + +static __attribute__ ((noinline)) +int v_smbb (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_smbb (ra, rb); +} + +int +main () +{ +#ifdef __NDS32_EL__ + int va_p = 1; +#else + int va_p = 2; +#endif + + int a = smbb (0x80000002, 0x80000001); + + int va = v_smbb ((int16x2_t) {0xffff, 0x0002}, + (int16x2_t) {0xffff, 0x0001}); + + if (a != 2) + abort (); + else if (va != va_p) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smbt.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smbt.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smbt.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smbt.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,44 @@ +/* This is a test program for smbt instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +int smbt (unsigned int ra, unsigned int rb) +{ + return __nds32__smbt (ra, rb); +} + +static __attribute__ ((noinline)) +int v_smbt (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_smbt (ra, rb); +} + +int +main () +{ +#ifdef __NDS32_EL__ + int va_p = 0xffffffff; +#else + int va_p = 0xfffffffe; +#endif + + int a = smbt (0x80000002, 0x80000001); + + int va = v_smbt ((int16x2_t) {0xffff, 0x0002}, + (int16x2_t) {0xffff, 0x0001}); + + if (a != 0xffff0000) + abort (); + else if (va != va_p) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smdrs.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smdrs.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smdrs.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smdrs.c 2016-08-08 20:37:53.562581991 +0200 @@ -0,0 +1,43 @@ +/* This is a test program for smdrs instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +int smdrs (unsigned int ra, unsigned int rb) +{ + return __nds32__smdrs (ra, rb); +} + +static __attribute__ ((noinline)) +int v_smdrs (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_smdrs (ra, rb); +} + +int +main () +{ +#ifdef __NDS32_EL__ + int va_p = 0xffffffff; +#else + int va_p = 1; +#endif + + int a = smdrs (0x80000002, 0x80000001); + int va = v_smdrs ((int16x2_t) {0xffff, 0x0002}, + (int16x2_t) {0xffff, 0x0001}); + + if (a != 0xc0000002) + abort (); + else if (va != va_p) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smds.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smds.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smds.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smds.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,43 @@ +/* This is a test program for smds instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +int smds (unsigned int ra, unsigned int rb) +{ + return __nds32__smds (ra, rb); +} + +static __attribute__ ((noinline)) +int v_smds (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_smds (ra, rb); +} + +int +main () +{ +#ifdef __NDS32_EL__ + int va_p = 1; +#else + int va_p = 0xffffffff; +#endif + + int a = smds (0x80000002, 0x80000001); + int va = v_smds ((int16x2_t) {0xffff, 0x0002}, + (int16x2_t) {0xffff, 0x0001}); + + if (a != 0x3ffffffe) + abort (); + else if (va != va_p) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smin16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smin16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smin16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smin16.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for smin16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int smin16 (unsigned int ra, unsigned int rb) +{ + return __nds32__smin16 (ra, rb); +} + +static __attribute__ ((noinline)) +int16x2_t v_smin16 (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_smin16 (ra, rb); +} + +int +main () +{ + unsigned int a = smin16 (0xfffe0001, 0xffff0000); + int16x2_t v_sa = v_smin16 ((int16x2_t) {0x7fff, 0}, + (int16x2_t) {0x7ffe, 1}); + if (a != 0xfffe0000) + abort (); + else if (v_sa[0] != 0x7ffe + || v_sa[1] != 0) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmul.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmul.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmul.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmul.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for smmul instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +int smmul (int ra, int rb) +{ + return __nds32__smmul (ra, rb); +} + +int +main () +{ + int a = smmul (0x80000000, 0x80000000); + + if (a != 0x40000000) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmulu.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmulu.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmulu.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmulu.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for smmul.u instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +int smmul_u (int ra, int rb) +{ + return __nds32__smmul_u (ra, rb); +} + +int +main () +{ + int a = smmul_u (0x80000002, 0x80000001); + + if (a != 0x3fffffff) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmwb.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmwb.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmwb.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmwb.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,43 @@ +/* This is a test program for smmwb instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +int smmwb (int ra, unsigned int rb) +{ + return __nds32__smmwb (ra, rb); +} + +static __attribute__ ((noinline)) +int v_smmwb (int ra, int16x2_t rb) +{ + return __nds32__v_smmwb (ra, rb); +} + +int +main () +{ +#ifdef __NDS32_EL__ + int va_p = 0; +#else + int va_p = 0xffffffff; +#endif + + int a = smmwb (0x80000002, 0x80000001); + + int va = v_smmwb (0xffff0002, (int16x2_t) {0xffff, 0x0001}); + + if (a != 0xffff8000) + abort (); + else if (va != va_p) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmwbu.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmwbu.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmwbu.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmwbu.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,43 @@ +/* This is a test program for smmwb.u instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +int smmwb_u (int ra, unsigned int rb) +{ + return __nds32__smmwb_u (ra, rb); +} + +static __attribute__ ((noinline)) +int v_smmwb_u (int ra, int16x2_t rb) +{ + return __nds32__v_smmwb_u (ra, rb); +} + +int +main () +{ +#ifdef __NDS32_EL__ + int va_p = 1; +#else + int va_p = 0xffffffff; +#endif + + int a = smmwb_u (0x80000002, 0x80000001); + + int va = v_smmwb_u (0xffff0002, (int16x2_t) {0xffff, 0x0001}); + + if (a != 0xffff8000) + abort (); + else if (va != va_p) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmwt.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmwt.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmwt.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmwt.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,43 @@ +/* This is a test program for smmwt instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +int smmwt (int ra, unsigned int rb) +{ + return __nds32__smmwt (ra, rb); +} + +static __attribute__ ((noinline)) +int v_smmwt (int ra, int16x2_t rb) +{ + return __nds32__v_smmwt (ra, rb); +} + +int +main () +{ +#ifdef __NDS32_EL__ + int va_p = 0xffffffff; +#else + int va_p = 0; +#endif + + int a = smmwt (0x80000002, 0x80000001); + + int va = v_smmwt (0xffff0002, (int16x2_t) {0xffff, 0x0001}); + + if (a != 0x3fffffff) + abort (); + else if (va != va_p) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmwtu.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmwtu.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmwtu.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smmwtu.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,43 @@ +/* This is a test program for smmwt.u instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +int smmwt_u (int ra, unsigned int rb) +{ + return __nds32__smmwt_u (ra, rb); +} + +static __attribute__ ((noinline)) +int v_smmwt_u (int ra, int16x2_t rb) +{ + return __nds32__v_smmwt_u (ra, rb); +} + +int +main () +{ +#ifdef __NDS32_EL__ + int va_p = 0xffffffff; +#else + int va_p = 1; +#endif + + int a = smmwt_u (0x80000002, 0x80000001); + + int va = v_smmwt_u (0xffff0002, (int16x2_t) {0xffff, 0x0001}); + + if (a != 0x3fffffff) + abort (); + else if (va != va_p) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smslda.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smslda.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smslda.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smslda.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for smslda instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +long long smslda (long long rt, unsigned int ra, unsigned int rb) +{ + return __nds32__smslda (rt, ra, rb); +} + +static __attribute__ ((noinline)) +long long v_smslda (long long rt, int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_smslda (rt, ra, rb); +} + +int +main () +{ + long long a = smslda (0xff0000000000ll, 0xffffffff, 0x2); + long long va = v_smslda (0x100000000ll, + (int16x2_t) {0xf000, 0}, (int16x2_t) {0, 3}); + + if (a != 0xff0000000002ll) + abort (); + else if (va != 0x100000000ll) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smslxda.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smslxda.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smslxda.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smslxda.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for smslxda instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +long long smslxda (long long rt, unsigned int ra, unsigned int rb) +{ + return __nds32__smslxda (rt, ra, rb); +} + +static __attribute__ ((noinline)) +long long v_smslxda (long long rt, int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_smslxda (rt, ra, rb); +} + +int +main () +{ + long long a = smslxda (0xff0000000000ll, 0xffffffff, 0x2); + long long va = v_smslxda (0x100000000ll, + (int16x2_t) {0xf000, 0}, (int16x2_t) {0, 3}); + + if (a != 0xff0000000002ll) + abort (); + else if (va != 0x100003000ll) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smsr64.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smsr64.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smsr64.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smsr64.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for smsr64 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +long long smsr64 (long long t, int a, int b) +{ + return __nds32__smsr64 (t, a, b); +} + +int +main () +{ + long long a = smsr64 (0x5000000300000000ll, 0x12345678, 0x23); + + if (a != 0x5000000082D82D98ll) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smtt.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smtt.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smtt.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smtt.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,44 @@ +/* This is a test program for smtt instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +int smtt (unsigned int ra, unsigned int rb) +{ + return __nds32__smtt (ra, rb); +} + +static __attribute__ ((noinline)) +int v_smtt (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_smtt (ra, rb); +} + +int +main () +{ +#ifdef __NDS32_EL__ + int va_p = 2; +#else + int va_p = 1; +#endif + + int a = smtt (0x80000002, 0x80000001); + + int va = v_smtt ((int16x2_t) {0xffff, 0x0002}, + (int16x2_t) {0xffff, 0x0001}); + + if (a != 0x40000000) + abort (); + else if (va != va_p) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smul16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smul16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smul16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smul16.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,38 @@ +/* This is a test program for smul16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned long long smul16 (unsigned int ra, unsigned int rb) +{ + return __nds32__smul16 (ra, rb); +} + +static __attribute__ ((noinline)) +int32x2_t v_smul16 (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_smul16 (ra, rb); +} + +int +main () +{ + unsigned long long a = smul16 (0xffff0000, 0x0001ffff); + int32x2_t va = v_smul16 ((int16x2_t) {0xffff, 0}, + (int16x2_t) {0x0001, 0xffff}); + + if (a != 0xffffffff00000000) + abort (); + else if (va[0] != 0xffffffff + || va[1] != 0) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smulx16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smulx16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smulx16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smulx16.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for smulx16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned long long smulx16 (unsigned int ra, unsigned int rb) +{ + return __nds32__smulx16 (ra, rb); +} + +static __attribute__ ((noinline)) +int32x2_t v_smulx16 (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_smulx16 (ra, rb); +} + +int +main () +{ + unsigned long long a = smulx16 (0xffff0000, 0xffff0001); + int32x2_t va = v_smulx16 ((int16x2_t) {0xffff, 0xffff}, + (int16x2_t) {1, 0}); + if (a != 0xffffffff00000000) + abort (); + else if (va[0] != 0 + || va[1] != 0xffffffff) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smxds.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smxds.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smxds.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-smxds.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,45 @@ +/* This is a test program for smxds instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +int smxds (unsigned int ra, unsigned int rb) +{ + return __nds32__smxds (ra, rb); +} + +static __attribute__ ((noinline)) +int v_smxds (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_smxds (ra, rb); +} + +int +main () +{ +#ifdef __NDS32_EL__ + int a_p = 0x8000; + int va_p = 0xffffffff; +#else + int a_p = 0x8000; + int va_p = 1; +#endif + + int a = smxds (0x80000002, 0x80000001); + int va = v_smxds ((int16x2_t) {0xffff, 0x0002}, + (int16x2_t) {0xffff, 0x0001}); + + if (a != a_p) + abort (); + else if (va != va_p) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sra16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sra16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sra16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sra16.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for sra16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int sra16 (unsigned int ra, unsigned int rb) +{ + return __nds32__sra16 (ra, rb); +} + +static __attribute__ ((noinline)) +int16x2_t v_sra16 (int16x2_t ra, unsigned int rb) +{ + return __nds32__v_sra16 (ra, rb); +} + +int +main () +{ + unsigned int a = sra16 (0x0ffff000, 4); + int16x2_t va = v_sra16 ((int16x2_t) {0x7fff, 0x8000}, 4); + + if (a != 0x00ffff00) + abort (); + else if (va[0] != 0x7ff + || va[1] != (short) 0xf800) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sra16u.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sra16u.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sra16u.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sra16u.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for sra16.u instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int sra16u (unsigned int ra, unsigned int rb) +{ + return __nds32__sra16_u (ra, rb); +} + +static __attribute__ ((noinline)) +int16x2_t v_sra16u (int16x2_t ra, unsigned int rb) +{ + return __nds32__v_sra16_u (ra, rb); +} + +int +main () +{ + unsigned int a = sra16u (0x0ffff000, 4); + int16x2_t va = v_sra16u ((int16x2_t) {0x7fff, 0x8000}, 4); + + if (a != 0x100ff00) + abort (); + else if (va[0] != 0x800 + || va[1] != (short) 0xf800) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srai16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srai16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srai16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srai16.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,39 @@ +/* This is a test program for srai16 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int srai16 (unsigned int ra) +{ + return __nds32__sra16 (ra, 4); +} + +static __attribute__ ((noinline)) +int16x2_t v_srai16 (int16x2_t ra) +{ + return __nds32__v_sra16 (ra, 4); +} + +int +main () +{ + unsigned int a = srai16 (0x0ffff000); + + int16x2_t aa; + int16x2_t va = v_srai16 ((int16x2_t) {0x7fff, 0x8000}); + + if (a != 0x00ffff00) + abort (); + else if (va[0] != 0x7ff + || va[1] != (short) 0xf800) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srai16u.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srai16u.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srai16u.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srai16u.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for srai16.u instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int srai16u (unsigned int ra) +{ + return __nds32__sra16_u (ra, 4); +} + +static __attribute__ ((noinline)) +int16x2_t v_srai16u (int16x2_t ra) +{ + return __nds32__v_sra16_u (ra, 4); +} + +int +main () +{ + unsigned int a = srai16u (0x0ffff000); + int16x2_t va = v_srai16u ((int16x2_t) {0x7fff, 0x8000}); + + if (a != 0x100ff00) + abort (); + else if (va[0] != 0x800 + || va[1] != (short) 0xf800) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sraiu.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sraiu.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sraiu.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sraiu.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for srai.u instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +int sraiu (int ra) +{ + return __nds32__sra_u (ra, 8); +} + +int +main () +{ + int a = sraiu (0xf00ff); + + if (a != 0xf01) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srau.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srau.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srau.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srau.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for sra.u instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +int srau (int ra, unsigned int rb) +{ + return __nds32__sra_u (ra, rb); +} + +int +main () +{ + int a = srau (0xf00ff, 8); + + if (a != 0xf01) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srl16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srl16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srl16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srl16.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for srl16 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int srl16 (unsigned int ra, unsigned int rb) +{ + return __nds32__srl16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_srl16 (uint16x2_t ra, unsigned int rb) +{ + return __nds32__v_srl16 (ra, rb); +} + +int +main () +{ + unsigned int a = srl16 (0x0f00f000, 4); + uint16x2_t va = v_srl16 ((uint16x2_t) {0x7fff, 0x8000}, 4); + + if (a != 0xf00f00) + abort (); + else if (va[0] != 0x7ff + || va[1] != 0x0800) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srl16u.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srl16u.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srl16u.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srl16u.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for srl16.u instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int srl16_u (unsigned int ra, unsigned int rb) +{ + return __nds32__srl16_u (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_srl16_u (uint16x2_t ra, unsigned int rb) +{ + return __nds32__v_srl16_u (ra, rb); +} + +int +main () +{ + unsigned int a = srl16_u (0x0f00f000, 4); + uint16x2_t va = v_srl16_u ((uint16x2_t) {0x7fff, 0x8000}, 4); + + if (a != 0xf00f00) + abort (); + else if (va[0] != 0x800 + || va[1] != 0x800) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srli16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srli16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srli16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srli16.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for srli16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int srli16 (unsigned int ra) +{ + return __nds32__srl16 (ra, 4); +} + +static __attribute__ ((noinline)) +uint16x2_t v_srli16 (uint16x2_t ra) +{ + return __nds32__v_srl16 (ra, 4); +} + +int +main () +{ + unsigned int a = srli16 (0x0f00f000); + uint16x2_t va = v_srli16 ((uint16x2_t) {0x7fff, 0x8000}); + + if (a != 0xf00f00) + abort (); + else if (va[0] != 0x7ff + || va[1] != 0x0800) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srli16u.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srli16u.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srli16u.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-srli16u.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for sril16.u instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int srli16_u (unsigned int ra) +{ + return __nds32__srl16_u (ra, 4); +} + +static __attribute__ ((noinline)) +uint16x2_t v_srli16_u (uint16x2_t ra) +{ + return __nds32__v_srl16_u (ra, 4); +} + +int +main () +{ + unsigned int a = srli16_u (0x0f00f000); + uint16x2_t va = v_srli16_u ((uint16x2_t) {0x7fff, 0x8000}); + + if (a != 0xf00f00) + abort (); + else if (va[0] != 0x800 + || va[1] != 0x800) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sub16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sub16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sub16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sub16.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,49 @@ +/* This is a test program for sub16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int sub16 (unsigned int ra, unsigned int rb) +{ + return __nds32__sub16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_usub16 (uint16x2_t ra, uint16x2_t rb) +{ + return __nds32__v_usub16 (ra, rb); +} + +static __attribute__ ((noinline)) +int16x2_t v_ssub16 (int16x2_t ra, int16x2_t rb) +{ + return __nds32__v_ssub16 (ra, rb); +} + +int +main () +{ + unsigned int a = sub16 (0x00010000, 0x00010001); + uint16x2_t v_ua = v_usub16 ((uint16x2_t) {0x1000, 0x0001}, + (uint16x2_t) {0xf000, 0x0000}); + int16x2_t v_sa = v_ssub16 ((int16x2_t) {0x7777, 0x2111}, + (int16x2_t) {0x1000, 0x2000}); + + if (a != 0x0000ffff) + abort (); + else if (v_ua[0] != 0x2000 + || v_ua[1] != 0x0001) + abort (); + else if (v_sa[0] != 0x6777 + || v_sa[1] != 0x0111) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sub64.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sub64.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sub64.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sub64.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,36 @@ +/* This is a test program for sub64 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +long long ssub64 (long long ra, long long rb) +{ + return __nds32__ssub64 (ra, rb); +} + +static __attribute__ ((noinline)) +unsigned long long usub64 (unsigned long long ra, unsigned long long rb) +{ + return __nds32__usub64 (ra, rb); +} + +int +main () +{ + long long sa = ssub64 (0x100000000ll, 0xffffffffll); + unsigned long long ua = usub64 (0xf00000000ull, 0x1111ull); + + if (sa != 1ll) + abort (); + else if (ua != 0xeffffeeefull) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sub8.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sub8.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sub8.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sub8.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,53 @@ +/* This is a test program for sub8 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int sub8 (unsigned int ra, unsigned int rb) +{ + return __nds32__sub8 (ra, rb); +} + +static __attribute__ ((noinline)) +uint8x4_t v_usub8 (uint8x4_t ra, uint8x4_t rb) +{ + return __nds32__v_usub8 (ra, rb); +} + +static __attribute__ ((noinline)) +int8x4_t v_ssub8 (int8x4_t ra, int8x4_t rb) +{ + return __nds32__v_ssub8 (ra, rb); +} + +int +main () +{ + unsigned int a = sub8 (0x55667788, 0x11223344); + uint8x4_t v_ua = v_usub8 ((uint8x4_t) {0xff, 0xee, 0xee, 0xcc}, + (uint8x4_t) {0x1, 0xee, 0xdd, 0xdd}); + int8x4_t v_sa = v_ssub8 ((int8x4_t) {0x81, 0x0, 0xdd, 0xaa}, + (int8x4_t) {0x80, 0x1, 0xcc, 0xaa}); + + if (a != 0x44444444) + abort (); + else if (v_ua[0] != 0xfe + || v_ua[1] != 0 + || v_ua[2] != 0x11 + || v_ua[3] != 0xef) + abort (); + else if (v_sa[0] != 1 + || v_sa[1] != (char) 0xff + || v_sa[2] != 0x11 + || v_sa[3] != 0) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sunpkd810.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sunpkd810.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sunpkd810.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sunpkd810.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,43 @@ +/* This is a test program for sunpkd810 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int sunpkd810 (unsigned int a) +{ + return __nds32__sunpkd810 (a); +} + +static __attribute__ ((noinline)) +int16x2_t v_sunpkd810 (int8x4_t a) +{ + return __nds32__v_sunpkd810 (a); +} + +int +main () +{ +#ifdef __NDS32_EL__ + int16x2_t va_p = {0xfff8, 0x56}; +#else + int16x2_t va_p = {0, 0}; +#endif + + unsigned int a = sunpkd810 (0x000056f8); + int16x2_t va = v_sunpkd810 ((int8x4_t) {0xf8, 0x56, 0, 0}); + + if (a != 0x0056fff8) + abort (); + else if (va[0] != va_p[0] + || va[1] != va_p[1]) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sunpkd820.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sunpkd820.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sunpkd820.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sunpkd820.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,43 @@ +/* This is a test program for sunpkd820 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int sunpkd820 (unsigned int a) +{ + return __nds32__sunpkd820 (a); +} + +static __attribute__ ((noinline)) +int16x2_t v_sunpkd820 (int8x4_t a) +{ + return __nds32__v_sunpkd820 (a); +} + +int +main () +{ +#ifdef __NDS32_EL__ + int16x2_t va_p = {0xfff8, 0x34}; +#else + int16x2_t va_p = {0, 0}; +#endif + + unsigned int a = sunpkd820 (0x003400f8); + int16x2_t va = v_sunpkd820 ((int8x4_t) {0xf8, 0, 0x34, 0}); + + if (a != 0x0034fff8) + abort (); + else if (va[0] != va_p[0] + || va[1] != va_p[1]) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sunpkd830.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sunpkd830.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sunpkd830.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sunpkd830.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for sunpkd830 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int sunpkd830 (unsigned int a) +{ + return __nds32__sunpkd830 (a); +} + +static __attribute__ ((noinline)) +int16x2_t v_sunpkd830 (int8x4_t a) +{ + return __nds32__v_sunpkd830 (a); +} + +int +main () +{ + unsigned int a = sunpkd830 (0x120000f8); + int16x2_t va = v_sunpkd830 ((int8x4_t) {0xf8, 0x00, 0, 0x12}); + + if (a != 0x0012fff8) + abort (); + else if (va[0] != (short) 0xfff8 + || va[1] != 0x0012) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sunpkd831.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sunpkd831.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sunpkd831.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-sunpkd831.c 2016-08-08 20:37:53.566582146 +0200 @@ -0,0 +1,43 @@ +/* This is a test program for sunpkd831 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int sunpkd831 (unsigned int a) +{ + return __nds32__sunpkd831 (a); +} + +static __attribute__ ((noinline)) +int16x2_t v_sunpkd831 (int8x4_t a) +{ + return __nds32__v_sunpkd831 (a); +} + +int +main () +{ +#ifdef __NDS32_EL__ + int16x2_t va_p = {0xfff8, 0x12}; +#else + int16x2_t va_p = {0, 0}; +#endif + + unsigned int a = sunpkd831 (0x1200f800); + int16x2_t va = v_sunpkd831 ((int8x4_t) {0, 0xf8, 0, 0x12}); + + if (a != 0x0012fff8) + abort (); + else if (va[0] != va_p[0] + || va[1] != va_p[1]) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ucmple16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ucmple16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ucmple16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ucmple16.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for ucmple16 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int ucmple16 (unsigned int ra, unsigned int rb) +{ + return __nds32__ucmple16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_ucmple16 (uint16x2_t ra, uint16x2_t rb) +{ + return __nds32__v_ucmple16 (ra, rb); +} + +int +main () +{ + unsigned int a = ucmple16 (0xfffe0001, 0xffff0000); + uint16x2_t va = v_ucmple16 ((uint16x2_t) {0x7fff, 0x7ffe}, + (uint16x2_t) {0x7ffe, 0x7fff}); + if (a != 0xffff0000) + abort (); + else if (va[0] != 0 + || va[1] != 0xffff) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ucmple8.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ucmple8.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ucmple8.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ucmple8.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,40 @@ +/* This is a test program for ucmple8 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int ucmple8 (unsigned int ra, unsigned int rb) +{ + return __nds32__ucmple8 (ra, rb); +} + +static __attribute__ ((noinline)) +uint8x4_t v_ucmple8 (uint8x4_t ra, uint8x4_t rb) +{ + return __nds32__v_ucmple8 (ra, rb); +} + +int +main () +{ + unsigned int a = ucmple8 (0xfefe0101, 0xffff0000); + uint8x4_t va = v_ucmple8 ((uint8x4_t) {0x7e, 0x7e, 0x01, 0x01}, + (uint8x4_t) {0x7f, 0x7f, 0x00, 0x00}); + + if (a != 0xffff0000) + abort (); + else if (va[0] != 0xff + || va[1] != 0xff + || va[2] != 0 + || va[3] != 0) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ucmplt16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ucmplt16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ucmplt16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ucmplt16.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for ucmplt16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int ucmplt16 (unsigned int ra, unsigned int rb) +{ + return __nds32__ucmplt16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_ucmplt16 (uint16x2_t ra, uint16x2_t rb) +{ + return __nds32__v_ucmplt16 (ra, rb); +} + +int +main () +{ + unsigned int a = ucmplt16 (0xfffe0001, 0xffff0000); + uint16x2_t va = v_ucmplt16 ((uint16x2_t) {0x7fff, 0x7ffe}, + (uint16x2_t) {0x7ffe, 0x7fff}); + if (a != 0xffff0000) + abort (); + else if (va[0] != 0 + || va[1] != 0xffff) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ucmplt8.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ucmplt8.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ucmplt8.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ucmplt8.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,40 @@ +/* This is a test program for ucmplt8 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int ucmplt8 (unsigned int ra, unsigned int rb) +{ + return __nds32__ucmplt8 (ra, rb); +} + +static __attribute__ ((noinline)) +uint8x4_t v_ucmplt8 (uint8x4_t ra, uint8x4_t rb) +{ + return __nds32__v_ucmplt8 (ra, rb); +} + +int +main () +{ + unsigned int a = ucmplt8 (0xfefe0101, 0xffff0000); + uint8x4_t va = v_ucmplt8 ((uint8x4_t) {0x7e, 0x7e, 0x01, 0x01}, + (uint8x4_t) {0x7f, 0x7f, 0x00, 0x00}); + + if (a != 0xffff0000) + abort (); + else if (va[0] != 0xff + || va[1] != 0xff + || va[2] != 0 + || va[3] != 0) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umar64.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umar64.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umar64.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umar64.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for umar64 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned long long umar64 (unsigned long long t,unsigned int a,unsigned int b) +{ + return __nds32__umar64 (t, a, b); +} + +int +main () +{ + unsigned long long a = umar64 (0xf000000000000000ull, 0x12345678, 0x23); + + if (a != 0xf00000027d27d268ull) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umax16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umax16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umax16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umax16.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for umax16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int umax16 (unsigned int ra, unsigned int rb) +{ + return __nds32__umax16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_umax16 (uint16x2_t ra, uint16x2_t rb) +{ + return __nds32__v_umax16 (ra, rb); +} + +int +main () +{ + unsigned int a = umax16 (0xfffe0001, 0xffff0000); + uint16x2_t va = v_umax16 ((uint16x2_t) {0xffff, 0}, + (uint16x2_t) {0xfffe, 1}); + if (a != 0xffff0001) + abort (); + else if (va[0] != 0xffff + || va[1] != 1) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umax8.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umax8.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umax8.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umax8.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,41 @@ +/* This is a test program for umax8 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int umax8 (unsigned int ra, unsigned int rb) +{ + return __nds32__umax8 (ra, rb); +} + +static __attribute__ ((noinline)) +uint8x4_t v_umax8 (uint8x4_t ra, uint8x4_t rb) +{ + return __nds32__v_umax8 (ra, rb); +} + + +int +main () +{ + unsigned int a = umax8 (0xffff0000, 0xfffe0001); + uint8x4_t va = v_umax8 ((uint8x4_t) {0xff, 0xff, 0x01, 0x01}, + (uint8x4_t) {0xfe, 0xfe, 0x00, 0x00}); + + if (a != 0xffff0001) + abort (); + else if (va[0] != 0xff + || va[1] != 0xff + || va[2] != 1 + || va[3] != 1) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umin16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umin16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umin16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umin16.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for umin16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int umin16 (unsigned int ra, unsigned int rb) +{ + return __nds32__umin16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_umin16 (uint16x2_t ra, uint16x2_t rb) +{ + return __nds32__v_umin16 (ra, rb); +} + +int +main () +{ + unsigned int a = umin16 (0xfffe0001, 0xffff0000); + uint16x2_t va = v_umin16 ((uint16x2_t) {0x7fff, 0}, + (uint16x2_t) {0x7ffe, 1}); + if (a != 0xfffe0000) + abort (); + else if (va[0] != 0x7ffe + || va[1] != 0) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umsr64.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umsr64.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umsr64.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umsr64.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for umsr64 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned long long umsr64 (unsigned long long t, unsigned int a, unsigned int b) +{ + return __nds32__umsr64 (t, a, b); +} + +int +main () +{ + unsigned long long a = umsr64 (0x5000000300000000ull, 0x12345678, 0x23); + + if (a != 0x5000000082D82D98ull) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umul16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umul16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umul16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umul16.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for umul16 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned long long umul16 (unsigned int ra, unsigned int rb) +{ + return __nds32__umul16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint32x2_t v_umul16 (uint16x2_t ra, uint16x2_t rb) +{ + return __nds32__v_umul16 (ra, rb); +} + +int +main () +{ + unsigned long long a = umul16 (0xffff0000, 0x0001ffff); + uint32x2_t va = v_umul16 ((uint16x2_t) {0xffff, 0}, + (uint16x2_t) {0x0001, 0xffff}); + if (a != 0xffff00000000) + abort (); + else if (va[0] != 0xffff + || va[1] != 0) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umulx16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umulx16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umulx16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-umulx16.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for umulx16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned long long umulx16 (unsigned int ra, unsigned int rb) +{ + return __nds32__umulx16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint32x2_t v_umulx16 (uint16x2_t ra, uint16x2_t rb) +{ + return __nds32__v_umulx16 (ra, rb); +} + +int +main () +{ + unsigned long long a = umulx16 (0xffff0000, 0xffff0001); + uint32x2_t va = v_umulx16 ((uint16x2_t) {0xffff, 0xffff}, + (uint16x2_t) {1, 0}); + if (a != 0xffff00000000) + abort (); + else if (va[0] != 0 + || va[1] != 0xffff) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-uradd16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-uradd16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-uradd16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-uradd16.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,38 @@ +/* This is a test program for uradd16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int uradd16 (unsigned int ra, unsigned int rb) +{ + return __nds32__uradd16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_uradd16 (uint16x2_t ra, uint16x2_t rb) +{ + return __nds32__v_uradd16 (ra, rb); +} + +int +main () +{ + unsigned int a = uradd16 (0x7fff7fff, 0x7fff7fff); + uint16x2_t va = v_uradd16 ((uint16x2_t) {0x8000, 0x4000}, + (uint16x2_t) {0x8000, 0x8000}); + + if (a != 0x7fff7fff) + abort (); + else if (va[0] != 0x8000 + || va[1] != 0x6000) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-uradd64.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-uradd64.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-uradd64.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-uradd64.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for uradd64 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned long long uradd64 (unsigned long long ra, unsigned long long rb) +{ + return __nds32__uradd64 (ra, rb); +} + +int +main () +{ + unsigned long long a = uradd64 (0xf000000000000000ull, 0xf000000000000000ull); + + if (a != 0xf000000000000000ull) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-uradd8.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-uradd8.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-uradd8.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-uradd8.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,40 @@ +/* This is a test program for uradd8 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int uradd8 (unsigned int ra, unsigned int rb) +{ + return __nds32__uradd8 (ra, rb); +} + +static __attribute__ ((noinline)) +uint8x4_t v_uradd8 (uint8x4_t ra, uint8x4_t rb) +{ + return __nds32__v_uradd8 (ra, rb); +} + +int +main () +{ + unsigned int a = uradd8 (0x11223344, 0x55667788); + uint8x4_t va = v_uradd8 ((uint8x4_t) {0x7f, 0x80, 0x40, 0xaa}, + (uint8x4_t) {0x7f, 0x80, 0x80, 0xaa}); + + if (a != 0x33445566) + abort (); + else if (va[0] != 0x7f + || va[1] != 0x80 + || va[2] != 0x60 + || va[3] != 0xaa) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-uraddw.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-uraddw.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-uraddw.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-uraddw.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for uraddw instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int uraddw (unsigned int ra, unsigned int rb) +{ + return __nds32__uraddw (ra, rb); +} + +unsigned int +main () +{ + unsigned int a = uraddw (0x80000000, 0x80000000); + + if (a != 0x80000000) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-urcras16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-urcras16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-urcras16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-urcras16.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,44 @@ +/* This is a test program for urcras16 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int urcras16 (unsigned int ra, unsigned int rb) +{ + return __nds32__urcras16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_urcras16 (uint16x2_t ra, uint16x2_t rb) +{ + return __nds32__v_urcras16 (ra, rb); +} + +int +main () +{ +#ifdef __NDS32_EL__ + uint16x2_t va_p = {0xffff, 0x8000}; +#else + uint16x2_t va_p = {0x7fff, 0}; +#endif + + unsigned int a = urcras16 (0x7fff7fff, 0x80007fff); + uint16x2_t va = v_urcras16 ((uint16x2_t) {0x7fff, 0x8000}, + (uint16x2_t) {0x8000, 0x8000}); + + if (a != 0x7fffffff) + abort (); + else if (va[0] != va_p[0] + || va[1] != va_p[1]) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-urcrsa16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-urcrsa16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-urcrsa16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-urcrsa16.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,44 @@ +/* This is a test program for urcrsa16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int urcrsa16 (unsigned int ra, unsigned int rb) +{ + return __nds32__urcrsa16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_urcrsa16 (uint16x2_t ra, uint16x2_t rb) +{ + return __nds32__v_urcrsa16 (ra, rb); +} + +int +main () +{ +#ifdef __NDS32_EL__ + uint16x2_t va_p = {0x8000, 0xffff}; +#else + uint16x2_t va_p = {0, 0x7fff}; +#endif + + unsigned int a = urcrsa16 (0x7fff7fff, 0x7fff8000); + uint16x2_t va = v_urcrsa16 ((uint16x2_t) {0x8000, 0x7fff}, + (uint16x2_t) {0x8000, 0x8000}); + + if (a != 0xffff7fff) + abort (); + else if (va[0] != va_p[0] + || va[1] != va_p[1]) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ursub16.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ursub16.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ursub16.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ursub16.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,38 @@ +/* This is a test program for ursub16 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int ursub16 (unsigned int ra, unsigned int rb) +{ + return __nds32__ursub16 (ra, rb); +} + +static __attribute__ ((noinline)) +uint16x2_t v_ursub16 (uint16x2_t ra, uint16x2_t rb) +{ + return __nds32__v_ursub16 (ra, rb); +} + +int +main () +{ + unsigned int a = ursub16 (0x7fff7fff, 0x80008000); + uint16x2_t va = v_ursub16 ((uint16x2_t) {0x8000, 0x8000}, + (uint16x2_t) {0x7fff, 0x4000}); + + if (a != 0xffffffff) + abort (); + else if (va[0] != 0 + || va[1] != 0x2000) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ursub64.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ursub64.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ursub64.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ursub64.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for ursub64 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned long long ursub64 (unsigned long long ra, unsigned long long rb) +{ + return __nds32__ursub64 (ra, rb); +} + +int +main () +{ + unsigned long long a = ursub64 (0xeull, 0xfull); + + if (a != 0xffffffffffffffffull) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ursub8.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ursub8.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ursub8.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ursub8.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,40 @@ +/* This is a test program for ursub8 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int ursub8 (unsigned int ra, unsigned int rb) +{ + return __nds32__ursub8 (ra, rb); +} + +static __attribute__ ((noinline)) +uint8x4_t v_ursub8 (uint8x4_t ra, uint8x4_t rb) +{ + return __nds32__v_ursub8 (ra, rb); +} + +int +main () +{ + unsigned int a = ursub8 (0x55667788, 0x11223344); + uint8x4_t va = v_ursub8 ((uint8x4_t) {0x7f, 0x80, 0x80, 0xaa}, + (uint8x4_t) {0x80, 0x7f, 0x40, 0xaa}); + + if (a != 0x22222222) + abort (); + else if (va[0] != 0xff + || va[1] != 0 + || va[2] != 0x20 + || va[3] != 0) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ursubw.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ursubw.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ursubw.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-ursubw.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for ursubw instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int ursubw (unsigned int ra,unsigned int rb) +{ + return __nds32__ursubw (ra, rb); +} + +int +main () +{ + unsigned int a = ursubw (0x80000000, 0x40000000); + + if (a != 0x20000000) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-wext.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-wext.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-wext.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-wext.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for wext instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int wext (long long ra, unsigned int rb) +{ + return __nds32__wext (ra, rb); +} + +int +main () +{ + unsigned int a = wext (0x1234ffff0000ll, 16); + + if (a != 0x1234ffff) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-wexti.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-wexti.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-wexti.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-wexti.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,27 @@ +/* This is a test program for wexti instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int wexti (long long ra) +{ + return __nds32__wext (ra, 16); +} + +int +main () +{ + unsigned int a = wexti (0x1234ffff0000ll); + + if (a != 0x1234ffff) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-zunpkd810.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-zunpkd810.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-zunpkd810.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-zunpkd810.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,43 @@ +/* This is a test program for zunpkd810 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int zunpkd810 (unsigned int a) +{ + return __nds32__zunpkd810 (a); +} + +static __attribute__ ((noinline)) +uint16x2_t v_zunpkd810 (uint8x4_t a) +{ + return __nds32__v_zunpkd810 (a); +} + +int +main () +{ +#ifdef __NDS32_EL__ + int16x2_t va_p = {0xf8, 0x56}; +#else + int16x2_t va_p = {0, 0}; +#endif + + unsigned int a = zunpkd810 (0x000056f8); + uint16x2_t va = v_zunpkd810 ((uint8x4_t) {0xf8, 0x56, 0, 0}); + + if (a != 0x005600f8) + abort (); + else if (va[0] != va_p[0] + || va[1] != va_p[1]) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-zunpkd820.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-zunpkd820.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-zunpkd820.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-zunpkd820.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,43 @@ +/* This is a test program for zunpkd820 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int zunpkd820 (unsigned int a) +{ + return __nds32__zunpkd820 (a); +} + +static __attribute__ ((noinline)) +uint16x2_t v_zunpkd820 (uint8x4_t a) +{ + return __nds32__v_zunpkd820 (a); +} + +int +main () +{ +#ifdef __NDS32_EL__ + int16x2_t va_p = {0xf8, 0x34}; +#else + int16x2_t va_p = {0, 0}; +#endif + + unsigned int a = zunpkd820 (0x003400f8); + uint16x2_t va = v_zunpkd820 ((uint8x4_t) {0xf8, 0, 0x34, 0}); + + if (a != 0x003400f8) + abort (); + else if (va[0] != va_p[0] + || va[1] != va_p[1]) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-zunpkd830.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-zunpkd830.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-zunpkd830.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-zunpkd830.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,37 @@ +/* This is a test program for zunpkd830 instruction. */ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int zunpkd830 (unsigned int a) +{ + return __nds32__zunpkd830 (a); +} + +static __attribute__ ((noinline)) +uint16x2_t v_zunpkd830 (uint8x4_t a) +{ + return __nds32__v_zunpkd830 (a); +} + +int +main () +{ + unsigned int a = zunpkd830 (0x120000f8); + uint16x2_t va = v_zunpkd830 ((uint8x4_t) { 0xf8, 0x00, 0, 0x12}); + + if (a != 0x001200f8) + abort (); + else if (va[0] != 0x00f8 + || va[1] != 0x0012) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-zunpkd831.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-zunpkd831.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-zunpkd831.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp/builtin-dsp-zunpkd831.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,43 @@ +/* This is a test program for zunpkd831 instruction. 
*/ + +/* { dg-do run } */ + +#include +#include + +#ifdef __NDS32_EXT_DSP__ +static __attribute__ ((noinline)) +unsigned int zunpkd831 (unsigned int a) +{ + return __nds32__zunpkd831 (a); +} + +static __attribute__ ((noinline)) +uint16x2_t v_zunpkd831 (uint8x4_t a) +{ + return __nds32__v_zunpkd831 (a); +} + +int +main () +{ +#ifdef __NDS32_EL__ + int16x2_t va_p = {0xf8, 0x12}; +#else + int16x2_t va_p = {0, 0}; +#endif + + unsigned int a = zunpkd831 (0x1200f800); + uint16x2_t va = v_zunpkd831 ((uint8x4_t) {0, 0xf8, 0, 0x12}); + + if (a != 0x001200f8) + abort (); + else if (va[0] != va_p[0] + || va[1] != va_p[1]) + abort (); + else + exit (0); +} +#else +int main(){return 0;} +#endif diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp-add-sub.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp-add-sub.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp-add-sub.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp-add-sub.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mext-dsp" } */ +/* { dg-final { scan-assembler "add8" } } */ +/* { dg-final { scan-assembler "add16" } } */ +/* { dg-final { scan-assembler "add64" } } */ +/* { dg-final { scan-assembler "sub8" } } */ +/* { dg-final { scan-assembler "sub16" } } */ +/* { dg-final { scan-assembler "sub64" } } */ + +typedef signed char v4qi __attribute__ ((vector_size (4))); +typedef short v2hi __attribute__ ((vector_size (4))); + +v4qi __attribute__ ((noinline)) +add8 (v4qi a, v4qi b) +{ + return a + b; +} + +v4qi __attribute__ ((noinline)) +sub8 (v4qi a, v4qi b) +{ + return a - b; +} + +v2hi __attribute__ ((noinline)) +add16 (v2hi a, v2hi b) +{ + return a + b; +} + +v2hi __attribute__ ((noinline)) +sub16 (v2hi a, v2hi b) +{ + return a - b; +} + +long long +add64 (long long a, long long b) +{ + return a + b; +} + +long long +sub64 (long long a, long long b) +{ + return a - b; +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp-bpick.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp-bpick.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp-bpick.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp-bpick.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mext-dsp" } */ +/* { dg-final { scan-assembler "bpick" } } */ + +int bpick(int a, int b, int mask) +{ + return (a & mask) | (b & ~mask); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp-mmul.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp-mmul.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp-mmul.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp-mmul.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mext-dsp" } */ +/* { dg-final { scan-assembler "smmul" } } */ + +typedef signed char v4qi __attribute__ ((vector_size (4))); +typedef short v2hi __attribute__ ((vector_size (4))); + +int smmul(int a, int b) +{ + long long tmp = (long long)a * b; + return (int)((tmp >> 32) & 0xffffffffll); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp-mulhisi.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp-mulhisi.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp-mulhisi.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp-mulhisi.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mext-dsp" } */ +/* { dg-final { 
scan-assembler "smbb" } } */ +/* { dg-final { scan-assembler "smbt" } } */ +/* { dg-final { scan-assembler "smtt" } } */ + +typedef signed char v4qi __attribute__ ((vector_size (4))); +typedef short v2hi __attribute__ ((vector_size (4))); + +int smbb(v2hi a, v2hi b) +{ + return a[0] * b[0]; +} + +int smbt(v2hi a, v2hi b) +{ + return a[0] * b[1]; +} + +int smtt(v2hi a, v2hi b) +{ + return a[1] * b[1]; +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp-raddsub.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp-raddsub.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp-raddsub.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp-raddsub.c 2016-08-08 20:37:53.570582300 +0200 @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mext-dsp" } */ +/* { dg-final { scan-assembler "raddw" } } */ +/* { dg-final { scan-assembler "rsubw" } } */ +/* { dg-final { scan-assembler "uraddw" } } */ +/* { dg-final { scan-assembler "ursubw" } } */ + +int raddw(int a, int b) +{ + return (a + b) >> 1; +} + +int rsubw(int a, int b) +{ + return (a - b) >> 1; +} + +unsigned uraddw(unsigned a, unsigned b) +{ + return (a + b) >> 1; +} + +unsigned ursubw(unsigned a, unsigned b) +{ + return (a - b) >> 1; +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp-smals.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp-smals.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp-smals.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp-smals.c 2016-08-08 20:37:53.574582455 +0200 @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mext-dsp" } */ +/* { dg-final { scan-assembler "smalbb" } } */ +/* { dg-final { scan-assembler "smalbt" } } */ +/* { dg-final { scan-assembler "smaltt" } } */ +/* { dg-final { scan-assembler "smal" } } */ + +typedef signed char v4qi __attribute__ ((vector_size (4))); +typedef short v2hi __attribute__ ((vector_size (4))); + + +long long smalbb(long long acc, v2hi a, v2hi b) +{ + return acc + a[0] * b[0]; +} + +long long smalbt(long long acc, v2hi a, v2hi b) +{ + return acc + a[1] * b[0]; +} + +long long smaltt(long long acc, v2hi a, v2hi b) +{ + return acc + a[1] * b[1]; +} + +long long smal(v2hi a, long long b) +{ + return b + (long long)(a[0] * a[1]); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp-smalxda.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp-smalxda.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp-smalxda.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp-smalxda.c 2016-08-08 20:37:53.574582455 +0200 @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mext-dsp" } */ +/* { dg-final { scan-assembler "smalxda" } } */ +/* { dg-final { scan-assembler "smalxds" } } */ + +typedef signed char v4qi __attribute__ ((vector_size (4))); +typedef short v2hi __attribute__ ((vector_size (4))); + +long long smalxda(long long acc, v2hi a, v2hi b) +{ + return acc + (a[0] * b[1] + a[1] * b[0]); +} + +long long smalxds(long long acc, v2hi a, v2hi b) +{ + return acc + (a[1] * b[0] - a[0] * b[1]); +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp-unpkd.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp-unpkd.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp-unpkd.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp-unpkd.c 2016-08-08 20:37:53.574582455 +0200 @@ -0,0 +1,79 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mext-dsp" } */ +/* { dg-final { scan-assembler "sunpkd810" } } 
*/ +/* { dg-final { scan-assembler "sunpkd820" } } */ +/* { dg-final { scan-assembler "sunpkd830" } } */ +/* { dg-final { scan-assembler "sunpkd831" } } */ +/* { dg-final { scan-assembler "zunpkd810" } } */ +/* { dg-final { scan-assembler "zunpkd820" } } */ +/* { dg-final { scan-assembler "zunpkd830" } } */ +/* { dg-final { scan-assembler "zunpkd831" } } */ + +typedef signed char v4qi __attribute__ ((vector_size (4))); +typedef short v2hi __attribute__ ((vector_size (4))); +typedef unsigned char uv4qi __attribute__ ((vector_size (4))); +typedef unsigned short uv2hi __attribute__ ((vector_size (4))); + +v2hi sunpkd810(v4qi v) +{ + v2hi ret; + ret[0] = v[0]; + ret[1] = v[1]; + return ret; +} + +v2hi sunpkd820(v4qi v) +{ + v2hi ret; + ret[0] = v[0]; + ret[1] = v[2]; + return ret; +} + +v2hi sunpkd830(v4qi v) +{ + v2hi ret; + ret[0] = v[0]; + ret[1] = v[3]; + return ret; +} + +v2hi sunpkd831(v4qi v) +{ + v2hi ret; + ret[0] = v[1]; + ret[1] = v[3]; + return ret; +} + +uv2hi zunpkd810(uv4qi v) +{ + uv2hi ret; + ret[0] = v[0]; + ret[1] = v[1]; + return ret; +} + +uv2hi zunpkd820(uv4qi v) +{ + uv2hi ret; + ret[0] = v[0]; + ret[1] = v[2]; + return ret; +} + +uv2hi zunpkd830(uv4qi v) +{ + uv2hi ret; + ret[0] = v[0]; + ret[1] = v[3]; + return ret; +} + +uv2hi zunpkd831(uv4qi v) +{ + uv2hi ret; + ret[0] = v[1]; + ret[1] = v[3]; + return ret; +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp-v2hi-packing00.c gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp-v2hi-packing00.c --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/dsp-v2hi-packing00.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/dsp-v2hi-packing00.c 2016-08-08 20:37:53.574582455 +0200 @@ -0,0 +1,127 @@ +/* { dg-do run } */ + +#include + +int16x2_t packing01(int16x2_t x, int16x2_t y) __attribute__ ((noinline)); +int16x2_t packing01(int16x2_t x, int16x2_t y) +{ + int16x2_t ret; + ret[0] = x[0]; + ret[1] = y[1]; + return ret; +} + +int16x2_t packing10(int16x2_t x, int16x2_t y) __attribute__ ((noinline)); +int16x2_t packing10(int16x2_t x, int16x2_t y) +{ + int16x2_t ret; + ret[0] = x[1]; + ret[1] = y[0]; + return ret; +} + +int16x2_t packing00(int16x2_t x, int16x2_t y) __attribute__ ((noinline)); +int16x2_t packing00(int16x2_t x, int16x2_t y) +{ + int16x2_t ret; + ret[0] = x[0]; + ret[1] = y[0]; + return ret; +} + +int16x2_t packing0cv0(int16x2_t x) __attribute__ ((noinline)); +int16x2_t packing0cv0(int16x2_t x) +{ + int16x2_t ret = {0, 0}; + ret[0] = x[0]; + return ret; +} + +int16x2_t packingcv00(int16x2_t x) __attribute__ ((noinline)); +int16x2_t packingcv00(int16x2_t x) +{ + int16x2_t ret = {0, 0}; + ret[1] = x[0]; + return ret; +} + +int16x2_t packing11(int16x2_t x, int16x2_t y) __attribute__ ((noinline)); +int16x2_t packing11(int16x2_t x, int16x2_t y) +{ + int16x2_t ret; + ret[0] = x[1]; + ret[1] = y[1]; + return ret; +} +int16x2_t packing1cv0(int16x2_t x) __attribute__ ((noinline)); +int16x2_t packing1cv0(int16x2_t x) +{ + int16x2_t ret = {0, 0}; + ret[0] = x[1]; + return ret; +} + +int16x2_t packingcv01(int16x2_t x) __attribute__ ((noinline)); +int16x2_t packingcv01(int16x2_t x) +{ + int16x2_t ret = {0, 0}; + ret[1] = x[1]; + return ret; +} + +int main() { + int16x2_t a = {0x11, 0x22}; + int16x2_t b = {0x33, 0x44}; + + int16x2_t ret00, ret01, ret10, ret11; + int16x2_t ret0cv0, retcv00, ret1cv0, retcv01; + ret00 = packing00 (a, b); + + if (ret00[0] != 0x11 + || ret00[1] != 0x33) + return 1; + + ret0cv0 = packing0cv0 (a); + + if (ret0cv0[0] != 0x11 + || ret0cv0[1] != 0) + return 1; + + retcv00 = 
packingcv00 (a); + + if (retcv00[0] != 0 + || retcv00[1] != 0x11) + return 1; + + ret11 = packing11 (a, b); + + if (ret11[0] != 0x22 + || ret11[1] != 0x44) + return 1; + + ret1cv0 = packing1cv0 (a); + + if (ret1cv0[0] != 0x22 + || ret1cv0[1] != 0) + return 1; + + retcv01 = packingcv01 (a); + + if (retcv01[0] != 0 + || retcv01[1] != 0x22) + return 1; + + ret01 = packing01 (a, b); + + if (ret01[0] != 0x11 + || ret01[1] != 0x44) + return 1; + + ret10 = packing10 (a, b); + + if (ret10[0] != 0x22 + || ret10[1] != 0x33) + return 1; + + return 0; +} diff -Nur gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/nds32.exp gcc-4.9.4/gcc/testsuite/gcc.target/nds32/nds32.exp --- gcc-4.9.4.orig/gcc/testsuite/gcc.target/nds32/nds32.exp 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/gcc.target/nds32/nds32.exp 2016-08-08 20:37:53.574582455 +0200 @@ -1,5 +1,5 @@ # Target test cases of Andes NDS32 cpu for GNU compiler -# Copyright (C) 2012-2014 Free Software Foundation, Inc. +# Copyright (C) 2012-2015 Free Software Foundation, Inc. # Contributed by Andes Technology Corporation. # # This file is part of GCC. @@ -40,6 +40,8 @@ # Main loop. dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \ "" $DEFAULT_CFLAGS +gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/dsp/*.\[cS\]]] \ + "" # All done. dg-finish diff -Nur gcc-4.9.4.orig/gcc/testsuite/g++.dg/init/array15.C gcc-4.9.4/gcc/testsuite/g++.dg/init/array15.C --- gcc-4.9.4.orig/gcc/testsuite/g++.dg/init/array15.C 2004-12-09 10:37:37.000000000 +0100 +++ gcc-4.9.4/gcc/testsuite/g++.dg/init/array15.C 2016-08-08 20:37:53.542581218 +0200 @@ -1,4 +1,6 @@ // { dg-do run } +// { dg-require-effective-target nds32_full_addr_space { target nds32*-*-elf* } } +// { dg-options "-mcmodel=large" { target nds32*-*-elf* } } // Copyright (C) 2004 Free Software Foundation, Inc. // Contributed by Nathan Sidwell 8 Dec 2004 diff -Nur gcc-4.9.4.orig/gcc/testsuite/g++.dg/init/array16.C gcc-4.9.4/gcc/testsuite/g++.dg/init/array16.C --- gcc-4.9.4.orig/gcc/testsuite/g++.dg/init/array16.C 2010-08-11 04:00:15.000000000 +0200 +++ gcc-4.9.4/gcc/testsuite/g++.dg/init/array16.C 2016-08-08 20:37:53.542581218 +0200 @@ -2,6 +2,7 @@ // have "compile" for some targets and "run" for others. // { dg-do run { target { ! mmix-*-* } } } // { dg-options "-mstructure-size-boundary=8" { target arm*-*-* } } +// { dg-skip-if "" { nds32_gp_direct } } // Copyright (C) 2004 Free Software Foundation, Inc. 
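The dsp-v2hi-packing00.c run test above only checks computed values; it does not assert which instructions the packing idioms are matched to. A hypothetical companion compile test, written in the same style as dsp-add-sub.c, is sketched below. The pkbb16/pktt16 mnemonics and their pairing with particular element orders are assumptions here (the actual selection depends on endianness and operand order), not something this patch's testsuite verifies.

/* { dg-do compile } */
/* { dg-options "-O2 -mext-dsp" } */
/* { dg-final { scan-assembler "pkbb16" } } */
/* { dg-final { scan-assembler "pktt16" } } */

typedef short v2hi __attribute__ ((vector_size (4)));

/* Pair the bottom halfwords of x and y; assumed to map to a
   pack-halfword instruction such as pkbb16 under -mext-dsp.  */
v2hi pack_bb (v2hi x, v2hi y)
{
  v2hi ret;
  ret[0] = x[0];
  ret[1] = y[0];
  return ret;
}

/* Pair the top halfwords of x and y; assumed to map to pktt16.  */
v2hi pack_tt (v2hi x, v2hi y)
{
  v2hi ret;
  ret[0] = x[1];
  ret[1] = y[1];
  return ret;
}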
// Contributed by Nathan Sidwell 8 Dec 2004 diff -Nur gcc-4.9.4.orig/gcc/testsuite/g++.dg/torture/type-generic-1.C gcc-4.9.4/gcc/testsuite/g++.dg/torture/type-generic-1.C --- gcc-4.9.4.orig/gcc/testsuite/g++.dg/torture/type-generic-1.C 2009-09-01 00:23:27.000000000 +0200 +++ gcc-4.9.4/gcc/testsuite/g++.dg/torture/type-generic-1.C 2016-08-08 20:37:53.546581373 +0200 @@ -4,6 +4,7 @@ /* { dg-do run } */ /* { dg-add-options ieee } */ /* { dg-skip-if "No Inf/NaN support" { spu-*-* } } */ +/* { dg-skip-if "No Denormmalized support" { nds32_ext_fpu } } */ #include "../../gcc.dg/tg-tests.h" diff -Nur gcc-4.9.4.orig/gcc/testsuite/lib/target-supports.exp gcc-4.9.4/gcc/testsuite/lib/target-supports.exp --- gcc-4.9.4.orig/gcc/testsuite/lib/target-supports.exp 2016-05-22 10:53:32.000000000 +0200 +++ gcc-4.9.4/gcc/testsuite/lib/target-supports.exp 2016-08-08 20:37:53.574582455 +0200 @@ -453,6 +453,10 @@ || [istarget hppa64-hp-hpux11.23] } { return 0; } + if { [istarget nds32*-*-*] + && [check_effective_target_nds32_reduced_regs] } { + return 0; + } return 1 } @@ -3029,6 +3033,114 @@ } "-O2 -mthumb" ] } +# If board info says it only has 16M addressing space, return 0. +# Otherwise, return 1. +proc check_effective_target_nds32_full_addr_space { } { + if [board_info target exists addr16m] { + return 0 + } + return 1; +} + +# Return 1 if gp direct is enable by default. +proc check_effective_target_nds32_gp_direct { } { + return [check_no_compiler_messages gp_direct object { + #ifdef __NDS32_GP_DIRECT__ + int dummy; + #else + #error no GP_DIRECT + #endif + }] +} + +# Return 1 if this is a nds32 target supporting -mext-perf. +proc check_effective_target_nds32_ext_perf { } { + return [check_no_compiler_messages ext_perf object { + #ifdef __NDS32_EXT_PERF__ + int dummy; + #else + #error no EXT_PERF + #endif + }] +} + +# Return 1 if this is a nds32 target supporting -mext-perf2. +proc check_effective_target_nds32_ext_perf2 { } { + return [check_no_compiler_messages ext_perf2 object { + #ifdef __NDS32_EXT_PERF2__ + int dummy; + #else + #error no EXT_PERF2 + #endif + }] +} + +# Return 1 if this is a nds32 target supporting -mext-string. +proc check_effective_target_nds32_ext_string { } { + return [check_no_compiler_messages ext_string object { + #ifdef __NDS32_EXT_STRING__ + int dummy; + #else + #error no EXT_STRING + #endif + }] +} + +# Return 1 if this is a nds32 target supporting -mext-fpu-sp or -mext-fpu-dp. +proc check_effective_target_nds32_ext_fpu { } { + return [check_no_compiler_messages ext_fpu object { + #if defined(__NDS32_EXT_FPU_SP__) || defined(__NDS32_EXT_FPU_DP__) + int dummy; + #else + #error no support FPU + #endif + }] +} + +# Return 1 if this is a nds32 target supporting -mext-fpu-sp. +proc check_effective_target_nds32_ext_fpu_sp { } { + return [check_no_compiler_messages ext_fpu_sp object { + #ifdef __NDS32_EXT_FPU_SP__ + int dummy; + #else + #error no EXT_FPU_SP + #endif + }] +} + +# Return 1 if this is a nds32 target supporting -mext-fpu-dp. +proc check_effective_target_nds32_ext_fpu_dp { } { + return [check_no_compiler_messages ext_fpu_dp object { + #ifdef __NDS32_EXT_FPU_DP__ + int dummy; + #else + #error no EXT_FPU_DP + #endif + }] +} + +# Return 1 if this is a nds32 target supporting -mreduced-regs. +proc check_effective_target_nds32_reduced_regs { } { + return [check_no_compiler_messages reduced_regs object { + #ifdef __NDS32_REDUCED_REGS__ + int dummy; + #else + #error no REDUCED_REGS + #endif + }] +} + +# Return 1 if this is a nds32 target not supporting v3m ISA. 
+proc check_effective_target_nds32_no_v3m { } { + return [check_no_compiler_messages no_v3m object { + #if !defined(__NDS32_BASELINE_V3M__) + int dummy; + #else + #error Support V3M ISA + #endif + }] +} + # Return 1 if this is a PowerPC target supporting -meabi. proc check_effective_target_powerpc_eabi_ok { } { @@ -5875,6 +5987,7 @@ || [istarget arc*-*-*] || [istarget avr*-*-*] || [istarget crisv32-*-*] || [istarget cris-*-*] + || [istarget nds32*-*-*] || [istarget s390*-*-*] || [check_effective_target_arm_cortex_m] } { return 1 diff -Nur gcc-4.9.4.orig/gcc/timevar.def gcc-4.9.4/gcc/timevar.def --- gcc-4.9.4.orig/gcc/timevar.def 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/timevar.def 2016-08-08 20:37:53.574582455 +0200 @@ -167,6 +167,7 @@ DEFTIMEVAR (TV_SCEV_CONST , "scev constant prop") DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching") DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling") +DEFTIMEVAR (TV_SWITCH_SHORTCUT , "switch statement shortcuts") DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops") DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization") DEFTIMEVAR (TV_TREE_SLP_VECTORIZATION, "tree slp vectorization") diff -Nur gcc-4.9.4.orig/gcc/tree-loop-distribution.c gcc-4.9.4/gcc/tree-loop-distribution.c --- gcc-4.9.4.orig/gcc/tree-loop-distribution.c 2015-02-20 08:32:08.000000000 +0100 +++ gcc-4.9.4/gcc/tree-loop-distribution.c 2016-08-08 20:37:53.574582455 +0200 @@ -1067,7 +1067,7 @@ gimple_bb (DR_STMT (single_store)))) plus_one = true; - if (single_store && !single_load) + if (single_store && !single_load && !flag_no_builtin) { gimple stmt = DR_STMT (single_store); tree rhs = gimple_assign_rhs1 (stmt); @@ -1089,7 +1089,7 @@ partition->niter = nb_iter; partition->plus_one = plus_one; } - else if (single_store && single_load) + else if (single_store && single_load && !flag_no_builtin) { gimple store = DR_STMT (single_store); gimple load = DR_STMT (single_load); diff -Nur gcc-4.9.4.orig/gcc/tree-pass.h gcc-4.9.4/gcc/tree-pass.h --- gcc-4.9.4.orig/gcc/tree-pass.h 2014-01-02 23:23:26.000000000 +0100 +++ gcc-4.9.4/gcc/tree-pass.h 2016-08-08 20:37:53.574582455 +0200 @@ -585,6 +585,7 @@ extern gimple_opt_pass *make_pass_inline_parameters (gcc::context *ctxt); extern gimple_opt_pass *make_pass_update_address_taken (gcc::context *ctxt); extern gimple_opt_pass *make_pass_convert_switch (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_switch_shortcut (gcc::context *ctxt); /* Current optimization pass. */ extern opt_pass *current_pass; diff -Nur gcc-4.9.4.orig/gcc/tree-switch-shortcut.c gcc-4.9.4/gcc/tree-switch-shortcut.c --- gcc-4.9.4.orig/gcc/tree-switch-shortcut.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/gcc/tree-switch-shortcut.c 2016-08-08 20:37:53.574582455 +0200 @@ -0,0 +1,423 @@ +/* Switch shortcutting optimization for GNU C + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by Steve Ellcey (sellcey@imgtec.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +/* This file implements an optimization where, when a variable is set + to a constant value and there is a path that leads from this definition + to a switch statement that uses that variable as its controlling expression + we duplicate the blocks on this path and change the switch goto to a + direct goto to the label of the switch block that control would goto based + on the value of the variable. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "line-map.h" +#include "params.h" +#include "flags.h" +#include "tree.h" +#include "varasm.h" +#include "stor-layout.h" +#include "basic-block.h" +#include "tree-ssa-alias.h" +#include "internal-fn.h" +#include "gimple-expr.h" +#include "is-a.h" +#include "gimple.h" +#include "gimplify.h" +#include "gimple-iterator.h" +#include "gimplify-me.h" +#include "gimple-ssa.h" +#include "cgraph.h" +#include "tree-cfg.h" +#include "tree-phinodes.h" +#include "stringpool.h" +#include "tree-ssanames.h" +#include "tree-pass.h" +#include "gimple-pretty-print.h" +#include "cfgloop.h" +#include "pointer-set.h" + +#include "tree-inline.h" +#include "tree-ssa-alias.h" +#include "tree-into-ssa.h" +#include "tree-pass.h" + +#if 0 +#include "tree.h" +#include "internal-fn.h" +//#include "tree-flow.h" +//#include "tree-flow-inline.h" +#include "basic-block.h" +#include "gimple.h" +#include "cfgloop.h" +#endif +#include "params.h" + +/* Helper function for find_path, visited_bbs is used to make sure we don't + fall into an infinite loop. */ + +static int +find_path_1(basic_block start_bb, basic_block end_bb, struct pointer_set_t *visited_bbs) +{ + edge_iterator ei; + edge e; + + if (start_bb == end_bb) return 1; + + if (!pointer_set_insert (visited_bbs, start_bb)) + { + FOR_EACH_EDGE (e, ei, start_bb->succs) + if (find_path_1 (e->dest, end_bb, visited_bbs)) + return 1; + } + return 0; +} + +/* Return 1 if there is a path from start_bb to end_bb and 0 if there + is not. There may be multiple paths from start_bb to end_bb. */ + +static int +find_path(basic_block start_bb, basic_block end_bb) +{ + edge_iterator ei; + edge e; + struct pointer_set_t *visited_bbs; + int p = 0; + + if (start_bb == end_bb) return 1; + + visited_bbs = pointer_set_create (); + if (!pointer_set_insert (visited_bbs, start_bb)) + { + FOR_EACH_EDGE (e, ei, start_bb->succs) + if (find_path_1 (e->dest, end_bb, visited_bbs)) + { + p = 1; + break; + } + } + pointer_set_destroy (visited_bbs); + return p; +} + + +/* We save the paths we want to copy in bbs_list_array. n_bbs_list is the + number of paths saved, bbs_list_array[i] is the list of basic blocks in + one path. Each path starts with the block where a variable is assigned + a constant value (bbs_list_array[i][0]) and ends with the switch statement + block (bbs_list_array[i][bbs_list_size[i]-2]) and then the block that the + switch statement is going to go to given the constant value of the + variable (bbs_list_array[i][bbs_list_size[i]-1]). */ + +static basic_block **bbs_list_array; +static int *val_array; +static int *bbs_list_size; +static int max_path_count; +static int max_insn_count; +static int n_bbs_list; + +/* bbs_list[0] is the block with the switch statement, + bbs_list[n-1] is the block where the switch statement variable is assigned + a constant value, + The entries in between make a (reverse) path between the two. 
+ + We don't want to change bb_list, we want to leave that alone and + copy the path to bbs_list_array so that we wind up with a list (array) + of paths that we want to update. We also want to add the block that the + switch is going to go to onto the list so that we know which exit from + the switch statement is important. */ + +static void +save_new_path (basic_block *bbs_list, int n, tree val) +{ + int i; + int insn_count; + basic_block bb; + edge switch_taken_edge; + gimple_stmt_iterator gsi; + + if (n <= 1) return; + + if (n_bbs_list >= max_path_count) + return; + + /* Put the blocks in 'correct' order and add in where we want to go after + the switch statement. We want to leave bbs_list untouched for future + calls. */ + + bbs_list_array[n_bbs_list] = XNEWVEC (basic_block, n+1); + for (i = 0; i < n; i++) + bbs_list_array[n_bbs_list][i] = bbs_list[n-i-1]; + + switch_taken_edge = find_taken_edge (bbs_list[0], val); + bbs_list_array[n_bbs_list][n] = switch_taken_edge->dest; + + bbs_list_size[n_bbs_list] = n + 1; + val_array[n_bbs_list] = (int) TREE_INT_CST_LOW (val); + + /* Count how many instructions are in the blocks we are going to + duplicate and if there are too many do not save this path + (return without incrementing n_bbs_list). */ + + insn_count = 0; + for (i = 1; i < n; i++) + { + bb = bbs_list_array[n_bbs_list][i]; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + insn_count += estimate_num_insns (gsi_stmt (gsi), &eni_size_weights); + } + + if (insn_count > max_insn_count) + return; + + n_bbs_list = n_bbs_list + 1; +} + +/* switch_stmt is a switch statement whose switch index expression + is the variable expr. We trace the value of the variable back + through any phi nodes looking for places where it gets a constant + value and save the path in bbs_list. Then we call save_new_path + to create a list of such paths. */ + +static void +process_switch (tree expr, gimple switch_stmt, + struct pointer_set_t *visited_phis, + basic_block *bbs_list, int n) +{ + gimple def_stmt; + tree var; + unsigned int i; + edge e; + edge_iterator ei; + basic_block bbx; + basic_block var_bb; + int e_count; + + gcc_assert (gimple_code (switch_stmt) == GIMPLE_SWITCH); + var = SSA_NAME_VAR (expr); + def_stmt = SSA_NAME_DEF_STMT (expr); + var_bb = gimple_bb (def_stmt); + + if (var == NULL || var_bb == NULL) return; + + /* We have a variable definition (var) that is defined in var_bb. + We want to put the path from var_bb to the current bb into the + bbs_list. If there is more than one path, skip this and don't + try to do the optimization. 
*/ + + bbx = bbs_list[n-1]; + while (bbx != var_bb) + { + e_count = 0; + FOR_EACH_EDGE (e, ei, bbx->preds) + { + if (find_path (var_bb, e->src)) + { + bbs_list[n] = e->src; + n = n + 1; + e_count = e_count + 1; + } + } + if (e_count != 1) return; + bbx = bbs_list[n-1]; + } + + if ((gimple_code (def_stmt) == GIMPLE_PHI) + && !pointer_set_insert (visited_phis, def_stmt)) + { + for (i = 0; i < gimple_phi_num_args (def_stmt); i++) + { + tree arg = gimple_phi_arg_def (def_stmt, i); + if (arg && (TREE_CODE (arg) == INTEGER_CST)) + { + /* const char *name = IDENTIFIER_POINTER (DECL_NAME (var)); */ + bbs_list[n] = gimple_phi_arg_edge (def_stmt, i)->src; + save_new_path(bbs_list, n + 1, arg); + } + else if (arg && (TREE_CODE (arg) == SSA_NAME)) + { + bbs_list[n] = gimple_phi_arg_edge (def_stmt, i)->src; + process_switch (arg, switch_stmt, visited_phis, bbs_list, n+1); + } + } + } +} + +/* Find paths that lead from blocks where a variable is assigned a constant + value to a switch statement where that variable is used as the switch + index. Save the paths in bbs_list_array so that they can be processed + by copy_switch_paths. */ + +static unsigned int +find_switch_shortcuts (void) +{ + basic_block bb; + struct pointer_set_t *visited_phis; + basic_block *bbs_list; + int n = 1; + + bbs_list = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun)); + visited_phis = pointer_set_create (); + FOR_EACH_BB_FN (bb, cfun) + { + gimple stmt = last_stmt (bb); + if (stmt && gimple_code (stmt) == GIMPLE_SWITCH) + { + tree op = gimple_switch_index (stmt); + tree var = SSA_NAME_VAR (op); + if (var) + { + bbs_list[0] = bb; + process_switch (op, stmt, visited_phis, bbs_list, n); + } + } + } + pointer_set_destroy (visited_phis); + XDELETEVEC (bbs_list); + return 0; +} + +/* Call gimple_duplicate_sese_region to duplicate the blocks in bb_list. + We free and recalculate all SSA and dominance information afterwards + because the region being copied is not really SESE and so we cannot + trust gimple_duplicate_sese_region to correctly update the dataflow + information. */ + +static void +duplicate_blocks (basic_block *bb_list, int bb_count) +{ + edge orig_edge, exit_edge; + + orig_edge = find_edge (bb_list[0], bb_list[1]); + exit_edge = find_edge (bb_list[bb_count-2], bb_list[bb_count-1]); + gimple_duplicate_sese_region (orig_edge, exit_edge, &bb_list[1], bb_count-2, NULL, true); + free_dominance_info (CDI_DOMINATORS); + update_ssa (TODO_update_ssa); + calculate_dominance_info (CDI_DOMINATORS); +} + +/* Go through the paths saved in bbs_list_array and make copies of them. */ + +static void +copy_switch_paths (void) +{ + int i; + + /* Process each path in bbs_list_size. */ + for (i = 0; i < n_bbs_list; i++) + { + /* For each path in bbs_list_size, loop through and copy each block in + the path (except the first one where the constant is assigned and + the final one where the switch statement goes to). 
*/ + + if (!single_pred_p (bbs_list_array[i][1])) + duplicate_blocks (bbs_list_array[i], bbs_list_size[i]); + } +} + +static unsigned int +do_switch_shortcut (void) +{ + int i; + + n_bbs_list = 0; + max_insn_count = PARAM_VALUE (PARAM_MAX_SWITCH_INSNS); + max_path_count = PARAM_VALUE (PARAM_MAX_SWITCH_PATHS); + val_array = XNEWVEC (int, max_path_count); + bbs_list_size = XNEWVEC (int, max_path_count); + bbs_list_array = XNEWVEC (basic_block *, max_path_count); + find_switch_shortcuts (); + copy_switch_paths (); + XDELETEVEC (val_array); + XDELETEVEC (bbs_list_size); + for (i = 0; i < n_bbs_list; i++) + XDELETEVEC (bbs_list_array[i]); + XDELETEVEC (bbs_list_array); + return 0; +} + +/* The pass gate. */ + +static bool +gate_switch_shortcut (void) +{ + return flag_tree_switch_shortcut; +} + +namespace { +#if 0 +struct gimple_opt_pass pass_switch_shortcut = +{ + { + GIMPLE_PASS, + "switch_shortcut", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + gate_switch_shortcut, /* gate */ + do_switch_shortcut, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_SWITCH_SHORTCUT, /* tv_id */ + PROP_cfg | PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_cleanup_cfg | TODO_verify_all, /* todo_flags_finish */ + } +}; +#endif + +const pass_data pass_data_switch_shortcut = +{ + GIMPLE_PASS, /* type */ + "switch_shortcut", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_SWITCH_SHORTCUT, /* tv_id */ + ( PROP_cfg | PROP_ssa ), /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + ( TODO_cleanup_cfg | TODO_verify_all), /* todo_flags_finish */ +}; + +class pass_switch_shortcut : public gimple_opt_pass +{ +public: + pass_switch_shortcut (gcc::context *ctxt) + : gimple_opt_pass (pass_data_switch_shortcut, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return gate_switch_shortcut (); } + unsigned int execute () { return do_switch_shortcut (); } + +}; // class pass_convert_switch + + +} + +gimple_opt_pass * +make_pass_switch_shortcut (gcc::context *ctxt) +{ + return new pass_switch_shortcut (ctxt); +} diff -Nur gcc-4.9.4.orig/gcc/tree-vrp.c gcc-4.9.4/gcc/tree-vrp.c --- gcc-4.9.4.orig/gcc/tree-vrp.c 2016-02-11 10:22:21.000000000 +0100 +++ gcc-4.9.4/gcc/tree-vrp.c 2016-08-08 20:37:53.578582611 +0200 @@ -9009,6 +9009,7 @@ used for the comparison directly if we just massage the constant in the comparison. */ if (TREE_CODE (op0) == SSA_NAME + && has_single_use (op0) && TREE_CODE (op1) == INTEGER_CST) { gimple def_stmt = SSA_NAME_DEF_STMT (op0); diff -Nur gcc-4.9.4.orig/gcc/varasm.c gcc-4.9.4/gcc/varasm.c --- gcc-4.9.4.orig/gcc/varasm.c 2015-05-26 22:16:17.000000000 +0200 +++ gcc-4.9.4/gcc/varasm.c 2016-08-08 20:37:53.578582611 +0200 @@ -3243,7 +3243,7 @@ TREE_CONSTANT_POOL_ADDRESS_P (symbol) = 1; rtl = gen_const_mem (TYPE_MODE (TREE_TYPE (exp)), symbol); - set_mem_attributes (rtl, exp, 1); + set_mem_attributes (rtl, decl, 1); set_mem_alias_set (rtl, 0); /* We cannot share RTX'es in pool entries. 
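To make the intent of the new tree-switch-shortcut.c pass concrete, here is a hypothetical example (not part of the patch) of the code shape it targets: a variable receives known constant values on several paths and is then used as the controlling expression of a switch. When the pass runs (it is gated on flag_tree_switch_shortcut, presumably exposed as -ftree-switch-shortcut elsewhere in this patch), the blocks between each constant assignment and the switch are duplicated so that control can jump directly to the matching case label, subject to the PARAM_MAX_SWITCH_PATHS and PARAM_MAX_SWITCH_INSNS limits read in do_switch_shortcut.

/* Hypothetical state-machine loop; each assignment to `state' is a
   constant that reaches the switch index through a PHI node, which is
   exactly the pattern process_switch traces back from the switch.  */
int
run (const unsigned char *in, int n)
{
  int state = 0, out = 0, i;

  for (i = 0; i < n; i++)
    {
      switch (state)
        {
        case 0:
          out += in[i];
          state = (in[i] & 1) ? 1 : 2;  /* constants 1 and 2 */
          break;
        case 1:
          out ^= in[i];
          state = 2;                    /* constant 2 */
          break;
        default:
          out -= in[i];
          state = 0;                    /* constant 0 */
          break;
        }
    }
  return out;
}

After duplication, the block that assigns state = 2, for instance, can continue straight into a copy of the default case for the next iteration instead of re-evaluating the switch, at the cost of the duplicated blocks on that path.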
diff -Nur gcc-4.9.4.orig/libcpp/configure gcc-4.9.4/libcpp/configure --- gcc-4.9.4.orig/libcpp/configure 2016-08-03 07:09:47.000000000 +0200 +++ gcc-4.9.4/libcpp/configure 2016-08-08 20:37:53.578582611 +0200 @@ -7162,6 +7162,7 @@ i[34567]86-*-* | x86_64-*-solaris2.1[0-9]* | \ mips*-*-* | \ mmix-*-* | \ + nds32*-*-* | \ powerpc*-*-* | \ rs6000*-*-* | \ s390*-*-* | \ diff -Nur gcc-4.9.4.orig/libcpp/configure.ac gcc-4.9.4/libcpp/configure.ac --- gcc-4.9.4.orig/libcpp/configure.ac 2014-02-24 16:08:00.000000000 +0100 +++ gcc-4.9.4/libcpp/configure.ac 2016-08-08 20:37:53.578582611 +0200 @@ -191,6 +191,7 @@ i[34567]86-*-* | x86_64-*-solaris2.1[0-9]* | \ mips*-*-* | \ mmix-*-* | \ + nds32*-*-* | \ powerpc*-*-* | \ rs6000*-*-* | \ s390*-*-* | \ diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/crtzero.S gcc-4.9.4/libgcc/config/nds32/crtzero.S --- gcc-4.9.4.orig/libgcc/config/nds32/crtzero.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/crtzero.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,103 +0,0 @@ -/* The startup code sample of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -!!============================================================================== -!! -!! crtzero.S -!! -!! This is JUST A SAMPLE of nds32 startup code !! -!! You can refer this content and implement -!! the actual one in newlib/mculib. -!! -!!============================================================================== - -!!------------------------------------------------------------------------------ -!! Jump to start up code -!!------------------------------------------------------------------------------ - .section .nds32_init, "ax" - j _start - -!!------------------------------------------------------------------------------ -!! Startup code implementation -!!------------------------------------------------------------------------------ - .section .text - .global _start - .weak _SDA_BASE_ - .weak _FP_BASE_ - .align 2 - .func _start - .type _start, @function -_start: -.L_fp_gp_lp_init: - la $fp, _FP_BASE_ ! init $fp - la $gp, _SDA_BASE_ ! init $gp for small data access - movi $lp, 0 ! init $lp - -.L_stack_init: - la $sp, _stack ! init $sp - movi $r0, -8 ! align $sp to 8-byte (use 0xfffffff8) - and $sp, $sp, $r0 ! align $sp to 8-byte (filter out lower 3-bit) - -.L_bss_init: - ! clear BSS, this process can be 4 time faster if data is 4 byte aligned - ! if so, use swi.p instead of sbi.p - ! the related stuff are defined in linker script - la $r0, _edata ! get the starting addr of bss - la $r2, _end ! 
get ending addr of bss - beq $r0, $r2, .L_call_main ! if no bss just do nothing - movi $r1, 0 ! should be cleared to 0 -.L_clear_bss: - sbi.p $r1, [$r0], 1 ! Set 0 to bss - bne $r0, $r2, .L_clear_bss ! Still bytes left to set - -!.L_stack_heap_check: -! la $r0, _end ! init heap_end -! s.w $r0, heap_end ! save it - - -!.L_init_argc_argv: -! ! argc/argv initialization if necessary; default implementation is in crt1.o -! la $r9, _arg_init ! load address of _arg_init? -! beqz $r9, .L4 ! has _arg_init? no, go check main() -! addi $sp, $sp, -512 ! allocate space for command line + arguments -! move $r6, $sp ! r6 = buffer addr of cmd line -! move $r0, $r6 ! r0 = buffer addr of cmd line -! syscall 6002 ! get cmd line -! move $r0, $r6 ! r0 = buffer addr of cmd line -! addi $r1, $r6, 256 ! r1 = argv -! jral $r9 ! init argc/argv -! addi $r1, $r6, 256 ! r1 = argv - -.L_call_main: - ! call main() if main() is provided - la $r15, main ! load address of main - jral $r15 ! call main - -.L_terminate_program: - syscall 0x1 ! use syscall 0x1 to terminate program - .size _start, .-_start - .end - -!! ------------------------------------------------------------------------ diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/initfini.c gcc-4.9.4/libgcc/config/nds32/initfini.c --- gcc-4.9.4.orig/libgcc/config/nds32/initfini.c 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/initfini.c 2016-08-08 20:37:53.698587257 +0200 @@ -1,7 +1,7 @@ /* .init/.fini section handling + C++ global constructor/destructor handling of Andes NDS32 cpu for GNU compiler. This file is based on crtstuff.c, sol2-crti.asm, sol2-crtn.asm. - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -25,6 +25,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ +#include +/* Need header file for `struct object' type. */ +#include "../libgcc/unwind-dw2-fde.h" + /* Declare a pointer to void function type. */ typedef void (*func_ptr) (void); @@ -42,11 +46,59 @@ refer to only the __CTOR_END__ symbol in crtfini.o and the __DTOR_LIST__ symbol in crtinit.o, where they are defined. */ -static func_ptr __CTOR_LIST__[1] __attribute__ ((section (".ctors"))) - = { (func_ptr) (-1) }; +static func_ptr __CTOR_LIST__[1] __attribute__ ((section (".ctors"), used)) + = { (func_ptr) 0 }; + +static func_ptr __DTOR_LIST__[1] __attribute__ ((section (".dtors"), used)) + = { (func_ptr) 0 }; + + +#ifdef SUPPORT_UNWINDING_DWARF2 +/* Preparation of exception handling with dwar2 mechanism registration. */ + +asm ("\n\ + .section .eh_frame,\"aw\",@progbits\n\ + .global __EH_FRAME_BEGIN__\n\ + .type __EH_FRAME_BEGIN__, @object\n\ + .align 2\n\ +__EH_FRAME_BEGIN__:\n\ + ! Beginning location of eh_frame section\n\ + .previous\n\ +"); + +extern func_ptr __EH_FRAME_BEGIN__[]; + -static func_ptr __DTOR_LIST__[1] __attribute__ ((section (".dtors"))) - = { (func_ptr) (-1) }; +/* Note that the following two functions are going to be chained into + constructor and destructor list, repectively. So these two declarations + must be placed after __CTOR_LIST__ and __DTOR_LIST. */ +extern void __nds32_register_eh(void) __attribute__((constructor, used)); +extern void __nds32_deregister_eh(void) __attribute__((destructor, used)); + +/* Register the exception handling table as the first constructor. 
*/ +void +__nds32_register_eh (void) +{ + static struct object object; + if (__register_frame_info) + __register_frame_info (__EH_FRAME_BEGIN__, &object); +} + +/* Unregister the exception handling table as a deconstructor. */ +void +__nds32_deregister_eh (void) +{ + static int completed = 0; + + if (completed) + return; + + if (__deregister_frame_info) + __deregister_frame_info (__EH_FRAME_BEGIN__); + + completed = 1; +} +#endif /* Run all the global destructors on exit from the program. */ @@ -63,7 +115,7 @@ same particular root executable or shared library file. */ static void __do_global_dtors (void) -asm ("__do_global_dtors") __attribute__ ((section (".text"))); +asm ("__do_global_dtors") __attribute__ ((section (".text"), used)); static void __do_global_dtors (void) @@ -116,23 +168,37 @@ last, these words naturally end up at the very ends of the two lists contained in these two sections. */ -static func_ptr __CTOR_END__[1] __attribute__ ((section (".ctors"))) +static func_ptr __CTOR_END__[1] __attribute__ ((section (".ctors"), used)) = { (func_ptr) 0 }; -static func_ptr __DTOR_END__[1] __attribute__ ((section (".dtors"))) +static func_ptr __DTOR_END__[1] __attribute__ ((section (".dtors"), used)) = { (func_ptr) 0 }; +#ifdef SUPPORT_UNWINDING_DWARF2 +/* ZERO terminator in .eh_frame section. */ +asm ("\n\ + .section .eh_frame,\"aw\",@progbits\n\ + .global __EH_FRAME_END__\n\ + .type __EH_FRAME_END__, @object\n\ + .align 2\n\ +__EH_FRAME_END__:\n\ + ! End location of eh_frame section with ZERO terminator\n\ + .word 0\n\ + .previous\n\ +"); +#endif + /* Run all global constructors for the program. Note that they are run in reverse order. */ static void __do_global_ctors (void) -asm ("__do_global_ctors") __attribute__ ((section (".text"))); +asm ("__do_global_ctors") __attribute__ ((section (".text"), used)); static void __do_global_ctors (void) { func_ptr *p; - for (p = __CTOR_END__ - 1; *p != (func_ptr) -1; p--) + for (p = __CTOR_END__ - 1; *p; p--) (*p) (); } diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/adj_intr_lvl.inc gcc-4.9.4/libgcc/config/nds32/isr-library/adj_intr_lvl.inc --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/adj_intr_lvl.inc 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/adj_intr_lvl.inc 2016-08-08 20:37:53.698587257 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -26,13 +26,26 @@ .macro ADJ_INTR_LVL #if defined(NDS32_NESTED) /* Nested handler. */ mfsr $r3, $PSW + /* By substracting 1 from $PSW, we can lower PSW.INTL + and enable GIE simultaneously. */ addi $r3, $r3, #-0x1 + #if __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__ + ori $r3, $r3, 0x2000 /* Set PSW.AEN(b'13) */ + #endif mtsr $r3, $PSW #elif defined(NDS32_NESTED_READY) /* Nested ready handler. */ /* Save ipc and ipsw and lower INT level. */ mfsr $r3, $PSW addi $r3, $r3, #-0x2 + #if __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__ + ori $r3, $r3, 0x2000 /* Set PSW.AEN(b'13) */ + #endif mtsr $r3, $PSW #else /* Not nested handler. 
*/ + #if __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__ + mfsr $r3, $PSW + ori $r3, $r3, 0x2000 /* Set PSW.AEN(b'13) */ + mtsr $r3, $PSW + #endif #endif .endm diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/excp_isr_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/excp_isr_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/excp_isr_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/excp_isr_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,133 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#include "save_mac_regs.inc" -#include "save_fpu_regs.inc" -#include "save_fpu_regs_00.inc" -#include "save_fpu_regs_01.inc" -#include "save_fpu_regs_02.inc" -#include "save_fpu_regs_03.inc" -#include "save_all.inc" -#include "save_partial.inc" -#include "adj_intr_lvl.inc" -#include "restore_mac_regs.inc" -#include "restore_fpu_regs_00.inc" -#include "restore_fpu_regs_01.inc" -#include "restore_fpu_regs_02.inc" -#include "restore_fpu_regs_03.inc" -#include "restore_fpu_regs.inc" -#include "restore_all.inc" -#include "restore_partial.inc" - .section .nds32_isr, "ax" /* Put it in the section of 1st level handler. */ - .align 1 -/* - First Level Handlers - 1. First Level Handlers are invokded in vector section via jump instruction - with specific names for different configurations. - 2. Naming Format: _nds32_e_SR_NT for exception handlers. - _nds32_i_SR_NT for interrupt handlers. - 2.1 All upper case letters are replaced with specific lower case letters encodings. - 2.2 SR: Saved Registers - sa: Save All regs (context) - ps: Partial Save (all caller-saved regs) - 2.3 NT: Nested Type - ns: nested - nn: not nested - nr: nested ready -*/ - -/* - This is 4-byte vector size version. - The "_4b" postfix was added for 4-byte version symbol. -*/ -#ifdef NDS32_SAVE_ALL_REGS -#if defined(NDS32_NESTED) - .globl _nds32_e_sa_ns_4b - .type _nds32_e_sa_ns_4b, @function -_nds32_e_sa_ns_4b: -#elif defined(NDS32_NESTED_READY) - .globl _nds32_e_sa_nr_4b - .type _nds32_e_sa_nr_4b, @function -_nds32_e_sa_nr_4b: -#else /* Not nested handler. */ - .globl _nds32_e_sa_nn_4b - .type _nds32_e_sa_nn_4b, @function -_nds32_e_sa_nn_4b: -#endif /* endif for Nest Type */ -#else /* not NDS32_SAVE_ALL_REGS */ -#if defined(NDS32_NESTED) - .globl _nds32_e_ps_ns_4b - .type _nds32_e_ps_ns_4b, @function -_nds32_e_ps_ns_4b: -#elif defined(NDS32_NESTED_READY) - .globl _nds32_e_ps_nr_4b - .type _nds32_e_ps_nr_4b, @function -_nds32_e_ps_nr_4b: -#else /* Not nested handler. 
*/ - .globl _nds32_e_ps_nn_4b - .type _nds32_e_ps_nn_4b, @function -_nds32_e_ps_nn_4b: -#endif /* endif for Nest Type */ -#endif /* not NDS32_SAVE_ALL_REGS */ - -/* - This is 4-byte vector size version. - The vector id was restored into $lp in vector by compiler. -*/ -#ifdef NDS32_SAVE_ALL_REGS - SAVE_ALL_4B -#else - SAVE_PARTIAL_4B -#endif - /* Prepare to call 2nd level handler. */ - la $r2, _nds32_jmptbl_00 - lw $r2, [$r2 + $r0 << #2] - ADJ_INTR_LVL /* Adjust INTR level. $r3 is clobbered. */ - jral $r2 - /* Restore used registers. */ -#ifdef NDS32_SAVE_ALL_REGS - RESTORE_ALL -#else - RESTORE_PARTIAL -#endif - iret - -#ifdef NDS32_SAVE_ALL_REGS -#if defined(NDS32_NESTED) - .size _nds32_e_sa_ns_4b, .-_nds32_e_sa_ns_4b -#elif defined(NDS32_NESTED_READY) - .size _nds32_e_sa_nr_4b, .-_nds32_e_sa_nr_4b -#else /* Not nested handler. */ - .size _nds32_e_sa_nn_4b, .-_nds32_e_sa_nn_4b -#endif /* endif for Nest Type */ -#else /* not NDS32_SAVE_ALL_REGS */ -#if defined(NDS32_NESTED) - .size _nds32_e_ps_ns_4b, .-_nds32_e_ps_ns_4b -#elif defined(NDS32_NESTED_READY) - .size _nds32_e_ps_nr_4b, .-_nds32_e_ps_nr_4b -#else /* Not nested handler. */ - .size _nds32_e_ps_nn_4b, .-_nds32_e_ps_nn_4b -#endif /* endif for Nest Type */ -#endif /* not NDS32_SAVE_ALL_REGS */ diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/excp_isr.S gcc-4.9.4/libgcc/config/nds32/isr-library/excp_isr.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/excp_isr.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/excp_isr.S 2016-08-08 20:37:53.698587257 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -23,6 +23,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ +#include "save_usr_regs.inc" #include "save_mac_regs.inc" #include "save_fpu_regs.inc" #include "save_fpu_regs_00.inc" @@ -32,35 +33,33 @@ #include "save_all.inc" #include "save_partial.inc" #include "adj_intr_lvl.inc" -#include "restore_mac_regs.inc" #include "restore_fpu_regs_00.inc" #include "restore_fpu_regs_01.inc" #include "restore_fpu_regs_02.inc" #include "restore_fpu_regs_03.inc" #include "restore_fpu_regs.inc" +#include "restore_mac_regs.inc" +#include "restore_usr_regs.inc" #include "restore_all.inc" #include "restore_partial.inc" + .section .nds32_isr, "ax" /* Put it in the section of 1st level handler. */ .align 1 -/* - First Level Handlers - 1. First Level Handlers are invokded in vector section via jump instruction - with specific names for different configurations. - 2. Naming Format: _nds32_e_SR_NT for exception handlers. - _nds32_i_SR_NT for interrupt handlers. - 2.1 All upper case letters are replaced with specific lower case letters encodings. - 2.2 SR: Saved Registers - sa: Save All regs (context) - ps: Partial Save (all caller-saved regs) - 2.3 NT: Nested Type - ns: nested - nn: not nested - nr: nested ready -*/ - -/* - This is original 16-byte vector size version. -*/ + +/* First Level Handlers + 1. First Level Handlers are invokded in vector section via jump instruction + with specific names for different configurations. + 2. Naming Format: _nds32_e_SR_NT for exception handlers. + _nds32_i_SR_NT for interrupt handlers. + 2.1 All upper case letters are replaced with specific lower case letters encodings. 
+ 2.2 SR -- Saved Registers + sa: Save All regs (context) + ps: Partial Save (all caller-saved regs) + 2.3 NT -- Nested Type + ns: nested + nn: not nested + nr: nested ready */ + #ifdef NDS32_SAVE_ALL_REGS #if defined(NDS32_NESTED) .globl _nds32_e_sa_ns @@ -91,21 +90,26 @@ #endif /* endif for Nest Type */ #endif /* not NDS32_SAVE_ALL_REGS */ -/* - This is 16-byte vector size version. - The vector id was restored into $r0 in vector by compiler. -*/ + +/* For 4-byte vector size version, the vector id is + extracted from $ITYPE and is set into $r0 by library. + For 16-byte vector size version, the vector id + is set into $r0 in vector section by compiler. */ + +/* Save used registers. */ #ifdef NDS32_SAVE_ALL_REGS SAVE_ALL #else SAVE_PARTIAL #endif + /* Prepare to call 2nd level handler. */ la $r2, _nds32_jmptbl_00 lw $r2, [$r2 + $r0 << #2] ADJ_INTR_LVL /* Adjust INTR level. $r3 is clobbered. */ jral $r2 - /* Restore used registers. */ + +/* Restore used registers. */ #ifdef NDS32_SAVE_ALL_REGS RESTORE_ALL #else @@ -113,6 +117,7 @@ #endif iret + #ifdef NDS32_SAVE_ALL_REGS #if defined(NDS32_NESTED) .size _nds32_e_sa_ns, .-_nds32_e_sa_ns diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/intr_isr_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/intr_isr_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/intr_isr_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/intr_isr_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,134 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -#include "save_mac_regs.inc" -#include "save_fpu_regs.inc" -#include "save_fpu_regs_00.inc" -#include "save_fpu_regs_01.inc" -#include "save_fpu_regs_02.inc" -#include "save_fpu_regs_03.inc" -#include "save_all.inc" -#include "save_partial.inc" -#include "adj_intr_lvl.inc" -#include "restore_mac_regs.inc" -#include "restore_fpu_regs_00.inc" -#include "restore_fpu_regs_01.inc" -#include "restore_fpu_regs_02.inc" -#include "restore_fpu_regs_03.inc" -#include "restore_fpu_regs.inc" -#include "restore_all.inc" -#include "restore_partial.inc" - .section .nds32_isr, "ax" /* Put it in the section of 1st level handler. */ - .align 1 -/* - First Level Handlers - 1. First Level Handlers are invokded in vector section via jump instruction - with specific names for different configurations. - 2. Naming Format: _nds32_e_SR_NT for exception handlers. - _nds32_i_SR_NT for interrupt handlers. - 2.1 All upper case letters are replaced with specific lower case letters encodings. 
- 2.2 SR: Saved Registers - sa: Save All regs (context) - ps: Partial Save (all caller-saved regs) - 2.3 NT: Nested Type - ns: nested - nn: not nested - nr: nested ready -*/ - -/* - This is 4-byte vector size version. - The "_4b" postfix was added for 4-byte version symbol. -*/ -#ifdef NDS32_SAVE_ALL_REGS -#if defined(NDS32_NESTED) - .globl _nds32_i_sa_ns_4b - .type _nds32_i_sa_ns_4b, @function -_nds32_i_sa_ns_4b: -#elif defined(NDS32_NESTED_READY) - .globl _nds32_i_sa_nr_4b - .type _nds32_i_sa_nr_4b, @function -_nds32_i_sa_nr_4b: -#else /* Not nested handler. */ - .globl _nds32_i_sa_nn_4b - .type _nds32_i_sa_nn_4b, @function -_nds32_i_sa_nn_4b: -#endif /* endif for Nest Type */ -#else /* not NDS32_SAVE_ALL_REGS */ -#if defined(NDS32_NESTED) - .globl _nds32_i_ps_ns_4b - .type _nds32_i_ps_ns_4b, @function -_nds32_i_ps_ns_4b: -#elif defined(NDS32_NESTED_READY) - .globl _nds32_i_ps_nr_4b - .type _nds32_i_ps_nr_4b, @function -_nds32_i_ps_nr_4b: -#else /* Not nested handler. */ - .globl _nds32_i_ps_nn_4b - .type _nds32_i_ps_nn_4b, @function -_nds32_i_ps_nn_4b: -#endif /* endif for Nest Type */ -#endif /* not NDS32_SAVE_ALL_REGS */ - -/* - This is 4-byte vector size version. - The vector id was restored into $lp in vector by compiler. -*/ -#ifdef NDS32_SAVE_ALL_REGS - SAVE_ALL_4B -#else - SAVE_PARTIAL_4B -#endif - /* Prepare to call 2nd level handler. */ - la $r2, _nds32_jmptbl_00 - lw $r2, [$r2 + $r0 << #2] - addi $r0, $r0, #-9 /* Make interrput vector id zero-based. */ - ADJ_INTR_LVL /* Adjust INTR level. $r3 is clobbered. */ - jral $r2 - /* Restore used registers. */ -#ifdef NDS32_SAVE_ALL_REGS - RESTORE_ALL -#else - RESTORE_PARTIAL -#endif - iret - -#ifdef NDS32_SAVE_ALL_REGS -#if defined(NDS32_NESTED) - .size _nds32_i_sa_ns_4b, .-_nds32_i_sa_ns_4b -#elif defined(NDS32_NESTED_READY) - .size _nds32_i_sa_nr_4b, .-_nds32_i_sa_nr_4b -#else /* Not nested handler. */ - .size _nds32_i_sa_nn_4b, .-_nds32_i_sa_nn_4b -#endif /* endif for Nest Type */ -#else /* not NDS32_SAVE_ALL_REGS */ -#if defined(NDS32_NESTED) - .size _nds32_i_ps_ns_4b, .-_nds32_i_ps_ns_4b -#elif defined(NDS32_NESTED_READY) - .size _nds32_i_ps_nr_4b, .-_nds32_i_ps_nr_4b -#else /* Not nested handler. */ - .size _nds32_i_ps_nn_4b, .-_nds32_i_ps_nn_4b -#endif /* endif for Nest Type */ -#endif /* not NDS32_SAVE_ALL_REGS */ diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/intr_isr.S gcc-4.9.4/libgcc/config/nds32/isr-library/intr_isr.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/intr_isr.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/intr_isr.S 2016-08-08 20:37:53.698587257 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -23,6 +23,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . 
*/ +#include "save_usr_regs.inc" #include "save_mac_regs.inc" #include "save_fpu_regs.inc" #include "save_fpu_regs_00.inc" @@ -32,35 +33,33 @@ #include "save_all.inc" #include "save_partial.inc" #include "adj_intr_lvl.inc" -#include "restore_mac_regs.inc" #include "restore_fpu_regs_00.inc" #include "restore_fpu_regs_01.inc" #include "restore_fpu_regs_02.inc" #include "restore_fpu_regs_03.inc" #include "restore_fpu_regs.inc" +#include "restore_mac_regs.inc" +#include "restore_usr_regs.inc" #include "restore_all.inc" #include "restore_partial.inc" + .section .nds32_isr, "ax" /* Put it in the section of 1st level handler. */ .align 1 -/* - First Level Handlers - 1. First Level Handlers are invokded in vector section via jump instruction - with specific names for different configurations. - 2. Naming Format: _nds32_e_SR_NT for exception handlers. - _nds32_i_SR_NT for interrupt handlers. - 2.1 All upper case letters are replaced with specific lower case letters encodings. - 2.2 SR: Saved Registers - sa: Save All regs (context) - ps: Partial Save (all caller-saved regs) - 2.3 NT: Nested Type - ns: nested - nn: not nested - nr: nested ready -*/ - -/* - This is original 16-byte vector size version. -*/ + +/* First Level Handlers + 1. First Level Handlers are invokded in vector section via jump instruction + with specific names for different configurations. + 2. Naming Format: _nds32_e_SR_NT for exception handlers. + _nds32_i_SR_NT for interrupt handlers. + 2.1 All upper case letters are replaced with specific lower case letters encodings. + 2.2 SR -- Saved Registers + sa: Save All regs (context) + ps: Partial Save (all caller-saved regs) + 2.3 NT -- Nested Type + ns: nested + nn: not nested + nr: nested ready */ + #ifdef NDS32_SAVE_ALL_REGS #if defined(NDS32_NESTED) .globl _nds32_i_sa_ns @@ -91,21 +90,36 @@ #endif /* endif for Nest Type */ #endif /* not NDS32_SAVE_ALL_REGS */ -/* - This is 16-byte vector size version. - The vector id was restored into $r0 in vector by compiler. -*/ + +/* For 4-byte vector size version, the vector id is + extracted from $ITYPE and is set into $r0 by library. + For 16-byte vector size version, the vector id + is set into $r0 in vector section by compiler. */ + +/* Save used registers first. */ #ifdef NDS32_SAVE_ALL_REGS SAVE_ALL #else SAVE_PARTIAL #endif - /* Prepare to call 2nd level handler. */ + +/* According to vector size, we need to have different implementation. */ +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* Prepare to call 2nd level handler. */ + la $r2, _nds32_jmptbl_00 + lw $r2, [$r2 + $r0 << #2] + addi $r0, $r0, #-9 /* Make interrput vector id zero-based. */ + ADJ_INTR_LVL /* Adjust INTR level. $r3 is clobbered. */ + jral $r2 +#else /* not __NDS32_ISR_VECTOR_SIZE_4__ */ + /* Prepare to call 2nd level handler. */ la $r2, _nds32_jmptbl_09 /* For zero-based vcetor id. */ lw $r2, [$r2 + $r0 << #2] ADJ_INTR_LVL /* Adjust INTR level. $r3 is clobbered. */ jral $r2 - /* Restore used registers. */ +#endif /* not __NDS32_ISR_VECTOR_SIZE_4__ */ + +/* Restore used registers. 
*/ #ifdef NDS32_SAVE_ALL_REGS RESTORE_ALL #else @@ -113,6 +127,7 @@ #endif iret + #ifdef NDS32_SAVE_ALL_REGS #if defined(NDS32_NESTED) .size _nds32_i_sa_ns, .-_nds32_i_sa_ns diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid00.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid00.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid00.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid00.S 2016-08-08 20:37:53.702587412 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid01.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid01.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid01.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid01.S 2016-08-08 20:37:53.702587412 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid02.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid02.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid02.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid02.S 2016-08-08 20:37:53.702587412 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid03.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid03.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid03.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid03.S 2016-08-08 20:37:53.702587412 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid04.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid04.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid04.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid04.S 2016-08-08 20:37:53.702587412 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. 
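The first-level handlers patched above reach the user's second-level handler through the jump table that the jmptbl_vidNN.S entries around this point provide: the vector id left in $r0 indexes a table of handler words (based at _nds32_jmptbl_00, or at _nds32_jmptbl_09 depending on how the id is biased), the selected word is loaded into $r2, the interrupt level is adjusted, and the handler is entered with jral. The C fragment below is only a sketch of that dispatch step, not part of the patch; the handler typedef, the function name, and the array view of the table base are invented for readability.

   /* Conceptual equivalent of:
        la   $r2, _nds32_jmptbl_00
        lw   $r2, [$r2 + $r0 << #2]
        jral $r2
      Each jmptbl_vidNN.S contributes one word holding the address of a
      user-registered second-level handler.  */
   typedef void (*nds32_isr_handler_t) (void);
   extern nds32_isr_handler_t _nds32_jmptbl_00[];  /* array view is illustrative only */

   static void
   nds32_dispatch_second_level (unsigned int vid)
   {
     nds32_isr_handler_t handler = _nds32_jmptbl_00[vid];  /* lw $r2, [$r2 + $r0 << #2] */
     handler ();                                           /* jral $r2 */
   }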
diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid05.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid05.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid05.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid05.S 2016-08-08 20:37:53.702587412 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid06.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid06.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid06.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid06.S 2016-08-08 20:37:53.702587412 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid07.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid07.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid07.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid07.S 2016-08-08 20:37:53.702587412 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid08.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid08.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid08.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid08.S 2016-08-08 20:37:53.702587412 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid09.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid09.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid09.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid09.S 2016-08-08 20:37:53.702587412 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid10.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid10.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid10.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid10.S 2016-08-08 20:37:53.702587412 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. 
diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid11.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid11.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid11.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid11.S 2016-08-08 20:37:53.702587412 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid12.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid12.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid12.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid12.S 2016-08-08 20:37:53.702587412 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid13.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid13.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid13.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid13.S 2016-08-08 20:37:53.702587412 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid14.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid14.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid14.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid14.S 2016-08-08 20:37:53.702587412 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid15.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid15.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid15.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid15.S 2016-08-08 20:37:53.702587412 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid16.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid16.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid16.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid16.S 2016-08-08 20:37:53.702587412 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. 
diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid17.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid17.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid17.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid17.S 2016-08-08 20:37:53.702587412 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid18.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid18.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid18.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid18.S 2016-08-08 20:37:53.702587412 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid19.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid19.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid19.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid19.S 2016-08-08 20:37:53.702587412 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid20.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid20.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid20.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid20.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid21.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid21.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid21.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid21.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid22.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid22.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid22.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid22.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. 
diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid23.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid23.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid23.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid23.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid24.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid24.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid24.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid24.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid25.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid25.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid25.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid25.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid26.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid26.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid26.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid26.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid27.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid27.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid27.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid27.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid28.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid28.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid28.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid28.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. 
diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid29.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid29.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid29.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid29.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid30.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid30.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid30.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid30.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid31.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid31.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid31.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid31.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid32.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid32.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid32.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid32.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid33.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid33.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid33.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid33.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid34.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid34.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid34.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid34.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. 
diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid35.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid35.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid35.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid35.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid36.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid36.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid36.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid36.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid37.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid37.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid37.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid37.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid38.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid38.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid38.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid38.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid39.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid39.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid39.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid39.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid40.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid40.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid40.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid40.S 2016-08-08 20:37:53.706587567 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. 
diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid41.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid41.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid41.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid41.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid42.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid42.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid42.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid42.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid43.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid43.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid43.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid43.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid44.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid44.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid44.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid44.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid45.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid45.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid45.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid45.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid46.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid46.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid46.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid46.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. 
diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid47.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid47.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid47.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid47.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid48.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid48.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid48.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid48.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid49.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid49.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid49.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid49.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid50.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid50.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid50.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid50.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid51.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid51.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid51.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid51.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid52.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid52.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid52.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid52.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. 
diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid53.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid53.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid53.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid53.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid54.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid54.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid54.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid54.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid55.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid55.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid55.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid55.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid56.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid56.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid56.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid56.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid57.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid57.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid57.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid57.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid58.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid58.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid58.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid58.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. 
diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid59.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid59.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid59.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid59.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid60.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid60.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid60.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid60.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid61.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid61.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid61.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid61.S 2016-08-08 20:37:53.710587722 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid62.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid62.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid62.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid62.S 2016-08-08 20:37:53.714587877 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid63.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid63.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid63.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid63.S 2016-08-08 20:37:53.714587877 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid64.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid64.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid64.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid64.S 2016-08-08 20:37:53.714587877 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. 
diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid65.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid65.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid65.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid65.S 2016-08-08 20:37:53.714587877 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid66.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid66.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid66.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid66.S 2016-08-08 20:37:53.714587877 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid67.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid67.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid67.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid67.S 2016-08-08 20:37:53.714587877 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid68.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid68.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid68.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid68.S 2016-08-08 20:37:53.714587877 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid69.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid69.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid69.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid69.S 2016-08-08 20:37:53.714587877 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid70.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid70.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid70.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid70.S 2016-08-08 20:37:53.714587877 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. 
diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid71.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid71.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid71.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid71.S 2016-08-08 20:37:53.714587877 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid72.S gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid72.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/jmptbl_vid72.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/jmptbl_vid72.S 2016-08-08 20:37:53.714587877 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/nmih.S gcc-4.9.4/libgcc/config/nds32/isr-library/nmih.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/nmih.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/nmih.S 2016-08-08 20:37:53.714587877 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/reset_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/reset_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/reset_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/reset_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,131 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_isr, "ax" /* Put it in the section of 1st level handler. */ - .align 1 - .weak _SDA_BASE_ /* For reset handler only. */ - .weak _FP_BASE_ /* For reset handler only. */ - .weak _nds32_init_mem /* User defined memory initialization function. 
*/ - .globl _start - .globl _nds32_reset_4b - .type _nds32_reset_4b, @function -_nds32_reset_4b: -_start: -#ifdef NDS32_EXT_EX9 - .no_ex9_begin -#endif - /* Handle NMI and warm boot if any of them exists. */ - beqz $sp, 1f /* Reset, NMI or warm boot? */ - /* Either NMI or warm boot; save all regs. */ - - /* Preserve registers for context-switching. */ -#ifdef __NDS32_REDUCED_REGS__ - /* For 16-reg mode. */ - smw.adm $r0, [$sp], $r10, #0x0 - smw.adm $r15, [$sp], $r15, #0xf -#else - /* For 32-reg mode. */ - smw.adm $r0, [$sp], $r27, #0xf -#endif -#ifdef NDS32_EXT_IFC - mfusr $r1, $IFC_LP - smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep - stack 8-byte alignment. */ -#endif - - la $gp, _SDA_BASE_ /* Init GP for small data access. */ - move $r0, $sp /* Init parameter. */ - mfsr $r1, $ITYPE /* Check ITYPE for NMI or warm boot. */ - andi $r1, $r1, #0xf - addi $r1, $r1, #-1 - beqz $r1, 2f /* Warm boot if true. */ - l.w $r15, _nds32_nmih /* Load NMI handler. */ - j 3f -2: - l.w $r15, _nds32_wrh /* Load warm boot handler. */ -3: - beqz $r15, 1f /* If no handler, do cold boot. */ - jral $r15 /* Call handler. */ - bnez $r0, 1f /* If fail to resume, do cold boot. */ - - /* Restore registers for context-switching. */ -#ifdef NDS32_EXT_IFC - lmw.bim $r1, [$sp], $r2, #0x0 /* Restore extra $r2 to keep - stack 8-byte alignment. */ - mtusr $r1, $IFC_LP -#endif -#ifdef __NDS32_REDUCED_REGS__ - /* For 16-reg mode. */ - lmw.bim $r15, [$sp], $r15, #0xf - lmw.bim $r0, [$sp], $r10, #0x0 -#else - /* For 32-reg mode. */ - lmw.bim $r0, [$sp], $r27, #0xf -#endif - iret /* Resume operation. */ - - -1: /* Cold boot. */ - /* With vector ID feature, set default vector size to 4B. */ - /* Set IVB.ESZ = 0 (vector table entry size = 4 bytes) */ - mfsr $r0, $IVB - li $r1, #0xc000 - or $r0, $r0, $r1 - xor $r0, $r0, $r1 - mtsr $r0, $IVB - dsb - - la $gp, _SDA_BASE_ /* Init $gp. */ - la $fp, _FP_BASE_ /* Init $fp. */ - la $sp, _stack /* Init $sp. */ -#ifdef NDS32_EXT_EX9 -/* - * Initialize the table base of EX9 instruction - * ex9 generation needs to disable before the ITB is set - */ - mfsr $r0, $MSC_CFG /* Check if HW support of EX9. */ - srli $r0, $r0, 24 - andi $r0, $r0, 0x1 - beqz $r0, 4f /* Zero means HW does not support EX9. */ - la $r0, _ITB_BASE_ /* Init $ITB. */ - mtusr $r0, $ITB - .no_ex9_end -4: -#endif - la $r15, _nds32_init_mem /* Call DRAM init. _nds32_init_mem - may written by C language. */ - beqz $r15, 6f - jral $r15 -6: - l.w $r15, _nds32_jmptbl_00 /* Load reset handler. */ - jral $r15 -/* Reset handler() should never return in a RTOS or non-OS system. - In case it does return, an exception will be generated. - This exception will be caught either by default break handler or by EDM. - Default break handle may just do an infinite loop. - EDM will notify GDB and GDB will regain control when the ID is 0x7fff. */ -5: - break #0x7fff - .size _nds32_reset_4b, .-_nds32_reset_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/reset.S gcc-4.9.4/libgcc/config/nds32/isr-library/reset.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/reset.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/reset.S 2016-08-08 20:37:53.714587877 +0200 @@ -26,22 +26,18 @@ .section .nds32_isr, "ax" /* Put it in the section of 1st level handler. */ .align 1 .weak _SDA_BASE_ /* For reset handler only. */ - .weak _FP_BASE_ /* For reset handler only. */ .weak _nds32_init_mem /* User defined memory initialization function. 
*/ .globl _start .globl _nds32_reset .type _nds32_reset, @function _nds32_reset: _start: -#ifdef NDS32_EXT_EX9 - .no_ex9_begin -#endif /* Handle NMI and warm boot if any of them exists. */ beqz $sp, 1f /* Reset, NMI or warm boot? */ /* Either NMI or warm boot; save all regs. */ /* Preserve registers for context-switching. */ -#ifdef __NDS32_REDUCED_REGS__ +#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS /* For 16-reg mode. */ smw.adm $r0, [$sp], $r10, #0x0 smw.adm $r15, [$sp], $r15, #0xf @@ -49,10 +45,9 @@ /* For 32-reg mode. */ smw.adm $r0, [$sp], $r27, #0xf #endif -#ifdef NDS32_EXT_IFC +#if __NDS32_EXT_IFC__ mfusr $r1, $IFC_LP - smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep - stack 8-byte alignment. */ + smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep stack 8-byte alignment. */ #endif la $gp, _SDA_BASE_ /* Init GP for small data access. */ @@ -71,12 +66,11 @@ bnez $r0, 1f /* If fail to resume, do cold boot. */ /* Restore registers for context-switching. */ -#ifdef NDS32_EXT_IFC - lmw.bim $r1, [$sp], $r2, #0x0 /* Restore extra $r2 to keep - stack 8-byte alignment. */ +#if __NDS32_EXT_IFC__ + lmw.bim $r1, [$sp], $r2, #0x0 /* Restore extra $r2 to keep stack 8-byte alignment. */ mtusr $r1, $IFC_LP #endif -#ifdef __NDS32_REDUCED_REGS__ +#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS /* For 16-reg mode. */ lmw.bim $r15, [$sp], $r15, #0xf lmw.bim $r0, [$sp], $r10, #0x0 @@ -88,6 +82,17 @@ 1: /* Cold boot. */ +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* With vector ID feature for v3 architecture, the default vector size is 4 bytes. */ + /* Set IVB.ESZ = 0 (vector table entry size = 4 bytes) */ + mfsr $r0, $IVB + li $r1, #0xc000 + or $r0, $r0, $r1 + xor $r0, $r0, $r1 + mtsr $r0, $IVB + dsb +#else + /* There is no vector ID feature, so the vector size must be 16 bytes. */ /* Set IVB.ESZ = 1 (vector table entry size = 16 bytes) */ mfsr $r0, $IVB li $r1, #0xffff3fff @@ -95,36 +100,54 @@ ori $r0, $r0, #0x4000 mtsr $r0, $IVB dsb +#endif la $gp, _SDA_BASE_ /* Init $gp. */ - la $fp, _FP_BASE_ /* Init $fp. */ la $sp, _stack /* Init $sp. */ -#ifdef NDS32_EXT_EX9 -/* - * Initialize the table base of EX9 instruction - * ex9 generation needs to disable before the ITB is set - */ - mfsr $r0, $MSC_CFG /* Check if HW support of EX9. */ + +#if __NDS32_EXT_EX9__ +.L_init_itb: + /* Initialization for Instruction Table Base (ITB). + The symbol _ITB_BASE_ is determined by the linker. + Set $ITB only if MSC_CFG.EIT (cr4.b'24) is set. */ + mfsr $r0, $MSC_CFG srli $r0, $r0, 24 andi $r0, $r0, 0x1 - beqz $r0, 4f /* Zero means HW does not support EX9. */ - la $r0, _ITB_BASE_ /* Init $ITB. */ + beqz $r0, 4f /* Fall through? */ + la $r0, _ITB_BASE_ mtusr $r0, $ITB - .no_ex9_end 4: #endif - la $r15, _nds32_init_mem /* Call DRAM init. _nds32_init_mem - may written by C language. */ + +#if __NDS32_EXT_FPU_SP__ || __NDS32_EXT_FPU_DP__ +.L_init_fpu: + /* Initialize FPU + Set FUCOP_CTL.CP0EN (fucpr.b'0). */ + mfsr $r0, $FUCOP_CTL + ori $r0, $r0, 0x1 + mtsr $r0, $FUCOP_CTL + dsb + /* According to [bugzilla #9425], set flush-to-zero mode. + That is, set $FPCSR.DNZ(b'12) = 1. */ + FMFCSR $r0 + ori $r0, $r0, 0x1000 + FMTCSR $r0 + dsb +#endif + + /* Call DRAM init. _nds32_init_mem may be written in C. */ + la $r15, _nds32_init_mem beqz $r15, 6f jral $r15 6: l.w $r15, _nds32_jmptbl_00 /* Load reset handler. */ jral $r15 -/* Reset handler() should never return in a RTOS or non-OS system. - In case it does return, an exception will be generated. - This exception will be caught either by default break handler or by EDM.
- Default break handle may just do an infinite loop. - EDM will notify GDB and GDB will regain control when the ID is 0x7fff. */ + + /* Reset handler() should never return in an RTOS or non-OS system. + In case it does return, an exception will be generated. + This exception will be caught either by default break handler or by EDM. + Default break handler may just do an infinite loop. + EDM will notify GDB and GDB will regain control when the ID is 0x7fff. */ 5: break #0x7fff .size _nds32_reset, .-_nds32_reset diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/restore_all.inc gcc-4.9.4/libgcc/config/nds32/isr-library/restore_all.inc --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/restore_all.inc 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/restore_all.inc 2016-08-08 20:37:53.714587877 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -31,15 +31,11 @@ mtsr $r2, $IPSW RESTORE_FPU_REGS RESTORE_MAC_REGS -#ifdef NDS32_EXT_IFC - lmw.bim $r1, [$sp], $r2, #0x0 /* Restore extra $r2 to keep - stack 8-byte alignment. */ - mtusr $r1, $IFC_LP -#endif -#ifdef __NDS32_REDUCED_REGS__ + RESTORE_USR_REGS +#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS lmw.bim $r0, [$sp], $r10, #0x0 /* Restore all regs. */ lmw.bim $r15, [$sp], $r15, #0xf -#else /* not __NDS32_REDUCED_REGS__ */ +#else lmw.bim $r0, [$sp], $r27, #0xf /* Restore all regs. */ #endif .endm diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/restore_fpu_regs_00.inc gcc-4.9.4/libgcc/config/nds32/isr-library/restore_fpu_regs_00.inc --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/restore_fpu_regs_00.inc 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/restore_fpu_regs_00.inc 2016-08-08 20:37:53.714587877 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/restore_fpu_regs_01.inc gcc-4.9.4/libgcc/config/nds32/isr-library/restore_fpu_regs_01.inc --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/restore_fpu_regs_01.inc 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/restore_fpu_regs_01.inc 2016-08-08 20:37:53.714587877 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/restore_fpu_regs_02.inc gcc-4.9.4/libgcc/config/nds32/isr-library/restore_fpu_regs_02.inc --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/restore_fpu_regs_02.inc 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/restore_fpu_regs_02.inc 2016-08-08 20:37:53.714587877 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC.
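Taken together, the cold-boot path of the reworked reset.S above selects the vector entry size through IVB.ESZ, initializes $gp and $sp, programs $ITB when EX9 is configured and MSC_CFG advertises it, enables the FPU via FUCOP_CTL.CP0EN and switches it to flush-to-zero mode via $FPCSR.DNZ when an FPU extension is configured, then calls the weak _nds32_init_mem hook if one is linked in, and finally jumps to the reset handler stored at _nds32_jmptbl_00. The C outline below covers only the tail of that sequence; it is a sketch rather than part of the patch, the function name is invented, and the system-register setup is omitted because it has no C equivalent here.

   /* Tail of the cold-boot path: optional DRAM init, then the reset handler.
      _nds32_init_mem is declared weak in reset.S, so the call is skipped when
      the user does not provide it; the reset handler is not expected to return.  */
   typedef void (*nds32_isr_handler_t) (void);
   extern void _nds32_init_mem (void) __attribute__ ((weak));
   extern nds32_isr_handler_t _nds32_jmptbl_00[];  /* word 0 holds the reset handler */

   static void
   nds32_cold_boot_tail (void)
   {
     if (_nds32_init_mem)      /* beqz $r15, 6f */
       _nds32_init_mem ();     /* jral $r15 */
     _nds32_jmptbl_00[0] ();   /* l.w $r15, _nds32_jmptbl_00; jral $r15 */
     __builtin_trap ();        /* break #0x7fff, reached only if the handler returns */
   }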
diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/restore_fpu_regs_03.inc gcc-4.9.4/libgcc/config/nds32/isr-library/restore_fpu_regs_03.inc --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/restore_fpu_regs_03.inc 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/restore_fpu_regs_03.inc 2016-08-08 20:37:53.718588032 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/restore_fpu_regs.inc gcc-4.9.4/libgcc/config/nds32/isr-library/restore_fpu_regs.inc --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/restore_fpu_regs.inc 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/restore_fpu_regs.inc 2016-08-08 20:37:53.714587877 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/restore_mac_regs.inc gcc-4.9.4/libgcc/config/nds32/isr-library/restore_mac_regs.inc --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/restore_mac_regs.inc 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/restore_mac_regs.inc 2016-08-08 20:37:53.718588032 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,7 +24,7 @@ . */ .macro RESTORE_MAC_REGS -#ifdef NDS32_DX_REGS +#if __NDS32_DX_REGS__ lmw.bim $r1, [$sp], $r4, #0x0 mtusr $r1, $d0.lo mtusr $r2, $d0.hi diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/restore_partial.inc gcc-4.9.4/libgcc/config/nds32/isr-library/restore_partial.inc --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/restore_partial.inc 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/restore_partial.inc 2016-08-08 20:37:53.718588032 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -31,15 +31,11 @@ mtsr $r1, $IPC /* Set IPC. */ mtsr $r2, $IPSW /* Set IPSW. */ #endif - RESTORE_FPU_REGS - RESTORE_MAC_REGS -#ifdef NDS32_EXT_IFC - lmw.bim $r1, [$sp], $r2, #0x0 /* Restore extra $r2 to keep - stack 8-byte alignment. */ - mtusr $r1, $IFC_LP -#endif + RESTORE_FPU_REGS + RESTORE_MAC_REGS + RESTORE_USR_REGS lmw.bim $r0, [$sp], $r5, #0x0 /* Restore all regs. */ -#ifdef __NDS32_REDUCED_REGS__ +#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS lmw.bim $r15, [$sp], $r15, #0x2 #else lmw.bim $r15, [$sp], $r27, #0x2 /* Restore all regs. 
*/ diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/restore_usr_regs.inc gcc-4.9.4/libgcc/config/nds32/isr-library/restore_usr_regs.inc --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/restore_usr_regs.inc 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/restore_usr_regs.inc 2016-08-08 20:37:53.718588032 +0200 @@ -0,0 +1,42 @@ +/* c-isr library stuff of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2015 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +.macro RESTORE_USR_REGS +#if __NDS32_EXT_IFC__ && (__NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__) + lmw.bim $r1, [$sp], $r4, #0x0 + mtusr $r1, $IFC_LP + mtusr $r2, $LB + mtusr $r3, $LE + mtusr $r4, $LC +#elif __NDS32_EXT_IFC__ + lmw.bim $r1, [$sp], $r2, #0x0 + mtusr $r1, $IFC_LP +#elif __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__ + lmw.bim $r1, [$sp], $r4, #0x0 + mtusr $r1, $LB + mtusr $r2, $LE + mtusr $r3, $LC +#endif +.endm diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/save_all.inc gcc-4.9.4/libgcc/config/nds32/isr-library/save_all.inc --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/save_all.inc 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/save_all.inc 2016-08-08 20:37:53.718588032 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -23,45 +23,42 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ -.macro SAVE_ALL_4B -#ifdef __NDS32_REDUCED_REGS__ +#if __NDS32_ISR_VECTOR_SIZE_4__ + +/* If vector size is 4-byte, we have to save registers + in the macro implementation. */ +.macro SAVE_ALL +#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS smw.adm $r15, [$sp], $r15, #0xf smw.adm $r0, [$sp], $r10, #0x0 -#else /* not __NDS32_REDUCED_REGS__ */ +#else smw.adm $r0, [$sp], $r27, #0xf -#endif /* not __NDS32_REDUCED_REGS__ */ -#ifdef NDS32_EXT_IFC - mfusr $r1, $IFC_LP - smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep - stack 8-byte alignment. */ #endif - SAVE_MAC_REGS - SAVE_FPU_REGS + SAVE_USR_REGS + SAVE_MAC_REGS + SAVE_FPU_REGS mfsr $r1, $IPC /* Get IPC. */ mfsr $r2, $IPSW /* Get IPSW. */ smw.adm $r1, [$sp], $r2, #0x0 /* Push IPC, IPSW. */ move $r1, $sp /* $r1 is ptr to NDS32_CONTEXT. */ mfsr $r0, $ITYPE /* Get VID to $r0. 
*/ srli $r0, $r0, #5 -#ifdef __NDS32_ISA_V2__ andi $r0, $r0, #127 -#else - fexti33 $r0, #6 -#endif .endm +#else /* not __NDS32_ISR_VECTOR_SIZE_4__ */ + +/* If vector size is 16-byte, some works can be done in + the vector section generated by compiler, so that we + can implement less in the macro. */ .macro SAVE_ALL -/* SAVE_REG_TBL code has been moved to - vector table generated by compiler. */ -#ifdef NDS32_EXT_IFC - mfusr $r1, $IFC_LP - smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep - stack 8-byte alignment. */ -#endif - SAVE_MAC_REGS - SAVE_FPU_REGS + SAVE_USR_REGS + SAVE_MAC_REGS + SAVE_FPU_REGS mfsr $r1, $IPC /* Get IPC. */ mfsr $r2, $IPSW /* Get IPSW. */ smw.adm $r1, [$sp], $r2, #0x0 /* Push IPC, IPSW. */ move $r1, $sp /* $r1 is ptr to NDS32_CONTEXT. */ .endm + +#endif /* not __NDS32_ISR_VECTOR_SIZE_4__ */ diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/save_fpu_regs_00.inc gcc-4.9.4/libgcc/config/nds32/isr-library/save_fpu_regs_00.inc --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/save_fpu_regs_00.inc 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/save_fpu_regs_00.inc 2016-08-08 20:37:53.718588032 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/save_fpu_regs_01.inc gcc-4.9.4/libgcc/config/nds32/isr-library/save_fpu_regs_01.inc --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/save_fpu_regs_01.inc 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/save_fpu_regs_01.inc 2016-08-08 20:37:53.718588032 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/save_fpu_regs_02.inc gcc-4.9.4/libgcc/config/nds32/isr-library/save_fpu_regs_02.inc --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/save_fpu_regs_02.inc 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/save_fpu_regs_02.inc 2016-08-08 20:37:53.718588032 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/save_fpu_regs_03.inc gcc-4.9.4/libgcc/config/nds32/isr-library/save_fpu_regs_03.inc --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/save_fpu_regs_03.inc 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/save_fpu_regs_03.inc 2016-08-08 20:37:53.718588032 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. 
diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/save_fpu_regs.inc gcc-4.9.4/libgcc/config/nds32/isr-library/save_fpu_regs.inc --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/save_fpu_regs.inc 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/save_fpu_regs.inc 2016-08-08 20:37:53.718588032 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/save_mac_regs.inc gcc-4.9.4/libgcc/config/nds32/isr-library/save_mac_regs.inc --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/save_mac_regs.inc 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/save_mac_regs.inc 2016-08-08 20:37:53.718588032 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,7 +24,7 @@ . */ .macro SAVE_MAC_REGS -#ifdef NDS32_DX_REGS +#if __NDS32_DX_REGS__ mfusr $r1, $d0.lo mfusr $r2, $d0.hi mfusr $r3, $d1.lo diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/save_partial.inc gcc-4.9.4/libgcc/config/nds32/isr-library/save_partial.inc --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/save_partial.inc 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/save_partial.inc 2016-08-08 20:37:53.718588032 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -23,20 +23,20 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ -.macro SAVE_PARTIAL_4B -#ifdef __NDS32_REDUCED_REGS__ +#if __NDS32_ISR_VECTOR_SIZE_4__ + +/* If vector size is 4-byte, we have to save registers + in the macro implementation. */ +.macro SAVE_PARTIAL +#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS smw.adm $r15, [$sp], $r15, #0x2 -#else /* not __NDS32_REDUCED_REGS__ */ +#else smw.adm $r15, [$sp], $r27, #0x2 -#endif /* not __NDS32_REDUCED_REGS__ */ - smw.adm $r0, [$sp], $r5, #0x0 -#ifdef NDS32_EXT_IFC - mfusr $r1, $IFC_LP - smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep - stack 8-byte alignment. */ #endif - SAVE_MAC_REGS - SAVE_FPU_REGS + smw.adm $r0, [$sp], $r5, #0x0 + SAVE_USR_REGS + SAVE_MAC_REGS + SAVE_FPU_REGS #if defined(NDS32_NESTED) || defined(NDS32_NESTED_READY) mfsr $r1, $IPC /* Get IPC. */ mfsr $r2, $IPSW /* Get IPSW. */ @@ -44,26 +44,24 @@ #endif mfsr $r0, $ITYPE /* Get VID to $r0. */ srli $r0, $r0, #5 -#ifdef __NDS32_ISA_V2__ andi $r0, $r0, #127 -#else - fexti33 $r0, #6 -#endif .endm +#else /* not __NDS32_ISR_VECTOR_SIZE_4__ */ + +/* If vector size is 16-byte, some works can be done in + the vector section generated by compiler, so that we + can implement less in the macro. */ + .macro SAVE_PARTIAL -/* SAVE_CALLER_REGS code has been moved to - vector table generated by compiler. */ -#ifdef NDS32_EXT_IFC - mfusr $r1, $IFC_LP - smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep - stack 8-byte alignment. 
*/ -#endif - SAVE_MAC_REGS - SAVE_FPU_REGS + SAVE_USR_REGS + SAVE_MAC_REGS + SAVE_FPU_REGS #if defined(NDS32_NESTED) || defined(NDS32_NESTED_READY) mfsr $r1, $IPC /* Get IPC. */ mfsr $r2, $IPSW /* Get IPSW. */ smw.adm $r1, [$sp], $r2, #0x0 /* Push IPC, IPSW. */ #endif .endm + +#endif /* not __NDS32_ISR_VECTOR_SIZE_4__ */ diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/save_usr_regs.inc gcc-4.9.4/libgcc/config/nds32/isr-library/save_usr_regs.inc --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/save_usr_regs.inc 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/save_usr_regs.inc 2016-08-08 20:37:53.718588032 +0200 @@ -0,0 +1,44 @@ +/* c-isr library stuff of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2015 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +.macro SAVE_USR_REGS +/* Store User Special Registers according to supported ISA extension + !!! WATCH OUT !!! Take care of 8-byte alignment issue. */ +#if __NDS32_EXT_IFC__ && (__NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__) + mfusr $r1, $IFC_LP + mfusr $r2, $LB + mfusr $r3, $LE + mfusr $r4, $LC + smw.adm $r1, [$sp], $r4, #0x0 /* Save even. Ok! */ +#elif __NDS32_EXT_IFC__ + mfusr $r1, $IFC_LP + smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep stack 8-byte aligned. */ +#elif (__NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__) + mfusr $r1, $LB + mfusr $r2, $LE + mfusr $r3, $LC + smw.adm $r1, [$sp], $r4, #0x0 /* Save extra $r4 to keep stack 8-byte aligned. */ +#endif +.endm diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid00_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid00_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid00_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid00_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. 
- - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.00, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_00_4b - .type _nds32_vector_00_4b, @function -_nds32_vector_00_4b: -1: - j 1b - .size _nds32_vector_00_4b, .-_nds32_vector_00_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid00.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid00.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid00.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid00.S 2016-08-08 20:37:53.718588032 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.00, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_00 .type _nds32_vector_00, @function _nds32_vector_00: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid01_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid01_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid01_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid01_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - - .section .nds32_vector.01, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_01_4b - .type _nds32_vector_01_4b, @function -_nds32_vector_01_4b: -1: - j 1b - .size _nds32_vector_01_4b, .-_nds32_vector_01_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid01.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid01.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid01.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid01.S 2016-08-08 20:37:53.718588032 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.01, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_01 .type _nds32_vector_01, @function _nds32_vector_01: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid02_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid02_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid02_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid02_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.02, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_02_4b - .type _nds32_vector_02_4b, @function -_nds32_vector_02_4b: -1: - j 1b - .size _nds32_vector_02_4b, .-_nds32_vector_02_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid02.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid02.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid02.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid02.S 2016-08-08 20:37:53.718588032 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.02, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. 
*/ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_02 .type _nds32_vector_02, @function _nds32_vector_02: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid03_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid03_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid03_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid03_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.03, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_03_4b - .type _nds32_vector_03_4b, @function -_nds32_vector_03_4b: -1: - j 1b - .size _nds32_vector_03_4b, .-_nds32_vector_03_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid03.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid03.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid03.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid03.S 2016-08-08 20:37:53.718588032 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.03, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_03 .type _nds32_vector_03, @function _nds32_vector_03: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid04_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid04_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid04_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid04_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. 
- - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.04, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_04_4b - .type _nds32_vector_04_4b, @function -_nds32_vector_04_4b: -1: - j 1b - .size _nds32_vector_04_4b, .-_nds32_vector_04_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid04.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid04.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid04.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid04.S 2016-08-08 20:37:53.722588187 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.04, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_04 .type _nds32_vector_04, @function _nds32_vector_04: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid05_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid05_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid05_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid05_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - - .section .nds32_vector.05, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_05_4b - .type _nds32_vector_05_4b, @function -_nds32_vector_05_4b: -1: - j 1b - .size _nds32_vector_05_4b, .-_nds32_vector_05_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid05.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid05.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid05.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid05.S 2016-08-08 20:37:53.722588187 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.05, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_05 .type _nds32_vector_05, @function _nds32_vector_05: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid06_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid06_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid06_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid06_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.06, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_06_4b - .type _nds32_vector_06_4b, @function -_nds32_vector_06_4b: -1: - j 1b - .size _nds32_vector_06_4b, .-_nds32_vector_06_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid06.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid06.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid06.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid06.S 2016-08-08 20:37:53.722588187 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.06, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. 
*/ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_06 .type _nds32_vector_06, @function _nds32_vector_06: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid07_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid07_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid07_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid07_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.07, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_07_4b - .type _nds32_vector_07_4b, @function -_nds32_vector_07_4b: -1: - j 1b - .size _nds32_vector_07_4b, .-_nds32_vector_07_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid07.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid07.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid07.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid07.S 2016-08-08 20:37:53.722588187 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.07, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_07 .type _nds32_vector_07, @function _nds32_vector_07: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid08_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid08_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid08_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid08_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. 
- - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.08, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_08_4b - .type _nds32_vector_08_4b, @function -_nds32_vector_08_4b: -1: - j 1b - .size _nds32_vector_08_4b, .-_nds32_vector_08_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid08.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid08.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid08.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid08.S 2016-08-08 20:37:53.722588187 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.08, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_08 .type _nds32_vector_08, @function _nds32_vector_08: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid09_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid09_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid09_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid09_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - - .section .nds32_vector.09, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_09_4b - .type _nds32_vector_09_4b, @function -_nds32_vector_09_4b: -1: - j 1b - .size _nds32_vector_09_4b, .-_nds32_vector_09_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid09.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid09.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid09.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid09.S 2016-08-08 20:37:53.722588187 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.09, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_09 .type _nds32_vector_09, @function _nds32_vector_09: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid10_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid10_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid10_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid10_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.10, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_10_4b - .type _nds32_vector_10_4b, @function -_nds32_vector_10_4b: -1: - j 1b - .size _nds32_vector_10_4b, .-_nds32_vector_10_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid10.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid10.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid10.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid10.S 2016-08-08 20:37:53.722588187 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.10, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. 
*/ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_10 .type _nds32_vector_10, @function _nds32_vector_10: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid11_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid11_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid11_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid11_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.11, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_11_4b - .type _nds32_vector_11_4b, @function -_nds32_vector_11_4b: -1: - j 1b - .size _nds32_vector_11_4b, .-_nds32_vector_11_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid11.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid11.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid11.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid11.S 2016-08-08 20:37:53.722588187 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.11, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_11 .type _nds32_vector_11, @function _nds32_vector_11: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid12_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid12_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid12_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid12_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. 
- - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.12, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_12_4b - .type _nds32_vector_12_4b, @function -_nds32_vector_12_4b: -1: - j 1b - .size _nds32_vector_12_4b, .-_nds32_vector_12_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid12.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid12.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid12.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid12.S 2016-08-08 20:37:53.722588187 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.12, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_12 .type _nds32_vector_12, @function _nds32_vector_12: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid13_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid13_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid13_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid13_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - - .section .nds32_vector.13, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_13_4b - .type _nds32_vector_13_4b, @function -_nds32_vector_13_4b: -1: - j 1b - .size _nds32_vector_13_4b, .-_nds32_vector_13_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid13.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid13.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid13.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid13.S 2016-08-08 20:37:53.722588187 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.13, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_13 .type _nds32_vector_13, @function _nds32_vector_13: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid14_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid14_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid14_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid14_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.14, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_14_4b - .type _nds32_vector_14_4b, @function -_nds32_vector_14_4b: -1: - j 1b - .size _nds32_vector_14_4b, .-_nds32_vector_14_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid14.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid14.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid14.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid14.S 2016-08-08 20:37:53.722588187 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.14, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. 
*/ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_14 .type _nds32_vector_14, @function _nds32_vector_14: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid15_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid15_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid15_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid15_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.15, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_15_4b - .type _nds32_vector_15_4b, @function -_nds32_vector_15_4b: -1: - j 1b - .size _nds32_vector_15_4b, .-_nds32_vector_15_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid15.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid15.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid15.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid15.S 2016-08-08 20:37:53.726588342 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.15, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_15 .type _nds32_vector_15, @function _nds32_vector_15: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid16_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid16_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid16_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid16_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. 
- - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.16, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_16_4b - .type _nds32_vector_16_4b, @function -_nds32_vector_16_4b: -1: - j 1b - .size _nds32_vector_16_4b, .-_nds32_vector_16_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid16.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid16.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid16.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid16.S 2016-08-08 20:37:53.726588342 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.16, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_16 .type _nds32_vector_16, @function _nds32_vector_16: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid17_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid17_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid17_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid17_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - - .section .nds32_vector.17, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_17_4b - .type _nds32_vector_17_4b, @function -_nds32_vector_17_4b: -1: - j 1b - .size _nds32_vector_17_4b, .-_nds32_vector_17_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid17.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid17.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid17.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid17.S 2016-08-08 20:37:53.726588342 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.17, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_17 .type _nds32_vector_17, @function _nds32_vector_17: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid18_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid18_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid18_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid18_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.18, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_18_4b - .type _nds32_vector_18_4b, @function -_nds32_vector_18_4b: -1: - j 1b - .size _nds32_vector_18_4b, .-_nds32_vector_18_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid18.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid18.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid18.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid18.S 2016-08-08 20:37:53.726588342 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.18, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. 
*/ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_18 .type _nds32_vector_18, @function _nds32_vector_18: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid19_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid19_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid19_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid19_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.19, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_19_4b - .type _nds32_vector_19_4b, @function -_nds32_vector_19_4b: -1: - j 1b - .size _nds32_vector_19_4b, .-_nds32_vector_19_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid19.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid19.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid19.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid19.S 2016-08-08 20:37:53.726588342 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.19, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_19 .type _nds32_vector_19, @function _nds32_vector_19: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid20_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid20_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid20_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid20_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. 
- - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.20, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_20_4b - .type _nds32_vector_20_4b, @function -_nds32_vector_20_4b: -1: - j 1b - .size _nds32_vector_20_4b, .-_nds32_vector_20_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid20.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid20.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid20.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid20.S 2016-08-08 20:37:53.726588342 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.20, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_20 .type _nds32_vector_20, @function _nds32_vector_20: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid21_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid21_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid21_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid21_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - - .section .nds32_vector.21, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_21_4b - .type _nds32_vector_21_4b, @function -_nds32_vector_21_4b: -1: - j 1b - .size _nds32_vector_21_4b, .-_nds32_vector_21_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid21.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid21.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid21.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid21.S 2016-08-08 20:37:53.726588342 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.21, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_21 .type _nds32_vector_21, @function _nds32_vector_21: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid22_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid22_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid22_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid22_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.22, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_22_4b - .type _nds32_vector_22_4b, @function -_nds32_vector_22_4b: -1: - j 1b - .size _nds32_vector_22_4b, .-_nds32_vector_22_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid22.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid22.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid22.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid22.S 2016-08-08 20:37:53.726588342 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.22, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. 
*/ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_22 .type _nds32_vector_22, @function _nds32_vector_22: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid23_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid23_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid23_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid23_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.23, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_23_4b - .type _nds32_vector_23_4b, @function -_nds32_vector_23_4b: -1: - j 1b - .size _nds32_vector_23_4b, .-_nds32_vector_23_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid23.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid23.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid23.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid23.S 2016-08-08 20:37:53.726588342 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.23, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_23 .type _nds32_vector_23, @function _nds32_vector_23: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid24_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid24_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid24_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid24_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. 
- - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.24, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_24_4b - .type _nds32_vector_24_4b, @function -_nds32_vector_24_4b: -1: - j 1b - .size _nds32_vector_24_4b, .-_nds32_vector_24_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid24.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid24.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid24.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid24.S 2016-08-08 20:37:53.726588342 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.24, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_24 .type _nds32_vector_24, @function _nds32_vector_24: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid25_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid25_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid25_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid25_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - - .section .nds32_vector.25, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_25_4b - .type _nds32_vector_25_4b, @function -_nds32_vector_25_4b: -1: - j 1b - .size _nds32_vector_25_4b, .-_nds32_vector_25_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid25.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid25.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid25.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid25.S 2016-08-08 20:37:53.726588342 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.25, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_25 .type _nds32_vector_25, @function _nds32_vector_25: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid26_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid26_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid26_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid26_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.26, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_26_4b - .type _nds32_vector_26_4b, @function -_nds32_vector_26_4b: -1: - j 1b - .size _nds32_vector_26_4b, .-_nds32_vector_26_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid26.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid26.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid26.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid26.S 2016-08-08 20:37:53.730588496 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.26, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. 
*/ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_26 .type _nds32_vector_26, @function _nds32_vector_26: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid27_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid27_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid27_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid27_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.27, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_27_4b - .type _nds32_vector_27_4b, @function -_nds32_vector_27_4b: -1: - j 1b - .size _nds32_vector_27_4b, .-_nds32_vector_27_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid27.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid27.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid27.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid27.S 2016-08-08 20:37:53.730588496 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.27, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_27 .type _nds32_vector_27, @function _nds32_vector_27: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid28_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid28_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid28_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid28_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. 
- - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.28, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_28_4b - .type _nds32_vector_28_4b, @function -_nds32_vector_28_4b: -1: - j 1b - .size _nds32_vector_28_4b, .-_nds32_vector_28_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid28.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid28.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid28.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid28.S 2016-08-08 20:37:53.730588496 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.28, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_28 .type _nds32_vector_28, @function _nds32_vector_28: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid29_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid29_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid29_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid29_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - - .section .nds32_vector.29, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_29_4b - .type _nds32_vector_29_4b, @function -_nds32_vector_29_4b: -1: - j 1b - .size _nds32_vector_29_4b, .-_nds32_vector_29_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid29.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid29.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid29.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid29.S 2016-08-08 20:37:53.730588496 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.29, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_29 .type _nds32_vector_29, @function _nds32_vector_29: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid30_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid30_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid30_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid30_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.30, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_30_4b - .type _nds32_vector_30_4b, @function -_nds32_vector_30_4b: -1: - j 1b - .size _nds32_vector_30_4b, .-_nds32_vector_30_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid30.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid30.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid30.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid30.S 2016-08-08 20:37:53.730588496 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.30, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. 
*/ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_30 .type _nds32_vector_30, @function _nds32_vector_30: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid31_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid31_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid31_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid31_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.31, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_31_4b - .type _nds32_vector_31_4b, @function -_nds32_vector_31_4b: -1: - j 1b - .size _nds32_vector_31_4b, .-_nds32_vector_31_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid31.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid31.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid31.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid31.S 2016-08-08 20:37:53.730588496 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.31, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_31 .type _nds32_vector_31, @function _nds32_vector_31: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid32_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid32_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid32_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid32_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. 
- - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.32, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_32_4b - .type _nds32_vector_32_4b, @function -_nds32_vector_32_4b: -1: - j 1b - .size _nds32_vector_32_4b, .-_nds32_vector_32_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid32.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid32.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid32.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid32.S 2016-08-08 20:37:53.730588496 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.32, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_32 .type _nds32_vector_32, @function _nds32_vector_32: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid33_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid33_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid33_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid33_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - - .section .nds32_vector.33, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_33_4b - .type _nds32_vector_33_4b, @function -_nds32_vector_33_4b: -1: - j 1b - .size _nds32_vector_33_4b, .-_nds32_vector_33_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid33.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid33.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid33.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid33.S 2016-08-08 20:37:53.730588496 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.33, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_33 .type _nds32_vector_33, @function _nds32_vector_33: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid34_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid34_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid34_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid34_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.34, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_34_4b - .type _nds32_vector_34_4b, @function -_nds32_vector_34_4b: -1: - j 1b - .size _nds32_vector_34_4b, .-_nds32_vector_34_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid34.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid34.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid34.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid34.S 2016-08-08 20:37:53.730588496 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.34, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. 
*/ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_34 .type _nds32_vector_34, @function _nds32_vector_34: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid35_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid35_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid35_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid35_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.35, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_35_4b - .type _nds32_vector_35_4b, @function -_nds32_vector_35_4b: -1: - j 1b - .size _nds32_vector_35_4b, .-_nds32_vector_35_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid35.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid35.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid35.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid35.S 2016-08-08 20:37:53.730588496 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.35, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_35 .type _nds32_vector_35, @function _nds32_vector_35: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid36_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid36_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid36_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid36_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. 
- - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.36, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_36_4b - .type _nds32_vector_36_4b, @function -_nds32_vector_36_4b: -1: - j 1b - .size _nds32_vector_36_4b, .-_nds32_vector_36_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid36.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid36.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid36.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid36.S 2016-08-08 20:37:53.730588496 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.36, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_36 .type _nds32_vector_36, @function _nds32_vector_36: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid37_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid37_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid37_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid37_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - - .section .nds32_vector.37, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_37_4b - .type _nds32_vector_37_4b, @function -_nds32_vector_37_4b: -1: - j 1b - .size _nds32_vector_37_4b, .-_nds32_vector_37_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid37.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid37.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid37.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid37.S 2016-08-08 20:37:53.734588650 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.37, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_37 .type _nds32_vector_37, @function _nds32_vector_37: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid38_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid38_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid38_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid38_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.38, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_38_4b - .type _nds32_vector_38_4b, @function -_nds32_vector_38_4b: -1: - j 1b - .size _nds32_vector_38_4b, .-_nds32_vector_38_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid38.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid38.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid38.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid38.S 2016-08-08 20:37:53.734588650 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.38, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. 
*/ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_38 .type _nds32_vector_38, @function _nds32_vector_38: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid39_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid39_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid39_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid39_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.39, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_39_4b - .type _nds32_vector_39_4b, @function -_nds32_vector_39_4b: -1: - j 1b - .size _nds32_vector_39_4b, .-_nds32_vector_39_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid39.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid39.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid39.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid39.S 2016-08-08 20:37:53.734588650 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.39, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_39 .type _nds32_vector_39, @function _nds32_vector_39: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid40_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid40_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid40_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid40_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. 
- - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.40, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_40_4b - .type _nds32_vector_40_4b, @function -_nds32_vector_40_4b: -1: - j 1b - .size _nds32_vector_40_4b, .-_nds32_vector_40_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid40.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid40.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid40.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid40.S 2016-08-08 20:37:53.734588650 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.40, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_40 .type _nds32_vector_40, @function _nds32_vector_40: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid41_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid41_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid41_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid41_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - - .section .nds32_vector.41, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_41_4b - .type _nds32_vector_41_4b, @function -_nds32_vector_41_4b: -1: - j 1b - .size _nds32_vector_41_4b, .-_nds32_vector_41_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid41.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid41.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid41.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid41.S 2016-08-08 20:37:53.734588650 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.41, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_41 .type _nds32_vector_41, @function _nds32_vector_41: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid42_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid42_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid42_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid42_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.42, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_42_4b - .type _nds32_vector_42_4b, @function -_nds32_vector_42_4b: -1: - j 1b - .size _nds32_vector_42_4b, .-_nds32_vector_42_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid42.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid42.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid42.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid42.S 2016-08-08 20:37:53.734588650 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.42, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. 
*/ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_42 .type _nds32_vector_42, @function _nds32_vector_42: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid43_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid43_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid43_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid43_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.43, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_43_4b - .type _nds32_vector_43_4b, @function -_nds32_vector_43_4b: -1: - j 1b - .size _nds32_vector_43_4b, .-_nds32_vector_43_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid43.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid43.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid43.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid43.S 2016-08-08 20:37:53.734588650 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.43, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_43 .type _nds32_vector_43, @function _nds32_vector_43: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid44_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid44_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid44_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid44_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. 
- - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.44, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_44_4b - .type _nds32_vector_44_4b, @function -_nds32_vector_44_4b: -1: - j 1b - .size _nds32_vector_44_4b, .-_nds32_vector_44_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid44.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid44.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid44.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid44.S 2016-08-08 20:37:53.734588650 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.44, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_44 .type _nds32_vector_44, @function _nds32_vector_44: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid45_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid45_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid45_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid45_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - - .section .nds32_vector.45, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_45_4b - .type _nds32_vector_45_4b, @function -_nds32_vector_45_4b: -1: - j 1b - .size _nds32_vector_45_4b, .-_nds32_vector_45_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid45.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid45.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid45.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid45.S 2016-08-08 20:37:53.734588650 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.45, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_45 .type _nds32_vector_45, @function _nds32_vector_45: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid46_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid46_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid46_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid46_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.46, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_46_4b - .type _nds32_vector_46_4b, @function -_nds32_vector_46_4b: -1: - j 1b - .size _nds32_vector_46_4b, .-_nds32_vector_46_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid46.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid46.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid46.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid46.S 2016-08-08 20:37:53.734588650 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.46, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. 
*/ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_46 .type _nds32_vector_46, @function _nds32_vector_46: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid47_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid47_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid47_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid47_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.47, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_47_4b - .type _nds32_vector_47_4b, @function -_nds32_vector_47_4b: -1: - j 1b - .size _nds32_vector_47_4b, .-_nds32_vector_47_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid47.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid47.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid47.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid47.S 2016-08-08 20:37:53.738588805 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.47, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_47 .type _nds32_vector_47, @function _nds32_vector_47: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid48_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid48_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid48_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid48_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. 
- - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.48, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_48_4b - .type _nds32_vector_48_4b, @function -_nds32_vector_48_4b: -1: - j 1b - .size _nds32_vector_48_4b, .-_nds32_vector_48_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid48.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid48.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid48.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid48.S 2016-08-08 20:37:53.738588805 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.48, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_48 .type _nds32_vector_48, @function _nds32_vector_48: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid49_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid49_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid49_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid49_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - - .section .nds32_vector.49, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_49_4b - .type _nds32_vector_49_4b, @function -_nds32_vector_49_4b: -1: - j 1b - .size _nds32_vector_49_4b, .-_nds32_vector_49_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid49.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid49.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid49.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid49.S 2016-08-08 20:37:53.738588805 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.49, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_49 .type _nds32_vector_49, @function _nds32_vector_49: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid50_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid50_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid50_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid50_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.50, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_50_4b - .type _nds32_vector_50_4b, @function -_nds32_vector_50_4b: -1: - j 1b - .size _nds32_vector_50_4b, .-_nds32_vector_50_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid50.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid50.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid50.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid50.S 2016-08-08 20:37:53.738588805 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.50, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. 
*/ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_50 .type _nds32_vector_50, @function _nds32_vector_50: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid51_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid51_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid51_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid51_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.51, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_51_4b - .type _nds32_vector_51_4b, @function -_nds32_vector_51_4b: -1: - j 1b - .size _nds32_vector_51_4b, .-_nds32_vector_51_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid51.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid51.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid51.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid51.S 2016-08-08 20:37:53.738588805 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.51, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_51 .type _nds32_vector_51, @function _nds32_vector_51: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid52_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid52_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid52_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid52_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. 
- - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.52, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_52_4b - .type _nds32_vector_52_4b, @function -_nds32_vector_52_4b: -1: - j 1b - .size _nds32_vector_52_4b, .-_nds32_vector_52_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid52.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid52.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid52.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid52.S 2016-08-08 20:37:53.738588805 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.52, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_52 .type _nds32_vector_52, @function _nds32_vector_52: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid53_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid53_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid53_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid53_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - - .section .nds32_vector.53, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_53_4b - .type _nds32_vector_53_4b, @function -_nds32_vector_53_4b: -1: - j 1b - .size _nds32_vector_53_4b, .-_nds32_vector_53_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid53.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid53.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid53.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid53.S 2016-08-08 20:37:53.738588805 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.53, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_53 .type _nds32_vector_53, @function _nds32_vector_53: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid54_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid54_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid54_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid54_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.54, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_54_4b - .type _nds32_vector_54_4b, @function -_nds32_vector_54_4b: -1: - j 1b - .size _nds32_vector_54_4b, .-_nds32_vector_54_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid54.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid54.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid54.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid54.S 2016-08-08 20:37:53.738588805 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.54, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. 
*/ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_54 .type _nds32_vector_54, @function _nds32_vector_54: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid55_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid55_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid55_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid55_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.55, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_55_4b - .type _nds32_vector_55_4b, @function -_nds32_vector_55_4b: -1: - j 1b - .size _nds32_vector_55_4b, .-_nds32_vector_55_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid55.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid55.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid55.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid55.S 2016-08-08 20:37:53.738588805 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.55, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_55 .type _nds32_vector_55, @function _nds32_vector_55: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid56_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid56_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid56_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid56_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. 
- - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.56, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_56_4b - .type _nds32_vector_56_4b, @function -_nds32_vector_56_4b: -1: - j 1b - .size _nds32_vector_56_4b, .-_nds32_vector_56_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid56.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid56.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid56.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid56.S 2016-08-08 20:37:53.738588805 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.56, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_56 .type _nds32_vector_56, @function _nds32_vector_56: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid57_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid57_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid57_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid57_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - - .section .nds32_vector.57, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_57_4b - .type _nds32_vector_57_4b, @function -_nds32_vector_57_4b: -1: - j 1b - .size _nds32_vector_57_4b, .-_nds32_vector_57_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid57.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid57.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid57.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid57.S 2016-08-08 20:37:53.738588805 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.57, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_57 .type _nds32_vector_57, @function _nds32_vector_57: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid58_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid58_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid58_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid58_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.58, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_58_4b - .type _nds32_vector_58_4b, @function -_nds32_vector_58_4b: -1: - j 1b - .size _nds32_vector_58_4b, .-_nds32_vector_58_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid58.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid58.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid58.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid58.S 2016-08-08 20:37:53.742588960 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.58, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. 
*/ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_58 .type _nds32_vector_58, @function _nds32_vector_58: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid59_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid59_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid59_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid59_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.59, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_59_4b - .type _nds32_vector_59_4b, @function -_nds32_vector_59_4b: -1: - j 1b - .size _nds32_vector_59_4b, .-_nds32_vector_59_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid59.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid59.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid59.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid59.S 2016-08-08 20:37:53.742588960 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.59, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_59 .type _nds32_vector_59, @function _nds32_vector_59: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid60_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid60_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid60_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid60_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. 
- - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.60, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_60_4b - .type _nds32_vector_60_4b, @function -_nds32_vector_60_4b: -1: - j 1b - .size _nds32_vector_60_4b, .-_nds32_vector_60_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid60.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid60.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid60.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid60.S 2016-08-08 20:37:53.742588960 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.60, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_60 .type _nds32_vector_60, @function _nds32_vector_60: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid61_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid61_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid61_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid61_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - - .section .nds32_vector.61, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_61_4b - .type _nds32_vector_61_4b, @function -_nds32_vector_61_4b: -1: - j 1b - .size _nds32_vector_61_4b, .-_nds32_vector_61_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid61.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid61.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid61.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid61.S 2016-08-08 20:37:53.742588960 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.61, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_61 .type _nds32_vector_61, @function _nds32_vector_61: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid62_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid62_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid62_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid62_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.62, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_62_4b - .type _nds32_vector_62_4b, @function -_nds32_vector_62_4b: -1: - j 1b - .size _nds32_vector_62_4b, .-_nds32_vector_62_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid62.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid62.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid62.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid62.S 2016-08-08 20:37:53.742588960 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.62, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. 
*/ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_62 .type _nds32_vector_62, @function _nds32_vector_62: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid63_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid63_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid63_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid63_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.63, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_63_4b - .type _nds32_vector_63_4b, @function -_nds32_vector_63_4b: -1: - j 1b - .size _nds32_vector_63_4b, .-_nds32_vector_63_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid63.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid63.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid63.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid63.S 2016-08-08 20:37:53.742588960 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.63, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_63 .type _nds32_vector_63, @function _nds32_vector_63: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid64_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid64_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid64_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid64_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. 
- - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.64, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_64_4b - .type _nds32_vector_64_4b, @function -_nds32_vector_64_4b: -1: - j 1b - .size _nds32_vector_64_4b, .-_nds32_vector_64_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid64.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid64.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid64.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid64.S 2016-08-08 20:37:53.742588960 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.64, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_64 .type _nds32_vector_64, @function _nds32_vector_64: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid65_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid65_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid65_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid65_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - - .section .nds32_vector.65, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_65_4b - .type _nds32_vector_65_4b, @function -_nds32_vector_65_4b: -1: - j 1b - .size _nds32_vector_65_4b, .-_nds32_vector_65_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid65.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid65.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid65.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid65.S 2016-08-08 20:37:53.742588960 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.65, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_65 .type _nds32_vector_65, @function _nds32_vector_65: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid66_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid66_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid66_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid66_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.66, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_66_4b - .type _nds32_vector_66_4b, @function -_nds32_vector_66_4b: -1: - j 1b - .size _nds32_vector_66_4b, .-_nds32_vector_66_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid66.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid66.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid66.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid66.S 2016-08-08 20:37:53.742588960 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.66, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. 
*/ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_66 .type _nds32_vector_66, @function _nds32_vector_66: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid67_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid67_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid67_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid67_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.67, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_67_4b - .type _nds32_vector_67_4b, @function -_nds32_vector_67_4b: -1: - j 1b - .size _nds32_vector_67_4b, .-_nds32_vector_67_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid67.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid67.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid67.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid67.S 2016-08-08 20:37:53.742588960 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.67, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_67 .type _nds32_vector_67, @function _nds32_vector_67: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid68_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid68_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid68_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid68_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. 
- - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.68, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_68_4b - .type _nds32_vector_68_4b, @function -_nds32_vector_68_4b: -1: - j 1b - .size _nds32_vector_68_4b, .-_nds32_vector_68_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid68.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid68.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid68.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid68.S 2016-08-08 20:37:53.742588960 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.68, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_68 .type _nds32_vector_68, @function _nds32_vector_68: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid69_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid69_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid69_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid69_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
*/ - - .section .nds32_vector.69, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_69_4b - .type _nds32_vector_69_4b, @function -_nds32_vector_69_4b: -1: - j 1b - .size _nds32_vector_69_4b, .-_nds32_vector_69_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid69.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid69.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid69.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid69.S 2016-08-08 20:37:53.742588960 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.69, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_69 .type _nds32_vector_69, @function _nds32_vector_69: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid70_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid70_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid70_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid70_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.70, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_70_4b - .type _nds32_vector_70_4b, @function -_nds32_vector_70_4b: -1: - j 1b - .size _nds32_vector_70_4b, .-_nds32_vector_70_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid70.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid70.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid70.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid70.S 2016-08-08 20:37:53.746589115 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.70, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. 
*/ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_70 .type _nds32_vector_70, @function _nds32_vector_70: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid71_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid71_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid71_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid71_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.71, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_71_4b - .type _nds32_vector_71_4b, @function -_nds32_vector_71_4b: -1: - j 1b - .size _nds32_vector_71_4b, .-_nds32_vector_71_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid71.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid71.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid71.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid71.S 2016-08-08 20:37:53.746589115 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.71, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_71 .type _nds32_vector_71, @function _nds32_vector_71: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid72_4b.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid72_4b.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid72_4b.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid72_4b.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,34 +0,0 @@ -/* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. 
- - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - - .section .nds32_vector.72, "ax" - .vec_size 4 - .align 2 - .weak _nds32_vector_72_4b - .type _nds32_vector_72_4b, @function -_nds32_vector_72_4b: -1: - j 1b - .size _nds32_vector_72_4b, .-_nds32_vector_72_4b diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid72.S gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid72.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/vec_vid72.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/vec_vid72.S 2016-08-08 20:37:53.746589115 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -24,8 +24,15 @@ . */ .section .nds32_vector.72, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_72 .type _nds32_vector_72, @function _nds32_vector_72: diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/isr-library/wrh.S gcc-4.9.4/libgcc/config/nds32/isr-library/wrh.S --- gcc-4.9.4.orig/libgcc/config/nds32/isr-library/wrh.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/isr-library/wrh.S 2016-08-08 20:37:53.746589115 +0200 @@ -1,5 +1,5 @@ /* c-isr library stuff of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/lib1asmsrc-mculib.S gcc-4.9.4/libgcc/config/nds32/lib1asmsrc-mculib.S --- gcc-4.9.4.orig/libgcc/config/nds32/lib1asmsrc-mculib.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/lib1asmsrc-mculib.S 2016-08-08 20:37:53.750589269 +0200 @@ -1,5 +1,5 @@ /* mculib libgcc routines of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. 
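The deleted vec_vidNN_4b.S files above do not drop functionality: their 4-byte entries are folded into the corresponding vec_vidNN.S sources, which now select ".vec_size 4 / .align 2" or ".vec_size 16 / .align 4" at assembly time through the __NDS32_ISR_VECTOR_SIZE_4__ test. Each stub remains a weak symbol placed in its own .nds32_vector.NN section whose default body simply jumps to itself. A rough C analogue of one such stub is sketched below; this is illustrative only and not part of the patch, since libgcc keeps the real stubs in assembly so the .vec_size directive and the exact encoding stay under its control.

/* Sketch only: what one weak default vector stub amounts to in C.
   Vector number 60 is taken from the hunks above; every other vector
   follows the same pattern.  */
void __attribute__ ((weak, section (".nds32_vector.60")))
_nds32_vector_60 (void)
{
  for (;;)
    ;                       /* same effect as the "1: j 1b" spin loop */
}

Because the symbol is weak, an application that defines its own non-weak _nds32_vector_60 replaces this stub at link time; otherwise the stub keeps the vector slot occupied with a safe infinite loop.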
@@ -33,278 +33,281 @@ #ifdef L_addsub_sf +#define VALUA $r2 // A<<1 +#define VALUB $r3 // B<<1 +#define EXPOA $r4 // exponent(A) +#ifdef __NDS32_REDUCE_REGS__ +#define EXPOB $r8 // exponent(B) +#define MANTA $r6 // mantissa(A) related +#define MANTB $r9 // mantissa(B) related +#define SIGN $r7 // 0x80000000 +#else +#define EXPOB $r18 // exponent(B) +#define MANTA $r16 // mantissa(A) related +#define MANTB $r19 // mantissa(B) related +#define SIGN $r17 // 0x80000000 +#endif +#define W1 $r5 + .text .align 2 .global __subsf3 .type __subsf3, @function __subsf3: - push $lp - pushm $r6, $r9 - - move $r2, #0x80000000 - xor $r1, $r1, $r2 - - j .Lsfpadd +#ifdef __NDS32_EXT_PERF__ + btgl $r1, $r1, 31 +#else + move $r2, #0x80000000 + xor $r1, $r1, $r2 ! A-B is now A+(-B) +#endif .global __addsf3 .type __addsf3, @function __addsf3: - push $lp - pushm $r6, $r9 -.Lsfpadd: - srli $r5, $r0, #23 - andi $r5, $r5, #0xff - srli $r7, $r1, #23 - andi $r7, $r7, #0xff - move $r3, #0x80000000 - slli $r4, $r0, #8 - or $r4, $r4, $r3 - slli $r6, $r1, #8 - or $r6, $r6, $r3 - - addi $r9, $r5, #-1 - slti $r15, $r9, #0xfe - beqzs8 .LEspecA + slli VALUA, $r0, 1 ! A<<1 +#ifdef __NDS32_REDUCE_REGS__ + smw.adm $r6, [$sp], $r9, 0 +#endif + + slli VALUB, $r1, 1 ! B<<1 + move SIGN, #0x80000000 + slt $r15, VALUA, VALUB ! absolute value(A)= absolute value(B) + ! --------------------------------------------------------------------- +.LEcont: + xor $r1, $r1, $r0 + and $r1, $r1, SIGN ! sign(A xor B) + srli EXPOA, VALUA, 24 ! exponent(A) + srli EXPOB, VALUB, 24 ! exponent(B) + slli MANTA, VALUA, 7 ! mantissa(A)<<8 + slli MANTB, VALUB, 7 ! mantissa(B)<<8 +#if defined(__NDS32_ISA_V3__)||defined(__NDS32_ISA_V3M__) + beqc EXPOA, 0xff, .LEinfnan ! A is inf or NaN, goto .LEinfnan +#else + move W1, #0xff + beq W1, EXPOA, .LEinfnan ! A is inf or NaN, goto .LEinfnan +#endif + ! A is finite and thus B can only be finite + beqz VALUA, .LEzeroP ! A is zero, simply return zero + beqz VALUB, .LEretA ! B is zero, simply return A + sub W1, EXPOA, EXPOB ! exponent(A)-exponent(B) + slti $r15, EXPOA, #0x2 + bnez $r15, .LElab2 ! exponent(A) is 0 or 1, goto .LElab2 + sltsi $r15, W1, #0x20 + beqz $r15, .LEretA ! B is insignificant, simply return A + or MANTA, MANTA, SIGN ! decimal-part(A) + beqz EXPOB, .LElab1 + or MANTB, MANTB, SIGN ! decimal-part(B) .LElab1: - addi $r9, $r7, #-1 - slti $r15, $r9, #0xfe - beqzs8 .LEspecB + addi $r15, W1, -1 + cmovz W1, $r15, EXPOB + move $r15, MANTB + srl MANTB, MANTB, W1 + sll W1, MANTB, W1 + beq W1, $r15, .LElab2 + ori MANTB, MANTB, #2 ! B is quite small comapre to A .LElab2: - sub $r8, $r5, $r7 - sltsi $r15, $r8, #0 - bnezs8 .Li1 - sltsi $r15, $r8, #0x20 - bnezs8 .Li2 - move $r6, #2 - j .Le1 -.Li2: - move $r2, $r6 - srl $r6, $r6, $r8 - sll $r9, $r6, $r8 - beq $r9, $r2, .Le1 - ori $r6, $r6, #2 - j .Le1 -.Li1: - move $r5, $r7 - subri $r8, $r8, #0 - sltsi $r15, $r8, #0x20 - bnezs8 .Li4 - move $r4, #2 - j .Le1 -.Li4: - move $r2, $r4 - srl $r4, $r4, $r8 - sll $r9, $r4, $r8 - beq $r9, $r2, .Le1 - ori $r4, $r4, #2 - -.Le1: - and $r8, $r0, $r3 - xor $r9, $r8, $r1 - sltsi $r15, $r9, #0 - bnezs8 .LEsub1 - - #ADD($r4, $r6) - add $r4, $r4, $r6 - slt $r15, $r4, $r6 - beqzs8 .LEres - andi $r9, $r4, #1 - beqz $r9, .Li7 - ori $r4, $r4, #2 -.Li7: - srli $r4, $r4, #1 - addi $r5, $r5, #1 - subri $r15, $r5, #0xff - bnezs8 .LEres - move $r4, #0 - j .LEres + bnez $r1, .LEsub ! 
different sign, do subtraction -.LEsub1: - #SUB($r4, $r6) - move $r15, $r4 - sub $r4, $r4, $r6 - slt $r15, $r15, $r4 - beqzs8 .Li9 - subri $r4, $r4, #0 - xor $r8, $r8, $r3 - j .Le9 -.Li9: - beqz $r4, .LEzer -.Le9: -#ifdef __NDS32_PERF_EXT__ - clz $r2, $r4 -#else - pushm $r0, $r1 - pushm $r3, $r5 - move $r0, $r4 - bal __clzsi2 - move $r2, $r0 - popm $r3, $r5 - popm $r0, $r1 -#endif - sub $r5, $r5, $r2 - sll $r4, $r4, $r2 + ! --------------------------------------------------------------------- + ! same sign, do addition + ! --------------------------------------------------------------------- + add MANTA, MANTA, MANTB + slt $r15, MANTA, MANTB + beqz $r15, .LEaddnoover ! no overflow, goto .LEaddnoover +#if defined(__NDS32_ISA_V3__)||defined(__NDS32_ISA_V3M__) + beqc EXPOA, #0xfe, .LEinf +#else + subri $r15, EXPOA, #0xfe + beqz $r15, .LEinf +#endif + andi $r15, MANTA, #1 + ori W1, MANTA, #2 + cmovn MANTA, W1, $r15 + srli MANTA, MANTA, #1 + addi EXPOA, EXPOA, #1 + b .LEround + +.LEaddnoover: + bnez EXPOA, .LEround ! special handling when exponent(A) is zero + +.LEdenorm: + srli MANTA, MANTA, #8 ! 0x008nnnnn-0x00fnnnnn + b .LEpack + + ! --------------------------------------------------------------------- + ! different sign, do subtraction + ! --------------------------------------------------------------------- +.LEsub: + beq VALUA, VALUB, .LEzero + slt $r15, MANTA, MANTB + beqz $r15, .LEsub2 + srli MANTB, MANTB, 1 + addi EXPOA, EXPOA, -1 + +.LEsub2: + sub MANTA, MANTA, MANTB + slti $r15, EXPOA, 2 + bnez $r15, .LEdenorm ! only when exponent(A,B) is (0,0) or (1,0/1) +#ifdef __NDS32_EXT_PERF__ + clz W1, MANTA + slt $r15, W1, EXPOA + subri $r15, $r15, #1 + min W1, W1, EXPOA + sub EXPOA, EXPOA, W1 + sub W1, W1, $r15 + sll MANTA, MANTA, W1 +#else + b .LEloopC2 + +.LEloopC: + addi EXPOA, EXPOA, #-1 + beqz EXPOA, .LEround + add MANTA, MANTA, MANTA + +.LEloopC2: + slt $r15, MANTA, SIGN + bnez $r15, .LEloopC +#endif + + ! --------------------------------------------------------------------- + ! do rounding + ! --------------------------------------------------------------------- +.LEround: + addi MANTA, MANTA, #128 + slti $r15, MANTA, #128 + add EXPOA, EXPOA, $r15 + srli W1, MANTA, 8 + andi W1, W1, 1 + sub MANTA, MANTA, W1 + + ! --------------------------------------------------------------------- + ! pack result + ! --------------------------------------------------------------------- + slli MANTA, MANTA, #1 ! shift out implied 1 + srli MANTA, MANTA, #9 + slli $r1, EXPOA, #23 + or MANTA, MANTA, $r1 +.LEpack: + and $r0, $r0, SIGN + or $r0, $r0, MANTA -.LEres: - blez $r5, .LEund +.LEretA: +.LEret: +#ifdef __NDS32_REDUCE_REGS__ + lmw.bim $r6, [$sp], $r9, 0 +#endif + ret5 $lp -.LElab12: - #ADD($r4, $0x80) - move $r15, #0x80 - add $r4, $r4, $r15 - slt $r15, $r4, $r15 + ! 0.0f and -0.0f handling: both A and B are zeroes +.LEzeroP: + beqz $r1, .LEretA ! A and B same sign: return A - #ADDC($r5, $0x0) - add $r5, $r5, $r15 - srli $r9, $r4, #8 - andi $r9, $r9, #1 - sub $r4, $r4, $r9 - slli $r4, $r4, #1 - srli $r4, $r4, #9 - slli $r9, $r5, #23 - or $r4, $r4, $r9 - or $r0, $r4, $r8 - -.LE999: - popm $r6, $r9 - pop $lp - ret5 $lp +.LEzero: + move $r0, #0 ! 
return +0.0f + b .LEret -.LEund: - subri $r2, $r5, #1 - slti $r15, $r2, #0x20 - beqzs8 .LEzer - move $r9, #0x80000000 - or $r4, $r4, $r9 - subri $r9, $r2, #0x20 - sll $r5, $r4, $r9 - srl $r4, $r4, $r2 - beqz $r5, .Li10 - ori $r4, $r4, #1 -.Li10: - move $r5, #0 - addi $r9, $r4, #0x80 - sltsi $r15, $r9, #0 - beqzs8 .LElab12 - move $r5, #1 - j .LElab12 - -.LEspecA: - bnez $r5, .Li12 - add $r4, $r4, $r4 - beqz $r4, .Li13 -#ifdef __NDS32_PERF_EXT__ - clz $r8, $r4 -#else - pushm $r0, $r5 - move $r0, $r4 - bal __clzsi2 - move $r8, $r0 - popm $r0, $r5 -#endif - sub $r5, $r5, $r8 - sll $r4, $r4, $r8 - j .LElab1 -.Li13: - subri $r15, $r7, #0xff - beqzs8 .LEspecB - move $r9, #0x80000000 - bne $r1, $r9, .LEretB -.Li12: - add $r9, $r4, $r4 - bnez $r9, .LEnan - subri $r15, $r7, #0xff - bnezs8 .LEretA - xor $r9, $r0, $r1 - sltsi $r15, $r9, #0 - bnezs8 .LEnan - j .LEretB - -.LEspecB: - bnez $r7, .Li15 - add $r6, $r6, $r6 - beqz $r6, .LEretA -#ifdef __NDS32_PERF_EXT__ - clz $r8, $r6 + ! --------------------------------------------------------------------- + ! exponent(A) is 0xff: A is inf or NaN + ! --------------------------------------------------------------------- +.LEinfnan: + bne MANTA, SIGN, .LEnan ! A is NaN, goto .LEnan +#if defined(__NDS32_ISA_V3__)||defined(__NDS32_ISA_V3M__) + bnec EXPOB, #0xff, .LEretA ! B is finite, return A #else - pushm $r0, $r5 - move $r0, $r6 - bal __clzsi2 - move $r8, $r0 - popm $r0, $r5 + bne W1, EXPOB, .LEretA ! B is finite, return A #endif - sub $r7, $r7, $r8 - sll $r6, $r6, $r8 - j .LElab2 -.Li15: - add $r9, $r6, $r6 - bnez $r9, .LEnan - -.LEretB: - move $r0, $r1 - j .LE999 - -.LEretA: - j .LE999 -.LEzer: - move $r0, #0 - j .LE999 + ! both A and B are inf + beqz $r1, .LEretA ! same sign, return inf .LEnan: - move $r0, #0xffc00000 - j .LE999 + move $r0, #0xffc00000 ! return NaN + b .LEret + +.LEinf: + move MANTA, 0x7f800000 ! return inf + b .LEpack .size __subsf3, .-__subsf3 .size __addsf3, .-__addsf3 #endif /* L_addsub_sf */ -#ifdef L_sf_to_si +#ifdef L_fixsfsi + +#define VALUA $r1 // A<<1 +#define EXPOA VALUA // exponent(A) +#define MANTA $r2 // mantissa(A) related +#define W0 $r4 +#define W1 $r5 .text .align 2 .global __fixsfsi .type __fixsfsi, @function __fixsfsi: - push $lp - - slli $r1, $r0, #8 - move $r3, #0x80000000 - or $r1, $r1, $r3 - srli $r3, $r0, #23 - andi $r3, $r3, #0xff - subri $r2, $r3, #0x9e - blez $r2, .LJspec - sltsi $r15, $r2, #0x20 - bnezs8 .Li42 - move $r0, #0 - j .LJ999 -.Li42: - srl $r1, $r1, $r2 - sltsi $r15, $r0, #0 - beqzs8 .Li43 - subri $r1, $r1, #0 -.Li43: - move $r0, $r1 - -.LJ999: - pop $lp - ret5 $lp - -.LJspec: - move $r3, #0x7f800000 - slt $r15, $r3, $r0 - beqzs8 .Li44 - move $r0, #0x80000000 - j .LJ999 -.Li44: - move $r0, #0x7fffffff - j .LJ999 +#if defined(__NDS32_EXT_FPU_SP) + fs2si.z $fs0, $fs0 + fmfsr $r0, $fs0 + ret5 $lp +#else + slli VALUA, $r0, #1 + slli MANTA, VALUA, #7 + srli EXPOA, VALUA, #24 + subri EXPOA, EXPOA, #0x9e +#if defined(__OPTIMIZE_SIZE__)||!defined(__NDS32_EXT_PERF__) + move W1, #0x80000000 +#endif + blez EXPOA, .LJover ! number is too big + sltsi $r15, EXPOA, #0x20 + beqz $r15, .LJzero ! 
number is too small + +#if defined(__NDS32_EXT_PERF__)&&!defined(__OPTIMIZE_SIZE__) + bset MANTA, MANTA, 31 +#else + or MANTA, MANTA, W1 +#endif + srl MANTA, MANTA, EXPOA + sltsi $r15, $r0, #0 + subri $r0, MANTA, #0 + cmovz $r0, MANTA, $r15 + ret5 $lp + +.LJzero: + move $r0, #0 + ret5 $lp + +.LJover: + move W0, #0x7f800000 + slt $r15, W0, $r0 + beqzs8 .LJnan +#if defined(__NDS32_EXT_PERF__)&&!defined(__OPTIMIZE_SIZE__) + move $r0, #0x80000000 +#else + move $r0, W1 +#endif + ret5 $lp +.LJnan: +#if defined(__NDS32_EXT_PERF__)&&!defined(__OPTIMIZE_SIZE__) + move $r0, #0x7fffffff +#else + addi $r0, W1, -1 +#endif + ret5 $lp +#endif .size __fixsfsi, .-__fixsfsi -#endif /* L_sf_to_si */ +#endif /* L_fixsfsi */ @@ -416,66 +419,72 @@ #ifdef L_divdi3 - !-------------------------------------- - #ifdef __big_endian__ - #define V1H $r0 - #define V1L $r1 - #define V2H $r2 - #define V2L $r3 - #else - #define V1H $r1 - #define V1L $r0 - #define V2H $r3 - #define V2L $r2 - #endif - !-------------------------------------- +#ifdef __big_endian__ +#define P1H $r0 +#define P1L $r1 +#define P2H $r2 +#define P2L $r3 +#else +#define P1H $r1 +#define P1L $r0 +#define P2H $r3 +#define P2L $r2 +#endif + .text .align 2 .globl __divdi3 .type __divdi3, @function __divdi3: - ! prologue -#ifdef __NDS32_ISA_V3M__ - push25 $r10, 0 -#else - smw.adm $r6, [$sp], $r10, 2 -#endif - ! end of prologue - move $r8, V1L - move $r9, V1H - move $r6, V2L - move $r7, V2H - movi $r10, 0 - bgez V1H, .L80 - bal __negdi2 - move $r8, V1L - move $r9, V1H - movi $r10, -1 + ! ===================================================================== + ! uint64_t __divdi3(uint64_t n, uint64-t d) + ! + ! This function divides n by d and returns the quotient. + ! + ! stack allocation: + ! sp+8 +-----------------------+ + ! | $lp | + ! sp+4 +-----------------------+ + ! | $r6 | + ! sp +-----------------------+ + ! ===================================================================== + smw.adm $r6, [$sp], $r6, 2 + + xor $r6, P1H, P2H + srai45 $r6, 31 ! signof(numerator xor denominator) + ! abs(denominator) + bgez P2H, .L80 + neg P2H, P2H + beqz P2L, .L80 + neg P2L, P2L + addi P2H, P2H, -1 + .L80: - bgez $r7, .L81 - move V1L, $r6 - move V1H, $r7 - bal __negdi2 - move $r6, V1L - move $r7, V1H - nor $r10, $r10, $r10 + ! abs(numerator) + bgez P1H, .L81 + neg P1H, P1H + beqz P1L, .L81 + neg P1L, P1L + addi P1H, P1H, -1 + .L81: - move V2L, $r6 - move V2H, $r7 - move V1L, $r8 - move V1H, $r9 - movi $r4, 0 + ! abs(numerator) / abs(denominator) + movi $r4, 0 ! ignore remainder bal __udivmoddi4 - beqz $r10, .L82 - bal __negdi2 + ! numerator / denominator + beqz $r6, .L82 + or $r4, P1H, P1L + beqz $r4, .L82 + neg P1H, P1H + beqz P1L, .L82 + neg P1L, P1L + addi P1H, P1H, -1 + + ! to eliminate unaligned branch target + .align 2 .L82: - ! epilogue -#ifdef __NDS32_ISA_V3M__ - pop25 $r10, 0 -#else - lmw.bim $r6, [$sp], $r10, 2 + lmw.bim $r6, [$sp], $r6, 2 ret -#endif .size __divdi3, .-__divdi3 #endif /* L_divdi3 */ @@ -500,7 +509,7 @@ cmovn $r0, $r4, $r5 ! $r0 <- |a| ! --------------------------------------------------------------------- ! if (b < 0) -#ifndef __NDS32_PERF_EXT__ +#ifndef __NDS32_EXT_PERF__ ! --------------------------------------------------------------------- bgez $r1, .L3 ! if b >= 0, skip ! --------------------------------------------------------------------- @@ -512,20 +521,20 @@ !!res = udivmodsi4 (a, b, 1); ! if (den != 0) ! 
--------------------------------------------------------------------- -#else /* __NDS32_PERF_EXT__ */ +#else /* __NDS32_EXT_PERF__ */ ! b = -b; !!res = udivmodsi4 (a, b, 1); ! if (den != 0) ! --------------------------------------------------------------------- abs $r1, $r1 ! $r1 <- |b| -#endif /* __NDS32_PERF_EXT__ */ +#endif /* __NDS32_EXT_PERF__ */ beqz $r1, .L1 ! if den == 0, skip ! --------------------------------------------------------------------- ! { bit = 1; ! res = 0; ! --------------------------------------------------------------------- movi $r4, 1 ! $r4 <- bit = 1 -#ifndef __OPTIMIZE_SIZE__ +#if !(defined (__OPTIMIZE_SIZE__) && ! defined (__NDS32_ISA_V3M__)) .L6: #endif ! --------------------------------------------------------------------- @@ -587,102 +596,81 @@ #ifdef L_moddi3 - !-------------------------------------- - #ifdef __big_endian__ - #define V1H $r0 - #define V1L $r1 - #define V2H $r2 - #define V2L $r3 - #else - #define V1H $r1 - #define V1L $r0 - #define V2H $r3 - #define V2L $r2 - #endif - !-------------------------------------- +#ifdef __big_endian__ +#define P1H $r0 +#define P1L $r1 +#define P2H $r2 +#define P2L $r3 +#define OFFSET_H 0 +#define OFFSET_L 4 +#else +#define P1H $r1 +#define P1L $r0 +#define P2H $r3 +#define P2L $r2 +#define OFFSET_H 4 +#define OFFSET_L 0 +#endif + .text .align 2 .globl __moddi3 .type __moddi3, @function __moddi3: ! ===================================================================== + ! uint64_t __moddi3(uint64_t n, uint64-t d) + ! + ! This function divides n by d and returns the remainder. + ! ! stack allocation: - ! sp+32 +-----------------------+ - ! | $lp | - ! sp+28 +-----------------------+ - ! | $r6 - $r10 | + ! sp+16 +-----------------------+ + ! | remainder | ! sp+8 +-----------------------+ - ! | | + ! | $lp | ! sp+4 +-----------------------+ - ! | | + ! | $r6 | ! sp +-----------------------+ ! ===================================================================== - ! prologue -#ifdef __NDS32_ISA_V3M__ - push25 $r10, 8 -#else - smw.adm $r6, [$sp], $r10, 2 - addi $sp, $sp, -8 -#endif - ! end of prologue - !------------------------------------------ - ! __moddi3 (DWtype u, DWtype v) - ! { - ! word_type c = 0; - ! DWunion uu = {.ll = u}; - ! DWunion vv = {.ll = v}; - ! DWtype w; - ! if (uu.s.high < 0) - ! c = ~c, - ! uu.ll = -uu.ll; - !--------------------------------------------- - move $r8, V1L - move $r9, V1H - move $r6, V2L - move $r7, V2H - movi $r10, 0 ! r10 = c = 0 - bgez V1H, .L80 ! if u > 0 , go L80 - bal __negdi2 - move $r8, V1L - move $r9, V1H - movi $r10, -1 ! r10 = c = ~c - !------------------------------------------------ - ! if (vv.s.high < 0) - ! vv.ll = -vv.ll; - !---------------------------------------------- + addi $sp, $sp, -16 + smw.bi $r6, [$sp], $r6, 2 + + srai $r6, P1H, 31 ! signof(numerator) + ! abs(denominator) + bgez P2H, .L80 + neg P2H, P2H + beqz P2L, .L80 + neg P2L, P2L + addi P2H, P2H, -1 + .L80: - bgez $r7, .L81 ! if v > 0 , go L81 - move V1L, $r6 - move V1H, $r7 - bal __negdi2 - move $r6, V1L - move $r7, V1H - !------------------------------------------ - ! (void) __udivmoddi4 (uu.ll, vv.ll, &w); - ! if (c) - ! w = -w; - ! return w; - !----------------------------------------- + ! abs(numerator) + beqz $r6, .L81 + neg P1H, P1H + beqz P1L, .L81 + neg P1L, P1L + addi P1H, P1H, -1 + .L81: - move V2L, $r6 - move V2H, $r7 - move V1L, $r8 - move V1H, $r9 - addi $r4, $sp, 0 + ! abs(numerator) % abs(denominator) + addi $r4, $sp, 8 bal __udivmoddi4 - lwi $r0, [$sp+(0)] ! 
le: sp + 0 is low, be: sp + 0 is high - lwi $r1, [$sp+(4)] ! le: sp + 4 is low, be: sp + 4 is high - beqz $r10, .L82 - bal __negdi2 + ! numerator % denominator + lwi P1L, [$sp+(8+OFFSET_L)] + lwi P1H, [$sp+(8+OFFSET_H)] + beqz $r6, .L82 + or $r4, P1H, P1L + beqz $r4, .L82 + neg P1H, P1H + beqz P1L, .L82 + neg P1L, P1L + addi P1H, P1H, -1 + + ! to eliminate unaligned branch target + .align 2 .L82: - ! epilogue -#ifdef __NDS32_ISA_V3M__ - pop25 $r10, 8 -#else - addi $sp, $sp, 8 - lmw.bim $r6, [$sp], $r10, 2 + lmw.bi $r6, [$sp], $r6, 2 + addi $sp, $sp, 16 ret -#endif .size __moddi3, .-__moddi3 #endif /* L_moddi3 */ @@ -822,197 +810,302 @@ #ifdef L_udivdi3 - !-------------------------------------- - #ifdef __big_endian__ - #define V1H $r0 - #define V1L $r1 - #define V2H $r2 - #define V2L $r3 - #else - #define V1H $r1 - #define V1L $r0 - #define V2H $r3 - #define V2L $r2 - #endif - !-------------------------------------- - .text .align 2 .globl __udivdi3 .type __udivdi3, @function __udivdi3: - ! prologue -#ifdef __NDS32_ISA_V3M__ - push25 $r8, 0 -#else - smw.adm $r6, [$sp], $r8, 2 -#endif - ! end of prologue - movi $r4, 0 - bal __udivmoddi4 - ! epilogue -#ifdef __NDS32_ISA_V3M__ - pop25 $r8, 0 -#else - lmw.bim $r6, [$sp], $r8, 2 - ret -#endif + movi $r4, 0 ! ignore remainder + b __udivmoddi4 .size __udivdi3, .-__udivdi3 #endif /* L_udivdi3 */ +#ifdef L_umul_ppmm + +#ifdef __big_endian__ + #define P1H $r0 + #define P1L $r1 + #define P2H $r2 + #define P2L $r3 +#else + #define P1H $r1 + #define P1L $r0 + #define P2H $r3 + #define P2L $r2 +#endif +#define W1 $r5 + + .text + .align 2 + .globl umul_ppmm + .type umul_ppmm, @function + ! ===================================================================== + ! uint64_t umul_ppmm(uint32_t a, uint32_t b) + ! + ! This function multiplies `a' by `b' to obtain a 64-bit product. The + ! product is broken into two 32-bit pieces which are stored in the zl + ! (low-part at P1L) and zh (high-part at P1H). + ! ===================================================================== +umul_ppmm: + ! --------------------------------------------------------------------- + ! uint16_t ah, al, bh, bl; + ! uint32_t zh, zA, zB, zl; + ! al = a&0xffff; + ! ah = a>>16; + ! bl = b&0xffff; + ! bh = b>>16; + ! --------------------------------------------------------------------- + zeh P2L, $r0 ! al=a&0xffff + srli P2H, $r0, 16 ! ah=a>>16 +#ifdef __NDS32_EB__ + srli P1H, $r1, 16 ! bh=b>>16 + zeh P1L, $r1 ! bl=b&0xffff +#else + zeh P1L, $r1 ! bl=b&0xffff + srli P1H, $r1, 16 ! bh=b>>16 +#endif + ! --------------------------------------------------------------------- + ! zA = ( (uint32_t) al ) * bh; + ! zl = ( (uint32_t) al ) * bl; + ! zB = ( (uint32_t) ah ) * bl; + ! --------------------------------------------------------------------- + mul W1, P2L, P1H ! zA=al*bh + mul P2L, P2L, P1L ! zl=al*bl + mul P1L, P2H, P1L ! zB=ah*bl + ! --------------------------------------------------------------------- + ! zh = ( (uint32_t) ah ) * bh; + ! zA += zB; + ! zh += ( ( (uint32_t) ( zA < zB ) )<<16 ) + ( zA>>16 ); + ! --------------------------------------------------------------------- + add W1, W1, P1L ! zA+=zB + slt $ta, W1, P1L ! zA>16 + add P1H, P1H, $ta ! zh+=(zA>>16) + ! --------------------------------------------------------------------- + ! zA <<= 16; + ! zl += zA; + ! zh += ( zl < zA ); + ! *zlPtr = zl; + ! *zhPtr = zh; + ! --------------------------------------------------------------------- + slli P1L, W1, 16 ! zA<<=16 + add P1L, P1L, P2L ! zl+=zA + slt $ta, P1L, P2L ! 
zl> ((4 * 8) / 2)); - ! __d0 = ((USItype) (d) & (((USItype) 1 << ((4 * 8) / 2)) - 1)); - ! __r1 = (n1) % __d1; - ! __q1 = (n1) / __d1; + ! __d1 = ((USItype) (d) >> (W_TYPE_SIZE / 2)); + ! __d0 = ((USItype) (d) & (((USItype) 1 << (W_TYPE_SIZE / 2)) - 1)); + ! __r1 = (n1) % __d1; __q1 = (n1) / __d1; ! __m = (USItype) __q1 * __d0; - ! __r1 = __r1 * ((USItype) 1 << ((4 * 8) / 2)) | ((USItype) (n0) >> ((4 * 8) / 2)); - ! if (__r1 < __m) - ! { - !------------------------------------------------------ - smw.adm $r0, [$sp], $r4, 2 ! store $lp, when use BASELINE_V1,and must store $r0-$r3 - srli $r7, $r6, 16 ! $r7 = d1 =__ll_highpart (d) - movi $ta, 65535 - and $r8, $r6, $ta ! $r8 = d0 = __ll_lowpart (d) - - divr $r9, $r10, $r4, $r7 ! $r9 = q1, $r10 = r1 - and $r4, $r5, $ta ! $r4 = __ll_lowpart (n0) - slli $r10, $r10, 16 ! $r10 = r1 << 16 - srli $ta, $r5, 16 ! $ta = __ll_highpart (n0) - - or $r10, $r10, $ta ! $r10 <- $r0|$r3=__r1 - mul $r5, $r9, $r8 ! $r5 = m = __q1*__d0 - slt $ta, $r10, $r5 ! $ta <- __r1<__m - beqz $ta, .L2 !if yes,skip - !------------------------------------------------------ - ! __q1--, __r1 += (d); - ! if (__r1 >= (d)) - ! { + ! __r1 = __r1 * ((USItype) 1 << (W_TYPE_SIZE / 2)) | ((USItype) (n0) >> (W_TYPE_SIZE / 2)); + ! if (__r1 < __m) { !------------------------------------------------------ + srli DHI, D, 16 ! d1 = ll_highpart (d) + zeh W1, NLO ! ll_lowpart (n0) + srli W2, NLO, 16 ! ll_highpart (n0) + divr QHI, RHI, NHI, DHI ! q1 = n1 / __d1, r1 = n1 % __d1 + zeh DLO, D ! d0 = ll_lowpart (d) + slli RHI, RHI, 16 ! r1 << 16 + or RHI, RHI, W2 ! __r1 = (__r1 << 16) | ll_highpart(n0) + mul M, QHI, DLO ! m = __q1*__d0 + slt $ta, RHI, M ! __r1 < __m + beqz $ta, .L2 ! if no, skip - add $r10, $r10, $r6 !$r10 <- __r1+d=__r1 - addi $r9, $r9, -1 !$r9 <- __q1--=__q1 - slt $ta, $r10, $r6 !$ta <- __r1= (d) && __r1 < __m) { !------------------------------------------------------ + addi QHI, QHI, -1 ! __q1-- + add RHI, RHI, D ! __r1 += d + slt $ta, RHI, D ! __r1 < d + bnez $ta, .L2 ! if yes, skip + slt $ta, RHI, M ! __r1 < __m + beqz $ta, .L2 ! if no, skip - slt $ta, $r10, $r5 !$ta <- __r1<__m - beqz $ta, .L2 !if yes,skip !------------------------------------------------------ - ! __q1--, __r1 += (d); - ! } - ! } + ! __q1--, __r1 += (d); + ! } ! } !------------------------------------------------------ + addi QHI, QHI, -1 ! __q1-- + add RHI, RHI, D ! __r1 += d - addi $r9, $r9, -1 !$r9 <- __q1--=__q1 - add $r10, $r10, $r6 !$r2 <- __r1+d=__r1 .L2: !------------------------------------------------------ ! __r1 -= __m; - ! __r0 = __r1 % __d1; - ! __q0 = __r1 / __d1; + ! __r0 = __r1 % __d1; __q0 = __r1 / __d1; ! __m = (USItype) __q0 * __d0; - ! __r0 = __r0 * ((USItype) 1 << ((4 * 8) / 2)) \ - ! | ((USItype) (n0) & (((USItype) 1 << ((4 * 8) / 2)) - 1)); - ! if (__r0 < __m) - ! { - !------------------------------------------------------ - sub $r10, $r10, $r5 !$r10 <- __r1-__m=__r1 - divr $r7, $r10, $r10, $r7 !$r7 <- r1/__d1=__q0,$r10 <- r1%__d1=__r0 - slli $r10, $r10, 16 !$r10 <- __r0<<16 - mul $r5, $r8, $r7 !$r5 <- __q0*__d0=__m - or $r10, $r4, $r10 !$r3 <- $r0|__ll_lowpart (n0) =__r0 - slt $ta, $r10, $r5 !$ta <- __r0<__m - beqz $ta, .L5 !if yes,skip - !------------------------------------------------------ - ! __q0--, __r0 += (d); - ! if (__r0 >= (d)) - ! 
{ - !------------------------------------------------------ - - add $r10, $r10, $r6 !$r10 <- __r0+d=__r0 - addi $r7, $r7, -1 !$r7 <- __q0--=__q0 - slt $ta, $r10, $r6 !$ta <- __r0= (d) && __r0 < __m) { + !------------------------------------------------------ + add R, R, D ! __r0 += d + addi Q, Q, -1 ! __q0-- + slt $ta, R, D ! __r0 < d + bnez $ta, .L5 ! if yes, skip + slt $ta, R, M ! __r0 < __m + beqz $ta, .L5 ! if no, skip - slt $ta, $r10, $r5 !$ta <- __r0<__m - beqz $ta, .L5 !if yes,skip !------------------------------------------------------ - ! __q0--, __r0 += (d); - ! } - ! } - ! } + ! __q0--, __r0 += (d); + ! } + ! } !------------------------------------------------------ + add R, R, D ! __r0 += d + addi Q, Q, -1 ! __q0-- - add $r10, $r10, $r6 !$r3 <- __r0+d=__r0 - addi $r7, $r7, -1 !$r2 <- __q0--=__q0 .L5: !------------------------------------------------------ - ! __r0 -= __m; - ! *q = (USItype) __q1 * ((USItype) 1 << ((4 * 8) / 2)) | __q0; - ! *r = __r0; + ! __r0 -= __m; + ! *q = (USItype) __q1 * ((USItype) 1 << (W_TYPE_SIZE / 2)) | __q0; + ! *r = __r0; !} !------------------------------------------------------ - - sub $r8, $r10, $r5 !$r8 = r = r0 = __r0-__m - slli $r9, $r9, 16 !$r9 <- __q1<<16 - or $r7, $r9, $r7 !$r7 = q = $r9|__q0 - lmw.bim $r0, [$sp], $r4, 2 + sub R, R, M ! r = r0 = __r0 - __m + slli QHI, QHI, 16 ! __q1 << 16 + or Q, Q, QHI ! q = (__q1 << 16) | __q0 ret .size fudiv_qrnnd, .-fudiv_qrnnd + + +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) +#define NREGS $r6 +#define DREGS $r8 +#else +#define NREGS $r16 +#define DREGS $r18 +#endif +#ifdef __big_endian__ + #define P2H $r2 + #define P2L $r3 +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + #define NUMHI $r6 + #define NUMLO $r7 + #define DENHI $r8 + #define DENLO $r9 +#else + #define NUMHI $r16 + #define NUMLO $r17 + #define DENHI $r18 + #define DENLO $r19 + #define W3H $r22 + #define W3L $r23 +#endif + #define OFFSET_L 4 + #define OFFSET_H 0 +#else + #define P2H $r3 + #define P2L $r2 +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + #define NUMHI $r7 + #define NUMLO $r6 + #define DENHI $r9 + #define DENLO $r8 +#else + #define NUMHI $r17 + #define NUMLO $r16 + #define DENHI $r19 + #define DENLO $r18 + #define W3H $r23 + #define W3L $r22 +#endif + #define OFFSET_L 0 + #define OFFSET_H 4 +#endif +#define MHI P1H // m1 +#define MLO P1L // m0 +#if defined(__NDS32_EXT_PERF__)||!defined(__NDS32_REDUCE_REGS__) +#define BM $r21 // bm +#endif +#undef W2 +#define W2 $r3 + .align 2 .globl __udivmoddi4 .type __udivmoddi4, @function -__udivmoddi4: ! ===================================================================== + ! uint64_t __udivmoddi4(uint64_t n, uint64_t d, uint64_t *r) + ! + ! This function divides 64-bit numerator n by 64-bit denominator d. The + ! quotient is returned as 64-bit return value and the 64-bit remainder + ! is stored at the input address r. ! stack allocation: +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) ! sp+40 +------------------+ - ! | q1 | - ! sp+36 +------------------+ - ! | q0 | + ! | q | ! sp+32 +------------------+ ! | bm | ! sp+28 +------------------+ @@ -1020,422 +1113,520 @@ ! sp+24 +------------------+ ! | $fp | ! sp+20 +------------------+ - ! | $r6 - $r10 | + ! | $r10 | + ! sp+16 +------------------+ + ! | $r6 - $r9 | + ! sp +------------------+ +#else + ! sp+8 +------------------+ + ! | $lp | + ! sp+4 +------------------+ + ! | $fp | ! sp +------------------+ +#endif ! 
===================================================================== - + !------------------------------------------------------ + !UDWtype __udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp) + !{ + ! const DWunion nn = {.ll = n}; + ! const DWunion dd = {.ll = d}; + ! DWunion rr; + ! UWtype d0, d1, n0, n1, n2; + ! UWtype q0, q1; + ! UWtype b, bm; + !------------------------------------------------------ + ! in regs: ($r0,$r1) - NUMERATOR, ($r2,$r3) - DENOMINATOR, + ! $r4 - pointer to REMAINDER + ! out regs: ($r0,$r1) - QUOTIENT +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + ! scratch: $r2-$r9 , $ta, $fp, $lp +#else + ! scratch: $r2-$r5, $ta, $r16-$r21 $fp, $lp +#endif + !------------------------------------------------------ +__udivmoddi4: +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) addi $sp, $sp, -40 - smw.bi $r6, [$sp], $r10, 10 + smw.bi $r6, [$sp], $r10 , 10 +#else + smw.adm $sp, [$sp], $sp, 10 +#endif + !------------------------------------------------------ ! d0 = dd.s.low; ! d1 = dd.s.high; ! n0 = nn.s.low; ! n1 = nn.s.high; - ! if (d1 == 0) - ! { + ! if (d1 == 0) { !------------------------------------------------------ + movd44 NREGS, $r0 ! (n1,n0) + movd44 DREGS, $r2 ! (d1,d0) + move $fp, $r4 ! rp + bnez P2H, .L9 ! if d1 != 0, skip - move $fp, $r4 !$fp <- rp - bnez P2H, .L9 !if yes,skip !------------------------------------------------------ - ! if (d0 > n1) - ! { + ! if (d0 > n1) { + ! /* 0q = nn / 0D */ !------------------------------------------------------ + slt $ta, NUMHI, DENLO ! n1 < d0 + beqz $ta, .L10 ! if no, skip - slt $ta, P1H, P2L !$ta <- n1> (W_TYPE_SIZE - bm)); + ! n0 = n0 << bm; + ! } !------------------------------------------------------ - ! d0 = d0 << bm; - ! n1 = (n1 << bm) | (n0 >> ((4 * 8) - bm)); - ! n0 = n0 << bm; + sll DENLO, DENLO, $r0 ! d0 <<= bm + subri W1, $r0, 32 ! 32 - bm + srl W1, NUMLO, W1 ! n0 >> (32 - bm) + sll NUMHI, NUMHI, $r0 ! n1 << bm + or NUMHI, NUMHI, W1 ! n1 = (n1 << bm) | (n0 >> (32 - bm)) + sll NUMLO, NUMLO, $r0 ! n0 <<= bm + +.LZskipnorm1: + !------------------------------------------------------ + ! fudiv_qrnnd (&q0, &n0, n1, n0, d0); + ! q1 = 0; + ! /* Remainder in n0 >> bm. */ ! } !------------------------------------------------------ - - subri $r5, $r7, 32 !$r5 <- 32-bm - srl $r5, P1L, $r5 !$r5 <- n0>>$r5 - sll $r6, P1H, $r7 !$r6 <- n1< n1) - !------------------------------------------------------ - - move $r4,P1H ! give fudiv_qrnnd args - move $r5,P1L ! - move $r6,P2L ! - bal fudiv_qrnnd !calcaulte q0 n0 - movi $r6, 0 !P1L <- 0 - swi $r7,[$sp+32] !q0 - swi $r6,[$sp+36] !q1 - move P1L,$r8 !n0 + movd44 $r0, NREGS ! (n1,n0) + move $r2, DENLO ! d0 + bal fudiv_qrnnd ! calcaulte q0 n0 +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + swi P1H, [$sp+(32+OFFSET_L)]! q0 +#else + move W3L, P1H ! q0 +#endif + move NUMLO, P1L ! n0 +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + move W1, 0 + swi W1, [$sp+(32+OFFSET_H)] ! q1 = 0 +#else + move W3H, 0 ! q1 = 0 +#endif b .L19 + .L10: !------------------------------------------------------ - ! else #if (d0 > n1) - ! { - ! if(d0 == 0) + ! else { + ! if (d0 == 0) + ! d0 = 1 / d0; /* Divide intentionally by zero. */ !------------------------------------------------------ + beqz P2L, .LZdivzero ! if d0 != 0, skip - bnez P2L, .L20 !if yes,skip !------------------------------------------------------ - ! d0 = 1 / d0; + ! count_leading_zeros (bm, d0); + ! if (bm == 0) { + ! 
/* From (n1 >= d0), (the most significant bit of d0 is set), + ! conclude (the most significant bit of n1 is set) and (the + ! leading quotient digit q1 = 1). + ! This special case is necessary, not an optimization. + ! (Shifts counts of W_TYPE_SIZE are undefined.) */ !------------------------------------------------------ - - movi $r4, 1 !P1L <- 1 - divr P2L, $r4, $r4, P2L !$r9=1/d0,P1L=1%d0 -.L20: - -#ifndef __NDS32_PERF_EXT__ - smw.adm $r0, [$sp], $r5, 0 - move $r0, P2L - bal __clzsi2 - move $r7, $r0 - lmw.bim $r0, [$sp], $r5, 0 +#ifdef __NDS32_EXT_PERF__ + clz $r0, DENLO +#else + move $r0, DENLO + bal __clzsi2 +#endif +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + swi $r0, [$sp+(28)] ! bm #else - clz $r7, P2L + move BM, $r0 ! bm #endif - swi $r7,[$sp+(28)] ! store bm - beqz $r7, .L28 ! if yes,skip + bnez $r0, .LZnorm1 ! if bm != 0, skip + !------------------------------------------------------ - ! b = (4 * 8) - bm; - ! d0 = d0 << bm; - ! n2 = n1 >> b; - ! n1 = (n1 << bm) | (n0 >> b); - ! n0 = n0 << bm; - ! fudiv_qrnnd (&q1, &n1, n2, n1, d0); - ! } + ! n1 -= d0; + ! q1 = 1; + ! } !------------------------------------------------------ - - subri $r10, $r7, 32 !$r10 <- 32-bm=b - srl $r4, P1L, $r10 !$r4 <- n0>>b - sll $r5, P1H, $r7 !$r5 <- n1<>b=n2 !for fun - - move $r6,P2L !for fun - bal fudiv_qrnnd !caculate q1, n1 - - swi $r7,[$sp+(36)] ! q1 store - move P1H,$r8 ! n1 store - - move $r4,$r8 ! prepare for next fudiv_qrnnd() - move $r5,P1L - move $r6,P2L + sub NUMHI, NUMHI, DENLO ! n1 -= d0 +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + movi W1, 1 + swi W1, [$sp+(32+OFFSET_H)] ! q1 = 1 +#else + movi W3H, 1 ! q1 = 1 +#endif b .L29 -.L28: + + ! to eliminate unaligned branch target + .align 2 +.LZnorm1: !------------------------------------------------------ - ! else // bm != 0 - ! { - ! n1 -= d0; - ! q1 = 1; - ! + ! else { + ! /* Normalize. */ + ! b = W_TYPE_SIZE - bm; + ! d0 = d0 << bm; + ! n2 = n1 >> b; + ! n1 = (n1 << bm) | (n0 >> b); + ! n0 = n0 << bm; + ! fudiv_qrnnd (&q1, &n1, n2, n1, d0); + ! } !------------------------------------------------------ + subri $ta, $r0, 32 ! b = 32 - bm + sll DENLO, DENLO, $r0 ! d0 <<= bm + move $r2, DENLO + srl W0, NUMLO, $ta ! n0 >> b + sll W1, NUMHI, $r0 ! n1 << bm + sll NUMLO, NUMLO, $r0 ! n0 <<= bm + or P1L, W1, W0 ! n1 = (n1 << bm) | (n0 >> b) + srl P1H, NUMHI, $ta ! n2 = n1 >> b + bal fudiv_qrnnd ! caculate q1, n1 +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + swi P1H, [$sp+(32+OFFSET_H)]! q1 +#else + move W3H, P1H ! q1 +#endif + move NUMHI, P1L ! n1 - sub P1H, P1H, P2L !P1L <- n1-d0=n1 - movi $ta, 1 ! - swi $ta, [$sp+(36)] !1 -> [$sp+(36)] - - move $r4,P1H ! give fudiv_qrnnd args - move $r5,P1L - move $r6,P2L .L29: !------------------------------------------------------ - ! fudiv_qrnnd (&q0, &n0, n1, n0, d0); + ! /* n1 != d0... */ + ! fudiv_qrnnd (&q0, &n0, n1, n0, d0); + ! /* Remainder in n0 >> bm. */ + ! } !------------------------------------------------------ + movd44 $r0, NREGS ! (n1,n0) + move $r2, DENLO ! d0 + bal fudiv_qrnnd ! calcuate q0, n0 +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + swi P1H, [$sp+(32+OFFSET_L)] +#else + move W3L, P1H +#endif + move NUMLO, P1L - bal fudiv_qrnnd !calcuate q0, n0 - swi $r7,[$sp+(32)] !q0 store - move P1L,$r8 !n0 + ! to eliminate unaligned branch target + .align 2 .L19: !------------------------------------------------------ - ! if (rp != 0) - ! { + ! 
if (rp != 0) { !------------------------------------------------------ + beqz $fp, .LZsetq ! if rp == 0, skip - beqz $fp, .L31 !if yes,skip !------------------------------------------------------ - ! rr.s.low = n0 >> bm; - ! rr.s.high = 0; - ! *rp = rr.ll; - ! } + ! rr.s.low = n0 >> bm; rr.s.high = 0; + ! *rp = rr.ll; + ! } + ! } !------------------------------------------------------ +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + lwi W2, [$sp+(28)] ! bm +#endif + movi NUMHI, 0 +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + srl NUMLO, NUMLO, W2 ! n0 >> bm +#else + srl NUMLO, NUMLO, BM ! n0 >> bm +#endif + b .LZsetr + + ! to eliminate unaligned branch target + .align 2 +.LZdivzero: + ! divide-by-zero exception or quotient = 0 and remainder = 0 returned + divr NUMHI, NUMLO, DENLO, DENLO + +.LZqzero: + movi P1H, 0 + movi P1L, 0 + beqz $fp, .LZret ! if rp == NULL, skip + + swi NUMLO, [$fp+OFFSET_L] ! *rp + swi NUMHI, [$fp+OFFSET_H] + b .LZret - movi $r5, 0 !$r5 <- 0 - lwi $r7,[$sp+(28)] !load bm - srl $r4, P1L, $r7 !$r4 <- n0>>bm - swi $r4, [$fp+OFFSET_L] !r0 !$r4 -> [$sp+(48)] - swi $r5, [$fp+OFFSET_H] !r1 !0 -> [$sp+(52)] - b .L31 .L9: !------------------------------------------------------ - ! else # d1 == 0 - ! { - ! if(d1 > n1) - ! { + ! else { + ! if (d1 > n1) { + ! /* 00 = nn / DD */ + ! q0 = 0; q1 = 0; + ! /* Remainder in n1n0. */ + ! if (rp != 0) { + ! rr.s.low = n0; rr.s.high = n1; + ! *rp = rr.ll; + ! } + ! } !------------------------------------------------------ + slt $ta, NUMHI, DENHI ! n1 < d1 + bnez $ta, .LZqzero ! if yes, skip - slt $ta, P1H, P2H !$ta <- n1 [$sp+(40)]=q1 - swi $r5, [$sp+(36)] !q1 !0 -> [$sp+(32)]=q0 - beqz $fp, .L31 !if yes,skip !------------------------------------------------------ - ! rr.s.low = n0; - ! rr.s.high = n1; - ! *rp = rr.ll; - ! } - !------------------------------------------------------ - - swi P1L, [$fp+OFFSET_L] !P1L -> [rp] - swi P1H, [$fp+OFFSET_H] !P1H -> [rp+4] - b .L31 -.L32: -#ifndef __NDS32_PERF_EXT__ - smw.adm $r0, [$sp], $r5, 0 - move $r0, P2H - bal __clzsi2 - move $r7, $r0 - lmw.bim $r0, [$sp], $r5, 0 -#else - clz $r7,P2H -#endif - swi $r7,[$sp+(28)] !$r7=bm store - beqz $r7, .L42 !if yes,skip - !------------------------------------------------------ - ! USItype m1, m0; - ! b = (4 * 8) - bm; - ! d1 = (d0 >> b) | (d1 << bm); - ! d0 = d0 << bm; - ! n2 = n1 >> b; - ! n1 = (n0 >> b) | (n1 << bm); - ! n0 = n0 << bm; - ! fudiv_qrnnd (&q0, &n1, n2, n1, d1); - !------------------------------------------------------ - - subri $r10, $r7, 32 !$r10 <- 32-bm=b - srl $r5, P2L, $r10 !$r5 <- d0>>b - sll $r6, P2H, $r7 !$r6 <- d1<>b=n2 !!! func - srl $r8, P1L, $r10 !$r8 <- n0>>b !!$r8 - sll $r9, P1H, $r7 !$r9 <- n1<> b) | (d1 << bm); + ! d0 = d0 << bm; + ! n2 = n1 >> b; + ! n1 = (n0 >> b) | (n1 << bm); + ! n0 = n0 << bm; + ! fudiv_qrnnd (&q0, &n1, n2, n1, d1); + !------------------------------------------------------ + subri W0, $r0, 32 ! b = 32 - bm + srl W1, DENLO, W0 ! d0 >> b + sll $r2, DENHI, $r0 ! d1 << bm + or $r2, $r2, W1 ! d1 = (d0 >> b) | (d1 << bm) + move DENHI, $r2 + sll DENLO, DENLO, $r0 ! d0 <<= bm + srl W2, NUMLO, W0 ! n0 >> b + sll NUMLO, NUMLO, $r0 ! n0 <<= bm + sll P1L, NUMHI, $r0 ! n1 << bm + srl P1H, NUMHI, W0 ! n2 = n1 >> b + or P1L, P1L, W2 ! n1 = (n0 >> b) | (n1 << bm) + bal fudiv_qrnnd ! 
calculate q0, n1 +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + swi P1H, [$sp+(32+OFFSET_L)] +#else + move W3L, P1H +#endif + move NUMHI, P1L !---------------------------------------------------- - ! #umul_ppmm (m1, m0, q0, d0); - ! do - ! { USItype __x0, __x1, __x2, __x3; - ! USItype __ul, __vl, __uh, __vh; - ! __ul = ((USItype) (q0) & (((USItype) 1 << ((4 * 8) / 2)) - 1)); - ! __uh = ((USItype) (q0) >> ((4 * 8) / 2)); - ! __vl = ((USItype) (d0) & (((USItype) 1 << ((4 * 8) / 2)) - 1)); - ! __vh = ((USItype) (d0) >> ((4 * 8) / 2)); - ! __x0 = (USItype) __ul * __vl; - ! __x1 = (USItype) __ul * __vh; - ! __x2 = (USItype) __uh * __vl; - ! __x3 = (USItype) __uh * __vh; - ! __x1 += ((USItype) (__x0) >> ((4 * 8) / 2)); - ! __x1 += __x2; - ! if (__x1 < __x2) - ! __x3 += ((USItype) 1 << ((4 * 8) / 2)); - ! (m1) = __x3 + ((USItype) (__x1) >> ((4 * 8) / 2)); - ! (m0) = (USItype)(q0*d0); - ! } - ! if (m1 > n1) + ! umul_ppmm (m1, m0, q0, d0); + !!!!!! do { + !!!!!! USItype __x0, __x1, __x2, __x3; + !!!!!! USItype __ul, __vl, __uh, __vh; + !!!!!! __ul = ((USItype) (q0) & (((USItype) 1 << (W_TYPE_SIZE / 2)) - 1)); + !!!!!! __uh = ((USItype) (q0) >> (W_TYPE_SIZE / 2)); + !!!!!! __vl = ((USItype) (d0) & (((USItype) 1 << (W_TYPE_SIZE / 2)) - 1)); + !!!!!! __vh = ((USItype) (d0) >> (W_TYPE_SIZE / 2)); + !!!!!! __x0 = (USItype) __ul * __vl; + !!!!!! __x1 = (USItype) __ul * __vh; + !!!!!! __x2 = (USItype) __uh * __vl; + !!!!!! __x3 = (USItype) __uh * __vh; + !!!!!! __x1 += ((USItype) (__x0) >> (W_TYPE_SIZE / 2)); + !!!!!! __x1 += __x2; + !!!!!! if (__x1 < __x2) + !!!!!! __x3 += ((USItype) 1 << (W_TYPE_SIZE / 2)); + !!!!!! (m1) = __x3 + ((USItype) (__x1) >> (W_TYPE_SIZE / 2)); + !!!!!! (m0) = (USItype)(q0*d0); + !!!!!! } + ! if (m1 > n1 || (m1 == n1 && m0 > n0)) { !--------------------------------------------------- #ifdef __NDS32_ISA_V3M__ - !mulr64 $r4, P2L, $r6 - smw.adm $r0, [$sp], $r3, 0 - move P1L, P2L - move P2L, $r6 - movi P1H, 0 - movi P2H, 0 - bal __muldi3 - movd44 $r4, $r0 - lmw.bim $r0, [$sp], $r3, 0 - move $r8, W6H - move $r5, W6L + move P1L, DENLO ! d0 + bal umul_ppmm #else - mulr64 $r4, P2L, $r6 - move $r8, W6H - move $r5, W6L + mulr64 $r0, P1H, DENLO #endif - slt $ta, P1H, $r8 !$ta <- n1 n0) - !------------------------------------------------------ + slt $ta, NUMHI, MHI ! n1 < m1 + bnez $ta, .L46 ! if yes, skip + bne MHI, NUMHI, .L45 ! if m1 != n1, skip + slt $ta, NUMLO, MLO ! n0 < m0 + beqz $ta, .L45 ! if no, skip - slt $ta, P1L, $r5 !$ta <- n0 (m0)); - ! (m0) = __x; - ! } - ! } - !------------------------------------------------------ + ! q0--; + ! sub_ddmmss (m1, m0, m1, m0, d1, d0); + !!!!!! do { + !!!!!! USItype __x; + !!!!!! __x = (m0) - (d0); + !!!!!! (m1) = (m1) - (d1) - (__x > (m0)); + !!!!!! (m0) = __x; + !!!!!! } + ! } + !------------------------------------------------------ +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + lwi W2, [$sp+(32+OFFSET_L)] + sub MHI, MHI, DENHI ! m1 - d1 + addi W2, W2, -1 ! q0-- + swi W2, [$sp+(32+OFFSET_L)] +#else + addi W3L, W3L, -1 ! q0-- + sub MHI, MHI, DENHI ! m1 - d1 +#endif + sub W2, MLO, DENLO ! __x = m0 - d0 + slt $ta, MLO, W2 ! m0 < __x + sub MHI, MHI, $ta ! m1 = m1 - d1 - (__x > m0) + move MLO, W2 ! m0 = __x - sub $r4, $r5, P2L !$r4 <- m0-d0=__x - addi $r6, $r6, -1 !$r6 <- q0--=q0 - sub $r8, $r8, P2H !$r8 <- m1-d1 - swi $r6, [$sp+(32)] ! 
q0 !$r6->[$sp+(32)] - slt $ta, $r5, $r4 !$ta <- m0<__x - sub $r8, $r8, $ta !$r8 <- P1H-P1L=m1 - move $r5, $r4 !$r5 <- __x=m0 .L45: !------------------------------------------------------ - ! q1 = 0; - ! if (rp != 0) - ! { - !------------------------------------------------------ - - movi $r4, 0 !$r4 <- 0 - swi $r4, [$sp+(36)] !0 -> [$sp+(40)]=q1 - beqz $fp, .L31 !if yes,skip - !------------------------------------------------------ - ! # sub_ddmmss (n1, n0, n1, n0, m1, m0); - ! do - ! { USItype __x; - ! __x = (n0) - (m0); - ! (n1) = (n1) - (m1) - (__x > (n0)); - ! (n0) = __x; - ! } - ! rr.s.low = (n1 << b) | (n0 >> bm); - ! rr.s.high = n1 >> bm; - ! *rp = rr.ll; - !------------------------------------------------------ - - sub $r4, P1H, $r8 !$r4 <- n1-m1 - sub $r6, P1L, $r5 !$r6 <- n0-m0=__x=n0 - slt $ta, P1L, $r6 !$ta <- n0<__x - sub P1H, $r4, $ta !P1H <- $r4-$ta=n1 - move P1L, $r6 - - lwi $r7,[$sp+(28)] ! load bm - subri $r10,$r7,32 - sll $r4, P1H, $r10 !$r4 <- n1<>bm - or $r6, $r5, $r4 !$r6 <- $r5|$r4=rr.s.low - srl $r8, P1H, $r7 !$r8 <- n1>>bm =rr.s.high - swi $r6, [$fp+OFFSET_L] ! - swi $r8, [$fp+OFFSET_H] ! - b .L31 -.L42: - !------------------------------------------------------ - ! else - ! { - ! if(n1 > d1) - !------------------------------------------------------ - - slt $ta, P2H, P1H !$ta <- P2H= d0) - !------------------------------------------------------ - - slt $ta, P1L, P2L !$ta <- P1L (n0)); - ! (n0) = __x; - ! } + ! q1 = 0; + ! if (rp != 0) { + !------------------------------------------------------ +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + movi W2, 0 + swi W2, [$sp+(32+OFFSET_H)] ! q1 = 0 +#else + movi W3H, 0 ! q1 = 0 +#endif + beqz $fp, .LZsetq ! if yes, skip + + !------------------------------------------------------ + ! sub_ddmmss (n1, n0, n1, n0, m1, m0); + !!!!!! do { + !!!!!! USItype __x; + !!!!!! __x = (n0) - (m0); + !!!!!! (n1) = (n1) - (m1) - (__x > (n0)); + !!!!!! (n0) = __x; + !!!!!! } + ! rr.s.low = (n1 << b) | (n0 >> bm); + ! rr.s.high = n1 >> bm; + ! *rp = rr.ll; + ! } + ! } + !------------------------------------------------------ + sub P1L, NUMLO, MLO ! __x = n0 - m0 + sub P1H, NUMHI, MHI ! n1 - m1 + slt $ta, NUMLO, P1L ! n0 < __x + sub P1H, P1H, $ta ! n1 = n1 - m1 - (__x > n0) +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + lwi W2, [$sp+(28)] ! bm + subri W0, W2, 32 ! b + sll NUMHI, P1H, W0 ! n1 << b + srl NUMLO, P1L, W2 ! n0 >> bm + or NUMLO, NUMLO, NUMHI ! (n1 << b) | (n0 >> bm) + srl NUMHI, P1H, W2 ! n1 >> bm +#else + subri W0, BM, 32 ! b + sll NUMHI, P1H, W0 ! n1 << b + srl NUMLO, P1L, BM ! n0 >> bm + or NUMLO, NUMLO, NUMHI ! (n1 << b) | (n0 >> bm) + srl NUMHI, P1H, BM ! n1 >> bm +#endif + +.LZsetr: + swi NUMLO, [$fp+OFFSET_L] ! remainder + swi NUMHI, [$fp+OFFSET_H] + +.LZsetq: +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + lwi P1L, [$sp+(32+OFFSET_L)]! quotient + lwi P1H, [$sp+(32+OFFSET_H)] +#else + move P1L, W3L ! quotient + move P1H, W3H +#endif + + ! to eliminate unaligned branch target + .align 2 +.LZret: +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + lmw.bi $r6, [$sp], $r10 , 10 + addi $sp, $sp, 40 +#else + lmw.bim $sp, [$sp], $sp, 10 +#endif + ret + +.LZskipnorm2: !------------------------------------------------------ + ! else { + ! /* From (n1 >= d1) /\ (the most significant bit of d1 is set), + ! conclude (the most significant bit of n1 is set) /\ (the + ! quotient digit q0 = 0 or 1). + ! This special case is necessary, not an optimization. */ + ! 
if (n1 > d1 || n0 >= d0) { + !------------------------------------------------------ +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + move W2, 0 +#endif + slt $ta, DENHI, NUMHI ! n1 > d1 + bnez $ta, .L52 ! if yes, skip + slt $ta, NUMLO, DENLO ! n0 < d0 + bnez $ta, .L51 ! if yes, skip + .L52: - sub $r4, P1H, P2H !$r4 <- P1H-P2H - sub $r6, P1L, P2L !$r6 <- no-d0=__x=n0 - slt $ta, P1L, $r6 !$ta <- no<__x - sub P1H, $r4, $ta !P1H <- $r4-$ta=n1 - move P1L, $r6 !n0 - movi $r5, 1 ! - swi $r5, [$sp+(32)] !1 -> [$sp+(32)]=q0 + !------------------------------------------------------ + ! q0 = 1; + ! sub_ddmmss (n1, n0, n1, n0, d1, d0); + !!!!!! do { + !!!!!! USItype __x; + !!!!!! __x = (n0) - (d0); + !!!!!! (n1) = (n1) - (d1) - (__x > (n0)); + !!!!!! (n0) = __x; + !!!!!! } + ! } + !------------------------------------------------------ +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + move W1, 1 + swi W1, [$sp+(32+OFFSET_L)] ! q0 = 1 +#else + movi W3L, 1 ! q0 = 1 +#endif + sub W0, NUMLO, DENLO ! __x = n0 - d0 + sub NUMHI, NUMHI, DENHI ! n1 - d1 + slt $ta, NUMLO, W0 ! n0 < __x + sub NUMHI, NUMHI, $ta ! n1 = n1 -d1 - (_-x > n0) + move NUMLO, W0 ! n0 = __x b .L54 + .L51: !------------------------------------------------------ - ! q0 = 0; + ! else + ! q0 = 0; !------------------------------------------------------ +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + swi W2, [$sp+(32+OFFSET_L)] ! q0 = 0 +#else + movi W3L, 0 ! q0 = 0 +#endif - movi $r5,0 - swi $r5, [$sp+(32)] !$r5=0 -> [$sp+(32)] .L54: !------------------------------------------------------ - ! q1 = 0; - ! if (rp != 0) - ! { - !------------------------------------------------------ - - movi $r5, 0 ! - swi $r5, [$sp+(36)] !0 -> [$sp+(36)] - beqz $fp, .L31 - !------------------------------------------------------ - ! rr.s.low = n0; - ! rr.s.high = n1; - ! *rp = rr.ll; + ! q1 = 0; + ! if (rp != 0) { + ! rr.s.low = n0; rr.s.high = n1; + ! *rp = rr.ll; + ! } + ! } ! } - !------------------------------------------------------ - - swi P1L, [$fp+OFFSET_L] !remainder - swi P1H, [$fp+OFFSET_H] ! -.L31: - !------------------------------------------------------ - ! const DWunion ww = {{.low = q0, .high = q1}}; - ! return ww.ll; + ! } + ! const DWunion ww = {{.low = q0, .high = q1}}; + ! return ww.ll; !} !------------------------------------------------------ - - lwi P1L, [$sp+(32)] !quotient - lwi P1H, [$sp+(36)] - lmw.bim $r6, [$sp], $r10, 10 - addi $sp, $sp, 12 - ret +#if defined(__NDS32_REDUCE_REGS__)||!defined(__NDS32_EXT_PERF__) + swi W2, [$sp+(32+OFFSET_H)] ! q1 = 0 +#else + movi W3H, 0 +#endif + bnez $fp, .LZsetr + b .LZsetq .size __udivmoddi4, .-__udivmoddi4 #endif /* L_udivmoddi4 */ @@ -1520,36 +1711,41 @@ #ifdef L_umoddi3 - !-------------------------------------- - #ifdef __big_endian__ - #define V1H $r0 - #define V1L $r1 - #define V2H $r2 - #define V2L $r3 - #else - #define V1H $r1 - #define V1L $r0 - #define V2H $r3 - #define V2L $r2 - #endif - !-------------------------------------- +#ifdef __big_endian__ +#define P1H $r0 +#define P1L $r1 +#define OFFSET_H 0 +#define OFFSET_L 4 +#else +#define P1H $r1 +#define P1L $r0 +#define OFFSET_H 4 +#define OFFSET_L 0 +#endif + .text .align 2 .globl __umoddi3 .type __umoddi3, @function __umoddi3: - ! prologue + ! ===================================================================== + ! stack allocation: + ! sp+12 +-----------------------+ + ! | remainder | + ! sp+4 +-----------------------+ + ! | $lp | + ! sp +-----------------------+ + ! 
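+ !------------------------------------------------------
+ ! In C terms this wrapper is simply the generic libgcc form
+ ! (a sketch; UDWtype is the unsigned 64-bit type):
+ !
+ !   UDWtype
+ !   __umoddi3 (UDWtype u, UDWtype v)
+ !   {
+ !     UDWtype w;
+ !     __udivmoddi4 (u, v, &w);
+ !     return w;
+ !   }
+ !
+ ! The remainder slot at sp+4 in the layout above is the &w that
+ ! is passed to __udivmoddi4 in $r4.
+ !------------------------------------------------------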
===================================================================== addi $sp, $sp, -12 swi $lp, [$sp+(0)] - ! end of prologue + addi $r4, $sp, 4 bal __udivmoddi4 - lwi $r0, [$sp+(4)] ! __udivmoddi4 return low when LE mode or return high when BE mode - lwi $r1, [$sp+(8)] ! -.L82: + lwi P1L, [$sp+(4+OFFSET_L)] + lwi P1H, [$sp+(4+OFFSET_H)] + ! epilogue - lwi $lp, [$sp+(0)] - addi $sp, $sp, 12 + lwi.bi $lp, [$sp], 12 ret .size __umoddi3, .-__umoddi3 #endif /* L_umoddi3 */ @@ -1559,64 +1755,51 @@ #ifdef L_muldi3 #ifdef __big_endian__ - #define P1H $r0 - #define P1L $r1 - #define P2H $r2 - #define P2L $r3 - - #define V2H $r4 - #define V2L $r5 -#else - #define P1H $r1 - #define P1L $r0 - #define P2H $r3 - #define P2L $r2 - - #define V2H $r5 - #define V2L $r4 +#define P1H $r0 +#define P1L $r1 +#define P2H $r2 +#define P2L $r3 +#else +#define P1H $r1 +#define P1L $r0 +#define P2H $r3 +#define P2L $r2 #endif - - ! ==================================================================== .text .align 2 .globl __muldi3 .type __muldi3, @function __muldi3: - ! parameter passing for libgcc functions normally involves 2 doubles - !--------------------------------------- #ifdef __NDS32_ISA_V3M__ ! There is no mulr64 instruction in Andes ISA V3M. ! So we must provide a sequence of calculations to complete the job. - smw.adm $r6, [$sp], $r9, 0x0 - zeh33 $r4, P1L - srli $r7, P1L, 16 - zeh33 $r5, P2L - mul $r6, $r5, $r4 - mul33 $r5, $r7 - srli $r8, P2L, 16 - mov55 $r9, $r5 - maddr32 $r9, $r8, $r4 - srli $r4, $r6, 16 - add $r4, $r9, $r4 - slt45 $r4, $r5 - slli $r5, $r15, 16 - maddr32 $r5, $r8, $r7 - mul P2L, P1H, P2L - srli $r7, $r4, 16 - maddr32 P2L, P2H, P1L - add333 P1H, $r5, $r7 - slli $r4, $r4, 16 - zeh33 $r6, $r6 - add333 P1L, $r4, $r6 - add333 P1H, P2L, P1H - lmw.bim $r6, [$sp], $r9, 0x0 + mul $r5, P1H, P2L ! (ah=a>>31)*(bl=b&0xffffffff) + srli P1H, P1L, 16 ! alh=al>>16 + maddr32 $r5, P1L, P2H ! ah*bl+(bh=b>>31)*(al=a&0xffffffff) + zeh P1L, P1L ! all=al&0xffff + srli P2H, P2L, 16 ! blh=bl>>16 + zeh P2L, P2L ! bll=bl&0xffff + + mul $ta, P1L, P2H ! zA=all*blh + mul $r4, P1L, P2L ! zl=all*bll + mul P2L, P1H, P2L ! zB=alh*bll + add P1L, $ta, P2L ! zA+=zB + slt $ta, P1L, P2L ! zA>16 + add P1H, P1H, $ta ! zh+=(zA>>16) + add P1L, $r4, P2L ! zl+=(zA<<16) + slt $ta, P1L, $r4 ! zl=|B| + ! --------------------------------------------------------------------- +.LEmain: + xor P2H, P2H, P1H + and AXORB, P2H, SIGN ! sign of (A xor B) + srli EXPOA, VALAH, #21 ! exponent(A) + srli EXPOB, VALBH, #21 ! exponent(B) + slli MANAH, VALAH, #10 ! (dirty) hi-part of mantissa(A) + slli MANBH, VALBH, #10 ! (dirty) hi-part of mantissa(B) + move W1, #0x7ff + beq W1, EXPOA, .LEinfnan ! if A is NaN or inf, goto .LEinfnan + ! A is finite, thus B must be finite + or $r15, VALAH, P1L + beqz $r15, .LEzeroP ! if A is zero, return zero + or $r15, VALBH, P2L + beqz $r15, .LEretA ! if B is zero, return A + sub W2, EXPOA, EXPOB ! exponent(A)-exponent(B) + slti $r15, W2, #0x40 + beqz $r15, .LEretA ! B is insignificant, return A + srli W1, P1L, #21 ! (dirty) mantissa(A)<<11 + or MANAH, MANAH, W1 + slli MANAL, P1L, #11 + srli W1, P2L, #21 ! (dirty) mantissa(B)<<11 + or MANBH, MANBH, W1 + slli MANBL, P2L, #11 + slti $r15, EXPOA, #0x2 + bnez $r15, .LEmain4 ! if exponent(A) is 0 or 1, got .LEmain4 + or MANAH, MANAH, SIGN ! mantissa(A)<<11 +! or W1, MANBH, SIGN +! cmovn MANBH, W1, EXPOB + beqz EXPOB, .LEmain1 + or MANBH, MANBH, SIGN ! mantissa(A)<<11 + +.LEmain1: + addi W1, W2, #-1 ! adjusted shift amount + cmovz W2, W1, EXPOB + beqz W2, .LEmain4 ! 
shift amount is zero, simply skip + ! mantissa(b)>>shift amount + subri W1, W2, #0x20 ! 32-exponent(sum) + blez W1, .LEmain2 ! if exponent(sum)>=32, goto .LEmain2 + + ! exponent(sum)<32 + sll W0, MANBL, W1 ! shift-out portion + sll W1, MANBH, W1 + srl MANBH, MANBH, W2 + srl MANBL, MANBL, W2 + or MANBL, MANBL, W1 + b .LEmain3 + +.LEmain2: + ! exponent(sum)>=32 + subri W2, W1, #0 + addi W1, W1, #0x20 + sll W0, MANBH, W1 + or W0, W0, MANBL ! shift-out portion + cmovz W0, W2, W2 + srl MANBL, MANBH, W2 + move MANBH, #0 + +.LEmain3: +! ori $r15, MANBL, #2 +! cmovn MANBL, W0, $r15 + beqz W0, .LEmain4 + ori MANBL, MANBL, #2 ! B is quite small compare to A + +.LEmain4: + beqz AXORB, .LEadd ! same sign, do addition + + ! --------------------------------------------------------------------- + ! differnet sign, do subtraction + ! --------------------------------------------------------------------- + bne EXPOA, EXPOB, .LEsub1 + bne MANAH, MANBH, .LEsub1 + beq MANAL, MANBL, .LEzero ! |A|==|B|, return zero + ! |A|>|B| .LEsub1: - #SUB(P3L, O1L) - move $r15, P3L - sub P3L, P3L, O1L - slt $r15, $r15, P3L - - #SUBCC(P3H, O1H) - beqzs8 .LL16 - move $r15, P3H - sub P3H, P3H, O1H - slt $r15, $r15, P3H - beqzs8 .LL17 - subi333 P3H, P3H, #1 - j .LL18 -.LL17: - move $r15, P3H - subi333 P3H, P3H, #1 - slt $r15, $r15, P3H - j .LL18 -.LL16: - move $r15, P3H - sub P3H, P3H, O1H - slt $r15, $r15, P3H -.LL18: - - beqzs8 .Li5 - move $r10, #0x80000000 - xor P1H, P1H, $r10 - - subri P3H, P3H, #0 - beqz P3L, .LL19 - subri P3L, P3L, #0 - subi45 P3H, #1 -.LL19: - -.Li5: - #NORMd($r4, $r9, P1L) - bnez P3H, .LL20 - bnez P3L, .LL21 - move $r6, #0 - j .LL22 -.LL21: - move P3H, P3L - move P3L, #0 - move $r9, #32 - sub $r6, $r6, $r9 -.LL20: -#ifdef __NDS32_PERF_EXT__ - clz $r9, P3H -#else - pushm $r0, $r5 - move $r0, P3H - bal __clzsi2 - move $r9, $r0 - popm $r0, $r5 -#endif - beqz $r9, .LL22 - sub $r6, $r6, $r9 - subri P1L, $r9, #32 - srl P1L, P3L, P1L - sll P3L, P3L, $r9 - sll P3H, P3H, $r9 - or P3H, P3H, P1L -.LL22: - #NORMd End - - or $r10, P3H, P3L - bnez $r10, .LEres - move P1H, #0 - -.LEres: - blez $r6, .LEund - -.LElab8: - #ADD(P3L, $0x400) - move $r15, #0x400 - add P3L, P3L, $r15 - slt $r15, P3L, $r15 - - #ADDCC(P3H, $0x0) - beqzs8 .LL25 - add P3H, P3H, $r15 - slt $r15, P3H, $r15 -.LL25: - - #ADDC($r6, $0x0) - add $r6, $r6, $r15 - srli $r10, P3L, #11 - andi $r10, $r10, #1 - sub P3L, P3L, $r10 - srli P1L, P3L, #11 - slli $r10, P3H, #21 - or P1L, P1L, $r10 - slli $r10, P3H, #1 - srli $r10, $r10, #12 - or P1H, P1H, $r10 - slli $r10, $r6, #20 - or P1H, P1H, $r10 + ! is mantissa(|A|)=mantissa(|B|), skip + +.LEsub2: + ! mantissa(|A|)>1 + slli W0, MANBH, #31 + srli MANBH, MANBH, #1 + srli MANBL, MANBL, #1 + or MANBL, MANBL, W0 + +.LEsub3: + ! calculate mantissa(|A|)-mantissa(|B|) + move W0, MANAL + sub MANAL, MANAL, MANBL + slt $r15, W0, MANAL + sub MANAH, MANAH, $r15 ! no undeflow issue + sub MANAH, MANAH, MANBH + slti $r15, EXPOA, #2 + bnez $r15, .LEdenorm ! when exponent(A,B) is (0,0) or (1,0/1) + ! count leading zero of mantissa(|A|) + bnez MANAH, .LEsub4 +#ifdef __NDS32_EXT_PERF__ + move W0, #0x20 + slt W0, EXPOA, W0 + move W1, EXPOA + bnez W0, .LEsub5 + move MANAH, MANAL + move MANAL, #0 + addi EXPOA, EXPOA, #-32 + +.LEsub4: + clz W1, MANAH ! leading zero count + slt $r15, W1, EXPOA ! leading zero count>=exponent(A)? + subri W0, $r15, #1 + min W1, W1, EXPOA ! calculated shift amount + beqz W1, .LEround ! shift amount is 0, skip +.LEsub5: + sub EXPOA, EXPOA, W1 + ! 
mantissa(diff)<>11 + srli MANAL, MANAL, #11 + slli W0, MANAH, #21 + srli MANAH, MANAH, #11 + or MANAL, MANAL, W0 + b .LEpack + + ! handle overflow +.LEaddover: + subri W1, EXPOA, #0x7fe + beqz W1, .LEinf + andi $r15, MANAL, #1 + ori W1, MANAL, #2 + cmovn MANAL, W1, $r15 + ! mantissa(sum)>>1 + slli W0, MANAH, #31 + srli MANAH, MANAH, #1 + srli MANAL, MANAL, #1 + or MANAL, MANAL, W0 + addi EXPOA, EXPOA, #1 + b .LEround + +.LEinf: + move P1L, #0 + move MANAH, 0x7ff00000 ! return inf + b .LEpack + + ! handle 0.0f or -0.0f +.LEzeroP: + beqz AXORB, .LEretA ! A and B same sign, return A + +.LEzero: + move P1L, #0 + move P1H, #0 ! otherwise, return 0.0f + b .LEret + + ! --------------------------------------------------------------------- + ! exponent(A) is 0x7ff + ! --------------------------------------------------------------------- +.LEinfnan: + or MANAH, MANAH, MANAL + bne MANAH, SIGN, .LEnan ! if A is NaN, goto .LEnan + ! A is inf + bne W1, EXPOB, .LEretA ! B is finite, return A + ! B is also inf + beqz AXORB, .LEretA ! same sign, return A .LEnan: - move P1H, #0xfff80000 - move P1L, #0 - j .LEretA - -.LEund: - subri $r9, $r6, #1 - move P2L, #0 -.LL26: - move $r10, #0x20 - slt $r15, $r9, $r10 - bnezs8 .LL27 - or P2L, P2L, P3L - move P3L, P3H - move P3H, #0 - addi $r9, $r9, #0xffffffe0 - bnez P3L, .LL26 -.LL27: - beqz $r9, .LL28 - move P1L, P3H - move $r10, P3L - srl P3L, P3L, $r9 - srl P3H, P3H, $r9 - subri $r9, $r9, #0x20 - sll P1L, P1L, $r9 - or P3L, P3L, P1L - sll $r10, $r10, $r9 - or P2L, P2L, $r10 - beqz P2L, .LL28 - ori P3L, P3L, #1 -.LL28: - move $r6, #0 - j .LElab8 + move P1L, #0 + move P1H, #0xfff80000 ! return NaN + b .LEret .size __subdf3, .-__subdf3 .size __adddf3, .-__adddf3 #endif /* L_addsub_df */ +#ifdef L_mul_df -#ifdef L_mul_sf - -#if !defined (__big_endian__) - #define P1L $r0 - #define P1H $r1 - #define P2L $r2 - #define P2H $r3 +#ifdef __big_endian__ + #define P1H $r0 + #define P1L $r1 + #define P2H $r2 + #define P2L $r3 + #define MANAH $r6 + #define MANAL $r7 + #define MANBH $r8 + #define MANBL $r9 #else - #define P1H $r0 - #define P1L $r1 - #define P2H $r2 - #define P2L $r3 -#endif + #define P1L $r0 + #define P1H $r1 + #define P2L $r2 + #define P2H $r3 + #define MANAL $r6 + #define MANAH $r7 + #define MANBL $r8 + #define MANBH $r9 +#endif +#define EXPOA $r4 +#define EXPOB $r5 +#define SIGN $fp +#define W1 $r5 +#define W2 P2L +#define W3 P2H +#define W4 P1L +#define W5 P1H +#define W6 $r10 +#define AXORB $r10 + .text .align 2 - .global __mulsf3 - .type __mulsf3, @function -__mulsf3: - push $lp - pushm $r6, $r10 - - srli $r3, $r0, #23 - andi $r3, $r3, #0xff - srli $r5, $r1, #23 - andi $r5, $r5, #0xff - move $r6, #0x80000000 - slli $r2, $r0, #8 - or $r2, $r2, $r6 - slli $r4, $r1, #8 - or $r4, $r4, $r6 - xor $r8, $r0, $r1 - and $r6, $r6, $r8 - - addi $r8, $r3, #-1 - slti $r15, $r8, #0xfe - beqzs8 .LFspecA - -.LFlab1: - addi $r8, $r5, #-1 - slti $r15, $r8, #0xfe - beqzs8 .LFspecB + .global __muldf3 + .type __muldf3, @function +__muldf3: + push25 $r10, #16 -.LFlab2: - move $r10, $r3 -/* This is a 64-bit multiple. ($r2, $r7) is (high, low). */ + slli EXPOA, P1H, #1 + srli EXPOA, EXPOA, #21 ! exponent(a) + slli MANAH, P1H, #11 ! (dirty) mantissa(a) + srli MANAL, P1L, #21 + or MANAH, MANAH, MANAL + slli MANAL, P1L, #11 + move SIGN, #0x80000000 + slli EXPOB, P2H, #1 + srli EXPOB, EXPOB, #21 ! exponent(b) + slli MANBH, P2H, #11 ! (dirty) mantissa(b) + srli MANBL, P2L, #21 + or MANBH, MANBH, MANBL + slli MANBL, P2L, #11 + xor W3, P2H, P1H + and AXORB, W3, SIGN ! 
sign of (A xor B) + + move W3, 0x7ff + beqz EXPOA, .LFAexpzero ! exponent(A) is 0x000 + beq W3, EXPOA, .LFAinfnan ! exponent(A) is 0x7ff + or MANAH, MANAH, SIGN + +.LFmain1: + beqz EXPOB, .LFBexpzero ! exponent(B) is 0x000 + beq W3, EXPOB, .LFBinfnan ! exponent(B) is 0x7ff + or MANBH, MANBH, SIGN + + ! --------------------------------------------------------------------- + ! multiply two 64-bit unsigned integers for 128-bit product. + ! --------------------------------------------------------------------- +.LFmain2: + ! exponent(product) = exponent(A) + exponent(B) - 0x3fe + swi AXORB, [$sp+(12)] + addi W4, EXPOB, #0xfffffc02 + add EXPOA, EXPOA, W4 + ! PHH: hi-part of mantissa(A) * hi-part of mantissa(B) + ! This is a 64-bit multiplication: (P2H, P2L) is (high, low). #ifndef __NDS32_ISA_V3M__ - mulr64 $r2, $r2, $r4 + mulr64 $r2, MANAH, MANBH #else - pushm $r0, $r1 - pushm $r4, $r5 - move P1L, $r2 - movi P1H, #0 - move P2L, $r4 - movi P2H, #0 - bal __muldi3 + swi EXPOA, [$sp+(8)] + move $r0, MANAH + move $r1, MANBH + bal umul_ppmm movd44 $r2, $r0 - popm $r4, $r5 - popm $r0, $r1 #endif -#ifndef __big_endian__ - move $r7, $r2 - move $r2, $r3 + ! PLH: lo-part of mantissa(A) * hi-part of mantissa(B) + ! This is a 64-bit multiplication: (P1H, P1L) is (high, low). +#ifndef __NDS32_ISA_V3M__ + mulr64 $r0, MANAL, MANBH #else - move $r7, $r3 -#endif - move $r3, $r10 - - beqz $r7, .Li17 - ori $r2, $r2, #1 - -.Li17: - sltsi $r15, $r2, #0 - bnezs8 .Li18 - slli $r2, $r2, #1 - addi $r3, $r3, #-1 -.Li18: - addi $r8, $r5, #0xffffff82 - add $r3, $r3, $r8 - addi $r8, $r3, #-1 - slti $r15, $r8, #0xfe - beqzs8 .LFoveund - -.LFlab8: - #ADD($r2, $0x80) - move $r15, #0x80 - add $r2, $r2, $r15 - slt $r15, $r2, $r15 - - #ADDC($r3, $0x0) - add $r3, $r3, $r15 - srli $r8, $r2, #8 - andi $r8, $r8, #1 - sub $r2, $r2, $r8 - slli $r2, $r2, #1 - srli $r2, $r2, #9 - slli $r8, $r3, #23 - or $r2, $r2, $r8 - or $r0, $r2, $r6 - -.LF999: - popm $r6, $r10 - pop $lp - ret5 $lp - -.LFspecA: - bnez $r3, .Li19 - add $r2, $r2, $r2 - beqz $r2, .Li20 -#ifdef __NDS32_PERF_EXT__ - clz $r7, $r2 + smw.bi $r2, [$sp], $r3, #0 + move $r0, MANAL + move $r1, MANBH + bal umul_ppmm + lmw.bi $r2, [$sp], $r3, #0 +#endif + ! add PLH and PHH for 96-bit result in (W6, MANBH, P1L). + add MANBH, P2L, P1H + slt $r15, MANBH, P1H + add W6, P2H, $r15 + ! PHL: hi-part of mantissa(A) * lo-part of mantissa(B) + ! This is a 64-bit multiplication: (P2H, P2L) is (high, low). +#ifndef __NDS32_ISA_V3M__ + mulr64 $r2, MANAH, MANBL #else - pushm $r0, $r5 - move $r0, $r2 - bal __clzsi2 - move $r7, $r0 - popm $r0, $r5 + swi P1L, [$sp+(4)] + move $r0, MANAH + move $r1, MANBL + bal umul_ppmm + movd44 $r2, $r0 + lwi P1L, [$sp+(4)] #endif - sub $r3, $r3, $r7 - sll $r2, $r2, $r7 - j .LFlab1 -.Li20: - subri $r15, $r5, #0xff - beqzs8 .LFnan - j .LFzer -.Li19: - add $r8, $r2, $r2 - bnez $r8, .LFnan - bnez $r5, .Li21 - add $r8, $r4, $r4 - beqz $r8, .LFnan -.Li21: - subri $r15, $r5, #0xff - bnezs8 .LFinf - -.LFspecB: - bnez $r5, .Li22 - add $r4, $r4, $r4 - beqz $r4, .LFzer -#ifdef __NDS32_PERF_EXT__ - clz $r7, $r4 + ! add PHL, PLH, and PHH for 96-bit result in (W6, MANBH, MANAH). + add MANAH, P2L, P1L + slt $r15, MANAH, P1L + add MANBH, MANBH, $r15 + slt $r15, MANBH, $r15 + add W6, W6, $r15 + add MANBH, MANBH, P2H + slt $r15, MANBH, P2H + add W6, W6, $r15 + ! PLL: lo-part of mantissa(A) * lo-part of mantissa(B) + ! This is a 64-bit multiplication: (P1H, P1L) is (high, low). 
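+ ! ---------------------------------------------------------------------
+ ! For reference, the PHH/PLH/PHL/PLL sequence computes the 128-bit
+ ! product of the two 64-bit mantissas by schoolbook accumulation.
+ ! A C sketch, assuming 32-bit UWtype and the umul_ppmm(hi,lo,u,v)
+ ! macro from longlong.h (variable names are illustrative only):
+ !
+ !   UWtype w3, w2, w1, w0, c1, c2;
+ !   UWtype hh_h, hh_l, hl_h, hl_l, lh_h, lh_l, ll_h, ll_l;
+ !   umul_ppmm (hh_h, hh_l, a_h, b_h);     /* PHH */
+ !   umul_ppmm (lh_h, lh_l, a_l, b_h);     /* PLH */
+ !   umul_ppmm (hl_h, hl_l, a_h, b_l);     /* PHL */
+ !   umul_ppmm (ll_h, ll_l, a_l, b_l);     /* PLL */
+ !   w0 = ll_l;
+ !   w1 = ll_h + lh_l;   c1  = (w1 < lh_l);
+ !   w1 += hl_l;         c1 += (w1 < hl_l);
+ !   w2 = lh_h + hl_h;   c2  = (w2 < hl_h);
+ !   w2 += hh_l;         c2 += (w2 < hh_l);
+ !   w2 += c1;           c2 += (w2 < c1);
+ !   w3 = hh_h + c2;
+ !   /* (w3,w2) is kept; (w1,w0) only contributes a sticky bit below. */
+ ! ---------------------------------------------------------------------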
+#ifndef __NDS32_ISA_V3M__ + mulr64 $r0, MANAL, MANBL #else - pushm $r0, $r5 - move $r0, $r4 - bal __clzsi2 - move $r7, $r0 - popm $r0, $r5 -#endif - sub $r5, $r5, $r7 - sll $r4, $r4, $r7 - j .LFlab2 - -.LFzer: - move $r0, $r6 - j .LF999 -.Li22: - add $r8, $r4, $r4 - bnez $r8, .LFnan + move $r0, MANAL + move $r1, MANBL + bal umul_ppmm + lwi EXPOA, [$sp+(8)] +#endif + ! add PLL, PHL, PLH, and PHH for 128-bit result in (P1H, MANBH, MANAH, P1L). + add MANAH, MANAH, P1H + slt $r15, MANAH, P1H + add MANBH, MANBH, $r15 + slt $r15, MANBH, $r15 + add P1H, W6, $r15 + + ! take high 64-bit part of the product into (P1H, P1L). + or MANAH, MANAH, P1L + ori P1L, MANBH, #1 ! adjust if low 64-bit is non-zero. + cmovz P1L, MANBH, MANAH + sltsi $r15, P1H, #0 + bnez $r15, .LFmain3 + ! MSB is zero, adjust + move $r15, P1L + add P1L, P1L, P1L + slt $r15, P1L, $r15 + add P1H, P1H, P1H + add P1H, P1H, $r15 + addi EXPOA, EXPOA, #-1 + +.LFmain3: + lwi AXORB, [$sp+(12)] + blez EXPOA, .LFunderflow ! exponent(product) is too small + subri W1, EXPOA, #0x7ff + blez W1, .LFinf ! exponent(product) is too big, return inf + addi P1L, P1L, #0x400 + slti $r15, P1L, #0x400 + beqz $r15, .LFround + add P1H, P1H, $r15 + slt $r15, P1H, $r15 + add EXPOA, EXPOA, $r15 + + ! do rounding +.LFround: + srli W2, P1L, #11 + andi W2, W2, #1 + sub P1L, P1L, W2 + srli P1L, P1L, #11 + slli W2, P1H, #21 + or P1L, P1L, W2 + + ! do packing + slli P1H, P1H, #1 + srli P1H, P1H, #12 + slli W1, EXPOA, #20 + or P1H, P1H, W1 -.LFinf: - move $r8, #0x7f800000 - or $r0, $r6, $r8 - j .LF999 +.LFret: + or P1H, P1H, AXORB + pop25 $r10, #16 -.LFnan: - move $r0, #0xffc00000 - j .LF999 + ! --------------------------------------------------------------------- + ! exponent(A) is 0x000 + ! --------------------------------------------------------------------- +.LFAexpzero: + or $r15, MANAH, MANAL + beqz $r15, .LFAzero ! A is zero + ! A is subnormal + srli $r15, MANAL, #31 + add MANAH, MANAH, MANAH + add MANAH, MANAH, $r15 + add MANAL, MANAL, MANAL + ! count leading zeros of A + bnez MANAH, .LFAcont + move MANAH, MANAL + move MANAL, #0 + addi EXPOA, EXPOA, #-32 + + ! MANAH is non-zero +.LFAcont: +#ifdef __NDS32_EXT_PERF__ + clz W4, MANAH +#else + move W4, #0 + move W5, MANAH + b .LFAloop2 + +.LFAloop: + add W5, W5, W5 + addi W4, W4, #1 + +.LFAloop2: + slt $r15, W5, SIGN + bnez $r15, .LFAloop +#endif + beqz W4, .LFmain1 + sub EXPOA, EXPOA, W4 + subri W2, W4, #32 + srl W2, MANAL, W2 + sll MANAL, MANAL, W4 + sll MANAH, MANAH, W4 + or MANAH, MANAH, W2 + b .LFmain1 + +.LFAzero: + beq W3, EXPOB, .LFnan ! B is NaN or inf, return NaN + +.LFsetsign: + move P1H, AXORB + pop25 $r10, #16 + + ! --------------------------------------------------------------------- + ! exponent(A) is 0x7ff + ! --------------------------------------------------------------------- +.LFAinfnan: + or MANAH, MANAH, MANAL + bne MANAH, SIGN, .LFnan ! A is NaN, return NaN + ! A is inf: check whether B is zero. + bnez EXPOB, .LFAcont2 + slli W2, MANBH, #1 + or W2, W2, MANBL + beqz W2, .LFnan ! inf*zero is NaN + +.LFAcont2: + bne W3, EXPOB, .LFinf ! B is finite, return inf + + ! --------------------------------------------------------------------- + ! exponent(B) is 0x7ff + ! --------------------------------------------------------------------- +.LFBinfnan: + or MANBH, MANBH, MANBL + bne MANBH, SIGN, .LFnan ! B is NaN, return NaN + ! 
B is inf -.LFoveund: - bgtz $r3, .LFinf - subri $r7, $r3, #1 - slti $r15, $r7, #0x20 - beqzs8 .LFzer - subri $r8, $r7, #0x20 - sll $r3, $r2, $r8 - srl $r2, $r2, $r7 - beqz $r3, .Li25 - ori $r2, $r2, #2 -.Li25: - move $r3, #0 - addi $r8, $r2, #0x80 - sltsi $r15, $r8, #0 - beqzs8 .LFlab8 - move $r3, #1 - j .LFlab8 - .size __mulsf3, .-__mulsf3 -#endif /* L_mul_sf */ +.LFinf: + move P1L, #0 + move P1H, #0x7ff00000 + b .LFret +.LFnan: + move P1L, #0 + move P1H, #0xfff80000 + pop25 $r10, #16 + + ! --------------------------------------------------------------------- + ! exponent(B) is 0x000 + ! --------------------------------------------------------------------- +.LFBexpzero: + or P1L, MANBH, MANBL + beqz P1L, .LFsetsign ! B is zero, return zero + ! B is subnormal + srli $r15, MANBL, #31 + add MANBH, MANBH, MANBH + add MANBH, MANBH, $r15 + add MANBL, MANBL, MANBL + ! count leading zeros of B + bnez MANBH, .LFBcont + move MANBH, MANBL + move MANBL, #0 + addi EXPOB, EXPOB, #-32 + + ! MANBH is non-zero +.LFBcont: +#ifdef __NDS32_EXT_PERF__ + clz W4, MANBH +#else + move W4, #0 + move W5, MANBH + b .LFBloop2 + +.LFBloop: + add W5, W5, W5 + addi W4, W4, #1 + +.LFBloop2: + slt $r15, W5, SIGN + bnez $r15, .LFBloop +#endif + beqz W4, .LFmain2 + sub EXPOB, EXPOB, W4 + subri W2, W4, #32 + srl W2, MANBL, W2 + sll MANBL, MANBL, W4 + sll MANBH, MANBH, W4 + or MANBH, MANBH, W2 + b .LFmain2 + + ! --------------------------------------------------------------------- + ! handle underflow + ! --------------------------------------------------------------------- +.LFunderflow: + move MANAL, #0 + subri W3, EXPOA, #1 + slti $r15, W3, #0x20 + bnez $r15, .LFunderflow2 + move MANAL, P1L + move P1L, P1H + move P1H, #0 + addi W3, W3, #0xffffffe0 + beqz P1L, .LFunderflow2 + slti $r15, W3, #0x20 + beqz $r15, .LFignore ! result too small, return zero + + ! 1-exponent(A), in W3, is 0-31 +.LFunderflow2: + beqz W3, .LFunderflow3 ! it is zero, skip + subri W2, W3, #0x20 + sll MANAH, P1H, W2 + sll W1, P1L, W2 + srl P1L, P1L, W3 + srl P1H, P1H, W3 + or P1L, P1L, MANAH + or MANAL, MANAL, W1 +! ori W3, P1L, #1 +! 
cmovn P1L, W3, MANAL + beqz MANAL, .LFunderflow3 + ori P1L, P1L, #1 + +.LFunderflow3: + addi P1L, P1L, #0x400 + slti $r15, P1L, #0x400 + add P1H, P1H, $r15 + srli EXPOA, P1H, #31 + b .LFround + +.LFignore: + move P1L, #0 + b .LFsetsign + .size __muldf3, .-__muldf3 +#endif /* L_mul_df */ -#ifdef L_mul_df +#ifdef L_div_df -#ifndef __big_endian__ - #define P1L $r0 - #define P1H $r1 - #define P2L $r2 - #define P2H $r3 - #define P3L $r4 - #define P3H $r5 - #define O1L $r7 - #define O1H $r8 +#ifdef __big_endian__ + #define P1H $r0 + #define P1L $r1 + #define P2H $r2 + #define P2L $r3 + #define MANAH $r6 + #define MANAL $r7 + #define MANBH $r8 + #define MANBL $r9 #else - #define P1H $r0 - #define P1L $r1 - #define P2H $r2 - #define P2L $r3 - #define P3H $r4 - #define P3L $r5 - #define O1H $r7 - #define O1L $r8 -#endif + #define P1L $r0 + #define P1H $r1 + #define P2L $r2 + #define P2H $r3 + #define MANAL $r6 + #define MANAH $r7 + #define MANBL $r8 + #define MANBH $r9 +#endif +#define EXPOA $r4 +#define EXPOB $r5 +#define SIGN $fp +#define W1 $r5 +#define W2 P2L +#define W3 P2H +#define W4 P1L +#define W5 P1H +#define AXORB $r10 + .text .align 2 - .global __muldf3 - .type __muldf3, @function -__muldf3: - push $lp - pushm $r6, $r10 - - slli $r6, P1H, #1 - srli $r6, $r6, #21 - slli P3H, P1H, #11 - srli $r10, P1L, #21 - or P3H, P3H, $r10 - slli P3L, P1L, #11 - move O1L, #0x80000000 - or P3H, P3H, O1L - slli $r9, P2H, #1 - srli $r9, $r9, #21 - slli O1H, P2H, #11 - srli $r10, P2L, #21 - or O1H, O1H, $r10 - or O1H, O1H, O1L - xor P1H, P1H, P2H - and P1H, P1H, O1L - slli O1L, P2L, #11 - - addi $r10, $r6, #-1 - slti $r15, $r10, #0x7fe - beqzs8 .LFspecA - -.LFlab1: - addi $r10, $r9, #-1 - slti $r15, $r10, #0x7fe - beqzs8 .LFspecB - -.LFlab2: - addi $r10, $r9, #0xfffffc02 - add $r6, $r6, $r10 - - move $r10, $r8 -/* This is a 64-bit multiple. */ -#ifndef __big_endian__ -/* For little endian: ($r9, $r3) is (high, low). */ -#ifndef __NDS32_ISA_V3M__ - mulr64 $r8, $r5, $r8 -#else - pushm $r0, $r5 - move $r0, $r5 - movi $r1, #0 - move $r2, $r8 - movi $r3, #0 - bal __muldi3 - movd44 $r8, $r0 - popm $r0, $r5 -#endif - move $r3, $r8 -#else /* __big_endian__ */ -/* For big endain: ($r9, $r2) is (high, low). */ -#ifndef __NDS32_ISA_V3M__ - mulr64 $r8, $r4, $r7 -#else - pushm $r0, $r5 - move $r1, $r4 - movi $r0, #0 - move $r3, $r7 - movi $r2, #0 - bal __muldi3 - movd44 $r8, $r0 - popm $r0, $r5 -#endif - move $r2, $r9 - move $r9, $r8 -#endif /* __big_endian__ */ - move $r8, $r10 + .global __divdf3 + .type __divdf3, @function +__divdf3: + push25 $r10, #16 - move $r10, P1H -/* This is a 64-bit multiple. */ -#ifndef __big_endian__ -/* For little endian: ($r0, $r2) is (high, low). */ -#ifndef __NDS32_ISA_V3M__ - mulr64 $r0, $r4, $r8 -#else - pushm $r2, $r5 - move $r0, $r4 - movi $r1, #0 - move $r2, $r8 - movi $r3, #0 - bal __muldi3 - popm $r2, $r5 -#endif - move $r2, $r0 - move $r0, $r1 -#else /* __big_endian__ */ -/* For big endain: ($r1, $r3) is (high, low). */ + slli EXPOA, P1H, #1 + srli EXPOA, EXPOA, #21 ! exponent(a) + slli MANAH, P1H, #11 ! (dirty) mantissa(a) + srli MANAL, P1L, #21 + or MANAH, MANAH, MANAL + slli MANAL, P1L, #11 + move SIGN, #0x80000000 + slli EXPOB, P2H, #1 + srli EXPOB, EXPOB, #21 ! exponent(b) + slli MANBH, P2H, #11 ! (dirty) mantissa(b) + srli MANBL, P2L, #21 + or MANBH, MANBH, MANBL + slli MANBL, P2L, #11 + xor W3, P2H, P1H + and AXORB, W3, SIGN ! sign of (A xor B) + + move W3, 0x7ff + beqz EXPOA, .LGAexpzero ! exponent(A) is 0x000 + beq W3, EXPOA, .LGAinfnan ! 
exponent(A) is 0x7ff + or MANAH, MANAH, SIGN + +.LGmain1: + beqz EXPOB, .LGBexpzero ! exponent(B) is 0x000 + beq W3, EXPOB, .LGBinfnan ! exponent(B) is 0x7ff + or MANBH, MANBH, SIGN + + ! --------------------------------------------------------------------- + ! divide two 64-bit unsigned integers for 64-bit quotient. + ! --------------------------------------------------------------------- +.LGmain2: + ! exponent(quotient) = exponent(A) - exponent(B) + 0x3ff + sub EXPOA, EXPOA, EXPOB + addi EXPOA, EXPOA, #0x3ff + ! Use mantissa(A)>>1: hi-part 31 bits and lo-part 22 bits + srli MANAL, MANAL, #1 + slli W5, MANAH, #31 + or MANAL, MANAL, W5 + srli MANAH, MANAH, #1 + ! Split divisor into four 16-bit parts to do division: + ! HH in W2, HL in W5 + srli W2, MANBH, #16 + divr W3, MANAH, MANAH, W2 + zeh W5, MANBH + mul W1, W5, W3 + slli MANAH, MANAH, #16 + srli W4, MANAL, #16 + or MANAH, MANAH, W4 + move W4, MANAH + sub MANAH, MANAH, W1 + slt $r15, W4, MANAH + beqz $r15, .LGmain3 + +.LGloop1: + addi W3, W3, #-1 + add MANAH, MANAH, MANBH + slt $r15, MANAH, MANBH + beqz $r15, .LGloop1 + +.LGmain3: + divr W2, MANAH, MANAH, W2 + mul W1, W5, W2 + slli MANAH, MANAH, #16 + zeh W4, MANAL + or MANAH, MANAH, W4 + move W4, MANAH + sub MANAH, MANAH, W1 + slt $r15, W4, MANAH + beqz $r15, .LGmain4 + +.LGloop2: + addi W2, W2, #-1 + add MANAH, MANAH, MANBH + slt $r15, MANAH, MANBH + beqz $r15, .LGloop2 + +.LGmain4: + slli W3, W3, #16 + add W3, W3, W2 + ! This is a 64-bit multiplication: (P1H, P1L) is (high, low). #ifndef __NDS32_ISA_V3M__ - mulr64 $r0, $r5, $r7 + mulr64 $r0, W3, MANBL #else - pushm $r2, $r5 - move $r1, $r5 - movi $r0, #0 - move $r3, $r7 - movi $r2, #0 - bal __muldi3 - popm $r2, $r5 -#endif - move $r3, $r1 - move $r1, $r0 -#endif /* __big_endian__ */ - move P1H, $r10 + swi EXPOA, [$sp+(8)] + swi W3, [$sp+(4)] + move $r0, W3 + move $r1, MANBL + bal umul_ppmm + lwi W3, [$sp+(4)] +#endif + subri MANAL, P1L, #0 + move W4, MANAH + sub MANAH, MANAH, P1H + slt $r15, W4, MANAH + beqz MANAL, .LGmain5 + move W4, MANAH + addi MANAH, MANAH, #-1 + bnez $r15, .LGloopA + slt $r15, W4, MANAH + +.LGmain5: + beqz $r15, .LGmain6 + +.LGloopA: + addi W3, W3, #-1 + add MANAL, MANAL, MANBL + slt W4, MANAL, MANBL + add MANAH, MANAH, MANBH + slt $r15, MANAH, MANBH + beqz W4, .LGloopA2 + addi MANAH, MANAH, #1 + bnez $r15, .LGmain6 + slti $r15, MANAH, #1 + +.LGloopA2: + beqz $r15, .LGloopA + +.LGmain6: + bne MANAH, MANBH, .Li25 + move P1H, MANBL + move MANAH, MANAL + move W2, #0 + move P1L, #0 + b .LGmain7 - #ADD(P2H, P1L) - add P2H, P2H, P1L - slt $r15, P2H, P1L +.Li25: + srli W5, MANBH, #16 + divr W2, MANAH, MANAH, W5 + zeh W4, MANBH + mul $r15, W4, W2 + slli MANAH, MANAH, #16 + srli W1, MANAL, #16 + or MANAH, MANAH, W1 + move W1, MANAH + sub MANAH, MANAH, $r15 + slt $r15, W1, MANAH + beqz $r15, .Li26 + +.LGloop3: + addi W2, W2, #-1 + add MANAH, MANAH, MANBH + slt $r15, MANAH, MANBH + beqz $r15, .LGloop3 - #ADDC($r9, $0x0) - add $r9, $r9, $r15 +.Li26: + divr W5, MANAH, MANAH, W5 + mul W1, W4, W5 + slli MANAH, MANAH, #16 + zeh W4, MANAL + or MANAH, MANAH, W4 + move W4, MANAH + sub MANAH, MANAH, W1 + slt $r15, W4, MANAH + beqz $r15, .Li28 + +.LGloop4: + addi W5, W5, #-1 + add MANAH, MANAH, MANBH + slt $r15, MANAH, MANBH + beqz $r15, .LGloop4 - move $r10, P1H -/* This is a 64-bit multiple. */ -#ifndef __big_endian__ -/* For little endian: ($r0, $r8) is (high, low). 
*/ -#ifndef __NDS32_ISA_V3M__ - mulr64 $r0, $r5, $r7 -#else - pushm $r2, $r5 - move $r0, $r5 - movi $r1, #0 - move $r2, $r7 - movi $r3, #0 - bal __muldi3 - popm $r2, $r5 -#endif - move $r8, $r0 - move $r0, $r1 -#else /* __big_endian__ */ -/* For big endian: ($r1, $r7) is (high, low). */ +.Li28: + slli W2, W2, #16 + add W2, W2, W5 + ! This is a 64-bit multiplication: (P1H, P1L) is (high, low). #ifndef __NDS32_ISA_V3M__ - mulr64 $r0, $r4, $r8 + mulr64 $r0, W2, MANBL #else - pushm $r2, $r5 - move $r1, $r4 - movi $r0, #0 - move $r3, $r8 - movi $r2, #0 - bal __muldi3 - popm $r2, $r5 -#endif - move $r7, $r1 - move $r1, $r0 -#endif /* __big_endian__ */ - move P1H, $r10 + smw.bi $r2, [$sp], $r3, #0 + move $r0, W2 + move $r1, MANBL + bal umul_ppmm + lmw.bi $r2, [$sp], $r3, #0 +#endif + +.LGmain7: + subri MANAL, P1L, #0 + move W4, MANAH + sub MANAH, MANAH, P1H + slt $r15, W4, MANAH + beqz MANAL, .LGmain8 + move W4, MANAH + addi MANAH, MANAH, #-1 + bnez $r15, .LGloopB + slt $r15, W4, MANAH + +.LGmain8: + beqz $r15, .LGmain9 + +.LGloopB: + addi W2, W2, #-1 + add MANAL, MANAL, MANBL + slt W4, MANAL, MANBL + add MANAH, MANAH, MANBH + slt $r15, MANAH, MANBH + beqz W4, .LGloopB2 + addi MANAH, MANAH, #1 + bnez $r15, .LGmain9 + slti $r15, MANAH, #1 - #ADD(P2L, O1H) - add P2L, P2L, O1H - slt $r15, P2L, O1H - - - #ADDCC(P2H, P1L) - beqzs8 .LL29 - add P2H, P2H, P1L - slt $r15, P2H, P1L - beqzs8 .LL30 - addi P2H, P2H, #0x1 - j .LL31 -.LL30: - move $r15, #1 - add P2H, P2H, $r15 - slt $r15, P2H, $r15 - j .LL31 -.LL29: - add P2H, P2H, P1L - slt $r15, P2H, P1L -.LL31: +.LGloopB2: + beqz $r15, .LGloopB - #ADDC($r9, $0x0) - add $r9, $r9, $r15 - -/* This is a 64-bit multiple. */ -#ifndef __big_endian__ -/* For little endian: ($r8, $r0) is (high, low). */ - move $r10, $r9 -#ifndef __NDS32_ISA_V3M__ - mulr64 $r8, $r4, $r7 -#else - pushm $r0, $r5 - move $r0, $r4 - movi $r1, #0 - move $r2, $r7 - movi $r3, #0 - bal __muldi3 - movd44 $r8, $r0 - popm $r0, $r5 -#endif - move $r0, $r8 - move $r8, $r9 - move $r9, $r10 -#else /* __big_endian__ */ -/* For big endian: ($r7, $r1) is (high, low). */ - move $r10, $r6 -#ifndef __NDS32_ISA_V3M__ - mulr64 $r6, $r5, $r8 -#else - pushm $r0, $r5 - move $r1, $r5 - movi $r0, #0 - move $r3, $r8 - movi $r2, #0 - bal __muldi3 - movd44 $r6, $r0 - popm $r0, $r5 +.LGmain9: + sltsi $r15, W3, #0 +#ifdef __NDS32_ISA_V3M__ + lwi EXPOA, [$sp+(8)] #endif - move $r1, $r7 - move $r7, $r6 - move $r6, $r10 -#endif /* __big_endian__ */ - - #ADD(P2L, O1H) - add P2L, P2L, O1H - slt $r15, P2L, O1H + bnez $r15, .LGmain10 + move $r15, W2 + add W2, W2, W2 + slt $r15, W2, $r15 + add W3, W3, W3 + add W3, W3, $r15 + addi EXPOA, EXPOA, #-1 + +.LGmain10: + or MANAH, MANAH, MANAL + ori P1L, W2, #1 + cmovz P1L, W2, MANAH + move P1H, W3 + blez EXPOA, .LGunderflow ! exponent(quotient) is too small + subri W1, EXPOA, #0x7ff + blez W1, .LGinf ! exponent(quotient) is too big, return inf + addi P1L, P1L, #0x400 + slti $r15, P1L, #0x400 + beqz $r15, .LGround + add P1H, P1H, $r15 + slt $r15, P1H, $r15 + add EXPOA, EXPOA, $r15 + + ! do rounding +.LGround: + srli W2, P1L, #11 + andi W2, W2, #1 + sub P1L, P1L, W2 + srli P1L, P1L, #11 + slli W2, P1H, #21 + or P1L, P1L, W2 + + ! do packing + slli P1H, P1H, #1 + srli P1H, P1H, #12 + slli W1, EXPOA, #20 + or P1H, P1H, W1 +.LGret: + or P1H, P1H, AXORB + pop25 $r10, #16 - #ADDCC(P2H, $0x0) - beqzs8 .LL34 - add P2H, P2H, $r15 - slt $r15, P2H, $r15 -.LL34: + ! --------------------------------------------------------------------- + ! exponent(A) is 0x000 + ! 
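+ ! ---------------------------------------------------------------------
+ ! A subnormal operand is first renormalized: the mantissa is shifted
+ ! left until its MSB is set and the exponent is decreased accordingly.
+ ! A rough C sketch of the step done at .LGAexpzero/.LGBexpzero below
+ ! (illustrative names; clz64 stands in for the clz instruction or the
+ ! shift loop used when the performance extension is absent):
+ !
+ !   man <<= 1;            /* subnormals have no hidden leading 1 */
+ !   n = clz64 (man);
+ !   man <<= n;
+ !   expo -= n;            /* expo was 0, so it goes negative */
+ ! ---------------------------------------------------------------------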
--------------------------------------------------------------------- +.LGAexpzero: + or $r15, MANAH, MANAL + beqz $r15, .LGAzero ! A is zero + ! A is subnormal + srli $r15, MANAL, #31 + add MANAH, MANAH, MANAH + add MANAH, MANAH, $r15 + add MANAL, MANAL, MANAL + ! count leading zeros of A + bnez MANAH, .LGAcont + move MANAH, MANAL + move MANAL, #0 + addi EXPOA, EXPOA, #-32 + + ! MANAH is non-zero +.LGAcont: +#ifdef __NDS32_EXT_PERF__ + clz W4, MANAH +#else + move W4, #0 + move W5, MANAH + b .LGAloop2 + +.LGAloop: + add W5, W5, W5 + addi W4, W4, #1 + +.LGAloop2: + slt $r15, W5, SIGN + bnez $r15, .LGAloop +#endif + beqz W4, .LGmain1 + sub EXPOA, EXPOA, W4 + subri W2, W4, #32 + srl W2, MANAL, W2 + sll MANAL, MANAL, W4 + sll MANAH, MANAH, W4 + or MANAH, MANAH, W2 + b .LGmain1 + +.LGAzero: + beq W3, EXPOB, .LGAzero2 ! B is NaN or inf, goto .LGAzero2 + ! B is finite: check whether B is zero. + bnez EXPOB, .LGsetsign + or $r15, MANBH, MANBL + beqz $r15, .LGnan ! zero/zero is NaN + +.LGsetsign: + move P1H, AXORB + pop25 $r10, #16 + +.LGAzero2: + or MANBH, MANBH, MANBL + beq MANBH, SIGN, .LGsetsign ! zero/inf is zero + ! zero/NaN is NaN - #ADDC($r9, $0x0) - add $r9, $r9, $r15 - or $r10, P1L, P2L - beqz $r10, .Li13 - ori P2H, P2H, #1 -.Li13: - move P3H, $r9 - move P3L, P2H - sltsi $r15, P3H, #0 - bnezs8 .Li14 - - move $r15, P3L - add P3L, P3L, P3L - slt $r15, P3L, $r15 - add P3H, P3H, P3H - add P3H, P3H, $r15 - addi $r6, $r6, #-1 -.Li14: - addi $r10, $r6, #-1 - slti $r15, $r10, #0x7fe - beqzs8 .LFoveund +.LGnan: + move P1L, #0 + move P1H, #0xfff80000 + pop25 $r10, #16 - #ADD(P3L, $0x400) - move $r15, #0x400 - add P3L, P3L, $r15 - slt $r15, P3L, $r15 + ! --------------------------------------------------------------------- + ! exponent(A) is 0x7ff + ! --------------------------------------------------------------------- +.LGAinfnan: + or MANAH, MANAH, MANAL + bne MANAH, SIGN, .LGnan ! A is NaN, return NaN + ! A is inf: check whether B is finite. + beq W3, EXPOB, .LGnan ! both inf/inf and inf/NaN are NaN + ! inf/finite is inf +.LGinf: + move P1L, #0 + move P1H, #0x7ff00000 + or P1H, P1H, AXORB + pop25 $r10, #16 + + ! --------------------------------------------------------------------- + ! exponent(B) is 0x7ff + ! --------------------------------------------------------------------- +.LGBinfnan: + or MANBH, MANBH, MANBL + bne MANBH, SIGN, .LGnan ! B is NaN, return NaN + ! B is inf: finite/inf is zero + move P1L, #0 + b .LGsetsign + + ! --------------------------------------------------------------------- + ! exponent(B) is 0x000 + ! --------------------------------------------------------------------- +.LGBexpzero: + or $r15, MANBH, MANBL + beqz $r15, .LGinf ! finite/zero is inf + ! B is subnormal + srli $r15, MANBL, #31 + add MANBH, MANBH, MANBH + add MANBH, MANBH, $r15 + add MANBL, MANBL, MANBL + ! count leading zeros of B + bnez MANBH, .LGBcont + move MANBH, MANBL + move MANBL, #0 + addi EXPOB, EXPOB, #-32 + + ! MANBH is non-zero +.LGBcont: +#ifdef __NDS32_EXT_PERF__ + clz W4, MANBH +#else + move W4, #0 + move W5, MANBH + b .LGBloop2 + +.LGBloop: + add W5, W5, W5 + addi W4, W4, #1 + +.LGBloop2: + slt $r15, W5, SIGN + bnez $r15, .LGBloop +#endif + beqz W4, .LGmain2 + sub EXPOB, EXPOB, W4 + subri W2, W4, #32 + srl W2, MANBL, W2 + sll MANBL, MANBL, W4 + sll MANBH, MANBH, W4 + or MANBH, MANBH, W2 + b .LGmain2 + + ! --------------------------------------------------------------------- + ! handle underflow + ! 
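+ ! ---------------------------------------------------------------------
+ ! The underflow path below produces a subnormal result: the 64-bit
+ ! working mantissa is shifted right by (1 - exponent) bits, everything
+ ! shifted out is folded into a sticky bit, and the usual rounding code
+ ! finishes (a carry out of rounding raises the exponent back to 1).
+ ! A rough C sketch, with illustrative names:
+ !
+ !   /* man: 64-bit working mantissa, expo <= 0 at this point */
+ !   unsigned int shift = 1 - expo;
+ !   if (shift >= 64)
+ !     return sign;                          /* flush to signed zero */
+ !   unsigned long long lost = man << (64 - shift);
+ !   man = (man >> shift) | (lost != 0);     /* keep a sticky bit */
+ ! ---------------------------------------------------------------------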
--------------------------------------------------------------------- +.LGunderflow: + move MANAL, #0 + subri W3, EXPOA, #1 + slti $r15, W3, #0x20 + bnez $r15, .LGunderflow2 + move MANAL, P1L + move P1L, P1H + move P1H, #0 + addi W3, W3, #0xffffffe0 + beqz P1L, .LGunderflow2 + slti $r15, W3, #0x20 + beqz $r15, .LGignore ! result too small, return zero + + ! 1-exponent(A), in W3, is 0-31 +.LGunderflow2: + beqz W3, .LGunderflow3 ! it is zero, skip + subri W2, W3, #0x20 + sll MANAH, P1H, W2 + sll W1, P1L, W2 + srl P1L, P1L, W3 + srl P1H, P1H, W3 + or P1L, P1L, MANAH + or MANAL, MANAL, W1 +! ori W3, P1L, #1 +! cmovn P1L, W3, MANAL + beqz MANAL, .LGunderflow3 + ori P1L, P1L, #1 + +.LGunderflow3: + addi P1L, P1L, #0x400 + slti $r15, P1L, #0x400 + add P1H, P1H, $r15 + srli EXPOA, P1H, #31 + b .LGround + +.LGignore: + move P1L, #0 + b .LGsetsign + .size __divdf3, .-__divdf3 +#endif /* L_div_df */ - #ADDCC(P3H, $0x0) - beqzs8 .LL37 - add P3H, P3H, $r15 - slt $r15, P3H, $r15 -.LL37: - - #ADDC($r6, $0x0) - add $r6, $r6, $r15 - -.LFlab8: - srli $r10, P3L, #11 - andi $r10, $r10, #1 - sub P3L, P3L, $r10 - srli P1L, P3L, #11 - slli $r10, P3H, #21 - or P1L, P1L, $r10 - slli $r10, P3H, #1 - srli $r10, $r10, #12 - or P1H, P1H, $r10 - slli $r10, $r6, #20 - or P1H, P1H, $r10 -.LFret: -.LF999: - popm $r6, $r10 - pop $lp - ret5 $lp -.LFspecA: - #ADD(P3L, P3L) - move $r15, P3L - add P3L, P3L, P3L - slt $r15, P3L, $r15 - - #ADDC(P3H, P3H) - add P3H, P3H, P3H - add P3H, P3H, $r15 - bnez $r6, .Li15 - or $r10, P3H, P3L - beqz $r10, .Li16 - - - #NORMd($r4, P1L, P2H) - bnez P3H, .LL38 - bnez P3L, .LL39 - move $r6, #0 - j .LL40 -.LL39: - move P3H, P3L - move P3L, #0 - move P1L, #32 - sub $r6, $r6, P1L -.LL38: -#ifndef __big_endian__ -#ifdef __NDS32_PERF_EXT__ - clz $r0, P3H -#else - pushm $r1, P3H - move $r0, P3H - bal __clzsi2 - popm $r1, $r5 -#endif -#else /* __big_endian__ */ -#ifdef __NDS32_PERF_EXT__ - clz $r1, $r4 -#else - push $r0 - pushm $r2, $r5 - move $r0, $r4 - bal __clzsi2 - move $r1, $r0 - popm $r2, $r5 - pop $r0 -#endif -#endif /* __big_endian__ */ - beqz P1L, .LL40 - sub $r6, $r6, P1L - subri P2H, P1L, #32 - srl P2H, P3L, P2H - sll P3L, P3L, P1L - sll P3H, P3H, P1L - or P3H, P3H, P2H -.LL40: - #NORMd End +#ifdef L_mul_sf - j .LFlab1 -.Li16: - subri $r15, $r9, #0x7ff - beqzs8 .LFnan - j .LFret -.Li15: - or $r10, P3H, P3L - bnez $r10, .LFnan - bnez $r9, .Li17 - slli $r10, O1H, #1 - or $r10, $r10, O1L - beqz $r10, .LFnan -.Li17: - subri $r15, $r9, #0x7ff - bnezs8 .LFinf - -.LFspecB: - #ADD(O1L, O1L) - move $r15, O1L - add O1L, O1L, O1L - slt $r15, O1L, $r15 - - #ADDC(O1H, O1H) - add O1H, O1H, O1H - add O1H, O1H, $r15 - bnez $r9, .Li18 - or $r10, O1H, O1L - beqz $r10, .Li19 - - - #NORMd($r7, P2L, P1L) - bnez O1H, .LL41 - bnez O1L, .LL42 - move $r9, #0 - j .LL43 -.LL42: - move O1H, O1L - move O1L, #0 - move P2L, #32 - sub $r9, $r9, P2L -.LL41: -#ifndef __big_endian__ -#ifdef __NDS32_PERF_EXT__ - clz $r2, $r8 +#if !defined (__big_endian__) + #define P1L $r0 + #define P1H $r1 + #define P2L $r2 + #define P2H $r3 #else - pushm $r0, $r1 - pushm $r3, $r5 - move $r0, $r8 - bal __clzsi2 - move $r2, $r0 - popm $r3, $r5 - popm $r0, $r1 + #define P1H $r0 + #define P1L $r1 + #define P2H $r2 + #define P2L $r3 #endif -#else /* __big_endian__ */ -#ifdef __NDS32_PERF_EXT__ - clz $r3, $r7 -#else - pushm $r0, $r2 - pushm $r4, $r5 - move $r0, $r7 - bal __clzsi2 - move $r3, $r0 - popm $r4, $r5 - popm $r0, $r2 +#define SIGN $r4 +#ifdef __NDS32_REDUCE_REGS__ +#define EXPOA $r6 +#define MANTA P2L +#define VALUA P2H +#define EXPOB 
$r7 +#define MANTB P1L +#define VALUB P1H +#define SPROD $r8 +#else +#define EXPOA $r16 +#define MANTA P2L +#define VALUA P2H +#define EXPOB $r17 +#define MANTB P1L +#define VALUB P1H +#define SPROD $r18 #endif -#endif /* __big_endian__ */ - beqz P2L, .LL43 - sub $r9, $r9, P2L - subri P1L, P2L, #32 - srl P1L, O1L, P1L - sll O1L, O1L, P2L - sll O1H, O1H, P2L - or O1H, O1H, P1L -.LL43: - #NORMd End - - j .LFlab2 -.Li19: - move P1L, #0 - j .LFret -.Li18: - or $r10, O1H, O1L - bnez $r10, .LFnan - -.LFinf: - move $r10, #0x7ff00000 - or P1H, P1H, $r10 - move P1L, #0 - j .LFret - -.LFnan: - move P1H, #0xfff80000 - move P1L, #0 - j .LFret - -.LFoveund: - bgtz $r6, .LFinf - subri P1L, $r6, #1 - move P2L, #0 -.LL44: - move $r10, #0x20 - slt $r15, P1L, $r10 - bnezs8 .LL45 - or P2L, P2L, P3L - move P3L, P3H - move P3H, #0 - addi P1L, P1L, #0xffffffe0 - bnez P3L, .LL44 -.LL45: - beqz P1L, .LL46 - move P2H, P3H - move $r10, P3L - srl P3L, P3L, P1L - srl P3H, P3H, P1L - subri P1L, P1L, #0x20 - sll P2H, P2H, P1L - or P3L, P3L, P2H - sll $r10, $r10, P1L - or P2L, P2L, $r10 - beqz P2L, .LL46 - ori P3L, P3L, #1 -.LL46: - #ADD(P3L, $0x400) - move $r15, #0x400 - add P3L, P3L, $r15 - slt $r15, P3L, $r15 - - #ADDC(P3H, $0x0) - add P3H, P3H, $r15 - srli $r6, P3H, #31 - j .LFlab8 - .size __muldf3, .-__muldf3 -#endif /* L_mul_df */ - - - -#ifdef L_div_sf +#define W0 VALUB +#define W1 $r5 .text .align 2 - .global __divsf3 - .type __divsf3, @function -__divsf3: - push $lp - pushm $r6, $r10 - - move $r7, #0x80000000 - srli $r4, $r0, #23 - andi $r4, $r4, #0xff - srli $r6, $r1, #23 - andi $r6, $r6, #0xff - slli $r3, $r0, #8 - or $r3, $r3, $r7 - slli $r5, $r1, #8 - or $r5, $r5, $r7 - xor $r10, $r0, $r1 - and $r7, $r7, $r10 - - addi $r10, $r4, #-1 - slti $r15, $r10, #0xfe - beqzs8 .LGspecA - -.LGlab1: - addi $r10, $r6, #-1 - slti $r15, $r10, #0xfe - beqzs8 .LGspecB - -.LGlab2: - slt $r15, $r3, $r5 - bnezs8 .Li27 - srli $r3, $r3, #1 - addi $r4, $r4, #1 -.Li27: - srli $r8, $r5, #14 - divr $r0, $r2, $r3, $r8 - andi $r9, $r5, #0x3fff - mul $r1, $r9, $r0 - slli $r2, $r2, #14 - - #SUB($r2, $r1) - move $r15, $r2 - sub $r2, $r2, $r1 - slt $r15, $r15, $r2 - beqzs8 .Li28 - addi $r0, $r0, #-1 - - #ADD($r2, $r5) - add $r2, $r2, $r5 - slt $r15, $r2, $r5 -.Li28: - divr $r3, $r2, $r2, $r8 - mul $r1, $r9, $r3 - slli $r2, $r2, #14 - - #SUB($r2, $r1) - move $r15, $r2 - sub $r2, $r2, $r1 - slt $r15, $r15, $r2 - beqzs8 .Li29 - addi $r3, $r3, #-1 - - #ADD($r2, $r5) - add $r2, $r2, $r5 - slt $r15, $r2, $r5 -.Li29: - slli $r10, $r0, #14 - add $r3, $r3, $r10 - slli $r3, $r3, #4 - beqz $r2, .Li30 - ori $r3, $r3, #1 -.Li30: - subri $r10, $r6, #0x7e - add $r4, $r4, $r10 - addi $r10, $r4, #-1 - slti $r15, $r10, #0xfe - beqzs8 .LGoveund - -.LGlab8: - #ADD($r3, $0x80) - move $r15, #0x80 - add $r3, $r3, $r15 - slt $r15, $r3, $r15 - - #ADDC($r4, $0x0) - add $r4, $r4, $r15 - srli $r10, $r3, #8 - andi $r10, $r10, #1 - sub $r3, $r3, $r10 - slli $r3, $r3, #1 - srli $r3, $r3, #9 - slli $r10, $r4, #23 - or $r3, $r3, $r10 - or $r0, $r3, $r7 - -.LG999: - popm $r6, $r10 - pop $lp - ret5 $lp + .global __mulsf3 + .type __mulsf3, @function +__mulsf3: +#ifdef __NDS32_REDUCE_REGS__ + smw.adm $r6, [$sp], $r8, 2 +#endif -.LGspecA: - bnez $r4, .Li31 - add $r3, $r3, $r3 - beqz $r3, .Li31 -#ifdef __NDS32_PERF_EXT__ - clz $r8, $r3 -#else - pushm $r0, $r5 - move $r0, $r3 - bal __clzsi2 - move $r8, $r0 - popm $r0, $r5 + xor SPROD, $r1, $r0 + move SIGN, #0x80000000 + and SPROD, SPROD, SIGN ! sign(A xor B) + slli VALUA, $r0, 1 ! A<<1 + slli VALUB, $r1, 1 ! 
B<<1 + srli EXPOA, VALUA, 24 ! exponent(A) + srli EXPOB, VALUB, 24 ! exponent(B) + slli MANTA, VALUA, 7 ! mantissa(A)<<8 + slli MANTB, VALUB, 7 ! mantissa(B)<<8 +#if defined(__NDS32_ISA_V3__)||defined(__NDS32_ISA_V3M__) + beqz EXPOA, .LFzeroAexp ! exponent(A) is zero, goto .LFzeroAexp + beqc EXPOA, 0xff, .LFinfnanA ! A is inf or NaN, goto .LFinfnanA +#else + move W1, #0xff + beqz EXPOA, .LFzeroAexp ! exponent(A) is zero, goto .LFzeroAexp + beq W1, EXPOA, .LFinfnanA ! A is inf or NaN, goto .LFinfnanA #endif - sub $r4, $r4, $r8 - sll $r3, $r3, $r8 - j .LGlab1 -.Li31: - bne $r6, $r4, .Li33 - add $r10, $r5, $r5 - beqz $r10, .LGnan -.Li33: - subri $r15, $r6, #0xff - beqzs8 .LGspecB - beqz $r4, .LGzer - add $r10, $r3, $r3 - bnez $r10, .LGnan - j .LGinf - -.LGspecB: - bnez $r6, .Li34 - add $r5, $r5, $r5 - beqz $r5, .LGinf -#ifdef __NDS32_PERF_EXT__ - clz $r8, $r5 + or MANTA, MANTA, SIGN + +.LFlab1: + beqz EXPOB, .LFzeroB ! exponent(B) is zero, goto .LFzeroB +#if defined(__NDS32_ISA_V3__)||defined(__NDS32_ISA_V3M__) + beqc EXPOB, 0xff, .LFinfnanB ! B is inf or NaN, goto .LFinfnanB #else - pushm $r0, $r5 - move $r0, $r5 - bal __clzsi2 - move $r8, $r0 - popm $r0, $r5 + beq W1, EXPOB, .LFinfnanB ! B is inf or NaN, goto .LFinfnanB #endif - sub $r6, $r6, $r8 - sll $r5, $r5, $r8 - j .LGlab2 -.Li34: - add $r10, $r5, $r5 - bnez $r10, .LGnan - -.LGzer: - move $r0, $r7 - j .LG999 - -.LGoveund: - bgtz $r4, .LGinf - subri $r8, $r4, #1 - slti $r15, $r8, #0x20 - beqzs8 .LGzer - subri $r10, $r8, #0x20 - sll $r4, $r3, $r10 - srl $r3, $r3, $r8 - beqz $r4, .Li37 - ori $r3, $r3, #2 -.Li37: - move $r4, #0 - addi $r10, $r3, #0x80 - sltsi $r15, $r10, #0 - beqzs8 .LGlab8 - move $r4, #1 - j .LGlab8 - -.LGinf: - move $r10, #0x7f800000 - or $r0, $r7, $r10 - j .LG999 - -.LGnan: - move $r0, #0xffc00000 - j .LG999 - .size __divsf3, .-__divsf3 -#endif /* L_div_sf */ + or MANTB, MANTB, SIGN - - -#ifdef L_div_df - -#ifndef __big_endian__ - #define P1L $r0 - #define P1H $r1 - #define P2L $r2 - #define P2H $r3 - #define P3L $r4 - #define P3H $r5 - #define O1L $r7 - #define O1H $r8 + ! --------------------------------------------------------------------- + ! This is a 64-bit multiplication. + ! --------------------------------------------------------------------- +.LFlab2: +#ifdef __NDS32_ISA_V3M__ + move P1H, MANTA + bal umul_ppmm #else - #define P1H $r0 - #define P1L $r1 - #define P2H $r2 - #define P2L $r3 - #define P3H $r4 - #define P3L $r5 - #define O1H $r7 - #define O1L $r8 + mulr64 $r0, MANTA, MANTB #endif - .text - .align 2 - .global __divdf3 - .type __divdf3, @function -__divdf3: - push $lp - pushm $r6, $r10 + ori MANTA, P1H, #1 + cmovz MANTA, P1H, P1L + sltsi $r15, MANTA, #0 + bnezs8 .Li18 + slli MANTA, MANTA, #1 + addi EXPOA, EXPOA, #-1 - slli $r6, P1H, #1 - srli $r6, $r6, #21 - slli P3H, P1H, #11 - srli $r10, P1L, #21 - or P3H, P3H, $r10 - slli P3L, P1L, #11 - move O1L, #0x80000000 - or P3H, P3H, O1L - slli $r9, P2H, #1 - srli $r9, $r9, #21 - slli O1H, P2H, #11 - srli $r10, P2L, #21 - or O1H, O1H, $r10 - or O1H, O1H, O1L - xor P1H, P1H, P2H - and P1H, P1H, O1L - slli O1L, P2L, #11 - - addi $r10, $r6, #-1 - slti $r15, $r10, #0x7fe - beqzs8 .LGspecA +.Li18: + addi W1, EXPOB, #0xffffff82 + add EXPOA, EXPOA, W1 + blez EXPOA, .LFunder ! A*B underflow, goto .LFunder + slti $r15, EXPOA, #0xff + beqz $r15, .LFinf ! A*B overflow, goto .LFinf + + ! --------------------------------------------------------------------- + ! do rounding + ! 
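The multiply path above forms the full 64-bit product of the two bit-31-aligned mantissas, folds the low half into a sticky bit, renormalizes by at most one bit, and derives the biased result exponent that is then checked for underflow and overflow. A small C sketch of that flow, with invented names and no claim to match the code bit-for-bit:

#include <stdint.h>

/* Illustrative only: mant_a/mant_b have their implicit 1 in bit 31,
   exp_a/exp_b are the biased 8-bit exponents.  Returns the normalized
   mantissa (sticky bit in bit 0) and writes the biased result exponent.  */
static uint32_t
mul_core (uint32_t mant_a, uint32_t mant_b,
          int32_t exp_a, int32_t exp_b, int32_t *exp_out)
{
  uint64_t prod = (uint64_t) mant_a * mant_b;
  uint32_t mant = (uint32_t) (prod >> 32);
  if ((uint32_t) prod != 0)
    mant |= 1;                          /* sticky bit, like the ori/cmovz above */

  int32_t exp = exp_a + exp_b - 0x7e;   /* the 0xffffff82 (-126) adjustment */
  if ((int32_t) mant >= 0)              /* product below 2.0: renormalize one bit */
    {
      mant <<= 1;
      exp -= 1;
    }
  *exp_out = exp;                       /* exp <= 0 underflows, exp >= 0xff overflows */
  return mant;
}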
--------------------------------------------------------------------- +.LFround: + addi MANTA, MANTA, #128 + slti $r15, MANTA, #128 + add EXPOA, EXPOA, $r15 + srli W1, MANTA, #8 + andi W1, W1, #1 + sub MANTA, MANTA, W1 + + ! --------------------------------------------------------------------- + ! pack result + ! --------------------------------------------------------------------- + slli MANTA, MANTA, #1 + srli MANTA, MANTA, #9 + slli $r0, EXPOA, #23 + or $r0, $r0, MANTA +.LFpack: + or $r0, $r0, SPROD -.LGlab1: - addi $r10, $r9, #-1 - slti $r15, $r10, #0x7fe - beqzs8 .LGspecB +.LFret: +#ifdef __NDS32_REDUCE_REGS__ + lmw.bim $r6, [$sp], $r8, 2 +#endif + ret5 $lp -.LGlab2: - sub $r6, $r6, $r9 - addi $r6, $r6, #0x3ff - srli P3L, P3L, #1 - slli $r10, P3H, #31 - or P3L, P3L, $r10 - srli P3H, P3H, #1 - srli $r9, O1H, #16 - divr P2H, P3H, P3H, $r9 - move $r10, #0xffff - and P2L, O1H, $r10 - mul P1L, P2L, P2H - slli P3H, P3H, #16 - srli $r10, P3L, #16 - or P3H, P3H, $r10 - - #SUB(P3H, P1L) - move $r15, P3H - sub P3H, P3H, P1L - slt $r15, $r15, P3H - beqzs8 .Li20 - -.Lb21: - addi P2H, P2H, #-1 - add P3H, P3H, O1H - slt $r15, P3H, O1H - beqzs8 .Lb21 -.Li20: - divr $r9, P3H, P3H, $r9 - mul P1L, P2L, $r9 - slli P3H, P3H, #16 - move $r15, #0xffff - and $r10, P3L, $r15 - or P3H, P3H, $r10 - - #SUB(P3H, P1L) - move $r15, P3H - sub P3H, P3H, P1L - slt $r15, $r15, P3H - beqzs8 .Li22 - -.Lb23: - addi $r9, $r9, #-1 - add P3H, P3H, O1H - slt $r15, P3H, O1H - beqzs8 .Lb23 -.Li22: - slli P2H, P2H, #16 - add P2H, P2H, $r9 + ! --------------------------------------------------------------------- + ! exponent(A) is 0x00 + ! --------------------------------------------------------------------- +.LFzeroAexp: +#ifdef __NDS32_EXT_PERF__ + beqz MANTA, .LFzeroA ! A is zero -/* This is a 64-bit multiple. */ -#ifndef __big_endian__ -/* For little endian: ($r0, $r9) is (high, low). */ - move $r10, $r1 -#ifndef __NDS32_ISA_V3M__ - mulr64 $r0, $r3, $r7 + ! A is denorm + add MANTA, MANTA, MANTA + clz $r15, MANTA + sub EXPOA, EXPOA, $r15 + sll MANTA, MANTA, $r15 + b .LFlab1 + +.LFzeroA: + ! A is zero +#if defined(__NDS32_ISA_V3__)||defined(__NDS32_ISA_V3M__) + beqc EXPOB, 0xff, .LFnan ! zero * inf = zero * NaN = NaN #else - pushm $r2, $r5 - move $r0, $r3 - movi $r1, #0 - move $r2, $r7 - movi $r3, #0 - bal __muldi3 - popm $r2, $r5 + beq W1, EXPOB,.LFnan ! zero * inf = zero * NaN = NaN #endif - move $r9, $r0 - move $r0, $r1 - move $r1, $r10 -#else /* __big_endian__ */ -/* For big endian: ($r1, $r9) is (high, low). */ - move $r10, $r0 -#ifndef __NDS32_ISA_V3M__ - mulr64 $r0, $r2, $r8 -#else - pushm $r2, $r5 - move $r1, $r2 - movi $r0, #0 - move $r3, $r8 - movi $r2, #0 - bal __muldi3 - popm $r2, $r5 -#endif - move $r9, $r1 - move $r1, $r0 - move $r0, $r10 -#endif /* __big_endian__ */ - move P3L, #0 +.LFzero: + move $r0, SPROD + b .LFret +#else + bnez MANTA, .LFloopA2 ! A is denorm - #SUB(P3L, $r9) - move $r15, P3L - sub P3L, P3L, $r9 - slt $r15, $r15, P3L - - - #SUBCC(P3H, P1L) - beqzs8 .LL47 - move $r15, P3H - sub P3H, P3H, P1L - slt $r15, $r15, P3H - beqzs8 .LL48 - subi333 P3H, P3H, #1 - j .LL49 -.LL48: - move $r15, P3H - subi333 P3H, P3H, #1 - slt $r15, $r15, P3H - j .LL49 -.LL47: - move $r15, P3H - sub P3H, P3H, P1L - slt $r15, $r15, P3H -.LL49: + ! A is zero +#if defined(__NDS32_ISA_V3__)||defined(__NDS32_ISA_V3M__) + beqc EXPOB, 0xff, .LFnan ! zero * inf = zero * NaN = NaN +#else + beq W1, EXPOB,.LFnan ! 
zero * inf = zero * NaN = NaN +#endif - beqzs8 .Li24 +.LFzero: + move $r0, SPROD + b .LFret -.LGlab3: - addi P2H, P2H, #-1 +.LFloopA: + addi EXPOA, EXPOA, #-1 +.LFloopA2: + add MANTA, MANTA, MANTA + slt $r15, MANTA, SIGN + bnez $r15, .LFloopA + b .LFlab1 +#endif - #ADD(P3L, O1L) - add P3L, P3L, O1L - slt $r15, P3L, O1L - - - #ADDCC(P3H, O1H) - beqzs8 .LL50 - add P3H, P3H, O1H - slt $r15, P3H, O1H - beqzs8 .LL51 - addi P3H, P3H, #0x1 - j .LL52 -.LL51: - move $r15, #1 - add P3H, P3H, $r15 - slt $r15, P3H, $r15 - j .LL52 -.LL50: - add P3H, P3H, O1H - slt $r15, P3H, O1H -.LL52: - - beqzs8 .LGlab3 -.Li24: - bne P3H, O1H, .Li25 - move P1L, O1L - move P3H, P3L - move $r9, #0 - move P2L, $r9 - j .Le25 -.Li25: - srli P2L, O1H, #16 - divr $r9, P3H, P3H, P2L - move $r10, #0xffff - and $r10, O1H, $r10 - mul P1L, $r10, $r9 - slli P3H, P3H, #16 - srli $r15, P3L, #16 - or P3H, P3H, $r15 - - #SUB(P3H, P1L) - move $r15, P3H - sub P3H, P3H, P1L - slt $r15, $r15, P3H - beqzs8 .Li26 - -.Lb27: - addi $r9, $r9, #-1 - add P3H, P3H, O1H - slt $r15, P3H, O1H - beqzs8 .Lb27 -.Li26: - divr P2L, P3H, P3H, P2L - mul P1L, $r10, P2L - slli P3H, P3H, #16 - move $r10, #0xffff - and $r10, P3L, $r10 - or P3H, P3H, $r10 - - #SUB(P3H, P1L) - move $r15, P3H - sub P3H, P3H, P1L - slt $r15, $r15, P3H - beqzs8 .Li28 - -.Lb29: - addi P2L, P2L, #-1 - add P3H, P3H, O1H - slt $r15, P3H, O1H - beqzs8 .Lb29 -.Li28: - slli $r9, $r9, #16 - add $r9, $r9, P2L + ! --------------------------------------------------------------------- + ! exponent(A) is 0xff + ! --------------------------------------------------------------------- +.LFinfnanA: + bne MANTA, SIGN, .LFnan ! A is NaN: NaN * B = NaN -/* This is a 64-bit multiple. */ -#ifndef __big_endian__ -/* For little endian: ($r0, $r2) is (high, low). */ - move $r10, $r1 -#ifndef __NDS32_ISA_V3M__ - mulr64 $r0, $r9, $r7 + ! A is inf + beqz VALUB, .LFnan ! B is zero: inf * zero = NaN +#if defined(__NDS32_ISA_V3__)||defined(__NDS32_ISA_V3M__) + bnec EXPOB, 0xff, .LFinf ! B is finite: inf * B = inf #else - pushm $r2, $r5 - move $r0, $r9 - movi $r1, #0 - move $r2, $r7 - movi $r3, #0 - bal __muldi3 - popm $r2, $r5 + bne W1, EXPOB, .LFinf ! B is finite: inf * B = inf #endif - move $r2, $r0 - move $r0, $r1 - move $r1, $r10 -#else /* __big_endian__ */ -/* For big endian: ($r1, $r3) is (high, low). */ - move $r10, $r0 -#ifndef __NDS32_ISA_V3M__ - mulr64 $r0, $r9, $r8 -#else - pushm $r2, $r5 - move $r0, $r9 - movi $r1, #0 - move $r2, $r7 - movi $r3, #0 - bal __muldi3 - popm $r2, $r5 -#endif - move $r3, $r1 - move $r1, $r0 - move $r0, $r10 -#endif /* __big_endian__ */ - -.Le25: - move P3L, #0 - #SUB(P3L, P2L) - move $r15, P3L - sub P3L, P3L, P2L - slt $r15, $r15, P3L - - - #SUBCC(P3H, P1L) - beqzs8 .LL53 - move $r15, P3H - sub P3H, P3H, P1L - slt $r15, $r15, P3H - beqzs8 .LL54 - subi333 P3H, P3H, #1 - j .LL55 -.LL54: - move $r15, P3H - subi333 P3H, P3H, #1 - slt $r15, $r15, P3H - j .LL55 -.LL53: - move $r15, P3H - sub P3H, P3H, P1L - slt $r15, $r15, P3H -.LL55: + ! --------------------------------------------------------------------- + ! exponent(B) is 0xff + ! --------------------------------------------------------------------- +.LFinfnanB: + bne MANTB, SIGN, .LFnan ! B is NaN: A * NaN = NaN - beqzs8 .Li30 +.LFinf: + move $r0, #0x7f800000 + b .LFpack -.LGlab4: - addi $r9, $r9, #-1 + ! --------------------------------------------------------------------- + ! exponent(B) is 0x00 + ! 
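When the performance extension (and with it the clz instruction) is not configured, the denormal operands above are normalized by the .LFloopA/.LFloopB shift loops instead. A C sketch of what those loops do, with illustrative names of my own:

#include <stdint.h>

/* Illustrative only: mant holds the denormal's fraction left-aligned
   below the vacant implicit-1 position; double it once for that vacant
   bit, then keep doubling and decrementing the exponent until bit 31
   is set.  */
static void
normalize_denormal (uint32_t *mant, int32_t *exp)
{
  *mant <<= 1;
  while (!(*mant & 0x80000000u))
    {
      *mant <<= 1;
      *exp -= 1;
    }
}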
--------------------------------------------------------------------- +.LFzeroB: +#ifdef __NDS32_EXT_PERF__ + beqz MANTB, .LFzero ! B is zero + + ! B is denorm + add MANTB, MANTB, MANTB + clz $r15, MANTB + sub EXPOB, EXPOB, $r15 + sll MANTB, MANTB, $r15 +#else + bnez MANTB, .LFloopB2 ! B is denorm + b .LFzero ! B is zero +.LFloopB: + addi EXPOB, EXPOB, #-1 +.LFloopB2: + add MANTB, MANTB, MANTB + slt $r15, MANTB, SIGN + bnez $r15, .LFloopB +#endif + b .LFlab2 - #ADD(P3L, O1L) - add P3L, P3L, O1L - slt $r15, P3L, O1L - - - #ADDCC(P3H, O1H) - beqzs8 .LL56 - add P3H, P3H, O1H - slt $r15, P3H, O1H - beqzs8 .LL57 - addi P3H, P3H, #0x1 - j .LL58 -.LL57: - move $r15, #1 - add P3H, P3H, $r15 - slt $r15, P3H, $r15 - j .LL58 -.LL56: - add P3H, P3H, O1H - slt $r15, P3H, O1H -.LL58: - - beqzs8 .LGlab4 -.Li30: - sltsi $r15, P2H, #0 - bnezs8 .Li31 - - #ADD($r9, $r9) - move $r15, $r9 - add $r9, $r9, $r9 - slt $r15, $r9, $r15 +.LFnan: + move $r0, #0xffc00000 + b .LFret - #ADDC(P2H, P2H) - add P2H, P2H, P2H - add P2H, P2H, $r15 - addi $r6, $r6, #-1 -.Li31: - or $r10, P3H, P3L - beqz $r10, .Li32 - ori $r9, $r9, #1 -.Li32: - move P3H, P2H - move P3L, $r9 - addi $r10, $r6, #-1 - slti $r15, $r10, #0x7fe - beqzs8 .LGoveund + ! --------------------------------------------------------------------- + ! A*B underflow + ! --------------------------------------------------------------------- +.LFunder: + subri W0, EXPOA, #1 + slti $r15, W0, #0x20 + beqzs8 .LFzero + subri W1, W0, #0x20 + sll EXPOA, MANTA, W1 + srl MANTA, MANTA, W0 + beqz EXPOA, .LFunder2 + ori MANTA, MANTA, #2 +.LFunder2: + addi W1, MANTA, #0x80 + sltsi EXPOA, W1, #0 + b .LFround + .size __mulsf3, .-__mulsf3 +#endif /* L_mul_sf */ - #ADD(P3L, $0x400) - move $r15, #0x400 - add P3L, P3L, $r15 - slt $r15, P3L, $r15 - #ADDCC(P3H, $0x0) - beqzs8 .LL61 - add P3H, P3H, $r15 - slt $r15, P3H, $r15 -.LL61: - - #ADDC($r6, $0x0) - add $r6, $r6, $r15 - -.LGlab8: - srli $r10, P3L, #11 - andi $r10, $r10, #1 - sub P3L, P3L, $r10 - srli P1L, P3L, #11 - slli $r10, P3H, #21 - or P1L, P1L, $r10 - slli $r10, P3H, #1 - srli $r10, $r10, #12 - or P1H, P1H, $r10 - slli $r10, $r6, #20 - or P1H, P1H, $r10 +#ifdef L_div_sf -.LGret: -.LG999: - popm $r6, $r10 - pop $lp - ret5 $lp +#define SIGN $r4 +#ifdef __NDS32_REDUCE_REGS__ +#define EXPOA $r6 +#define MANTA $r2 +#define VALUA $r2 +#define EXPOB $r7 +#define MANTB $r3 +#define VALUB $r1 +#define SQUOT $r8 +#define W0 VALUB +#define W1 $r5 +#define W2 $r0 +#define W3 $r9 +#else +#define EXPOA $r16 +#define MANTA $r2 +#define VALUA $r2 +#define EXPOB $r17 +#define MANTB $r3 +#define VALUB $r1 +#define SQUOT $r18 +#define W0 VALUB +#define W1 $r5 +#define W2 $r0 +#define W3 $r19 +#endif + +#define DHI W1 // high18(MANTB) +#define DLO W3 // low14(MANTB) +#define QHI W0 // MANTA / MANTB +#define REM W2 // MANTA % MANTB -.LGoveund: - bgtz $r6, .LGinf - subri P2H, $r6, #1 - move P1L, #0 -.LL62: - move $r10, #0x20 - slt $r15, P2H, $r10 - bnezs8 .LL63 - or P1L, P1L, P3L - move P3L, P3H - move P3H, #0 - addi P2H, P2H, #0xffffffe0 - bnez P3L, .LL62 -.LL63: - beqz P2H, .LL64 - move P2L, P3H - move $r10, P3L - srl P3L, P3L, P2H - srl P3H, P3H, P2H - subri P2H, P2H, #0x20 - sll P2L, P2L, P2H - or P3L, P3L, P2L - sll $r10, $r10, P2H - or P1L, P1L, $r10 - beqz P1L, .LL64 - ori P3L, P3L, #1 -.LL64: - #ADD(P3L, $0x400) - move $r15, #0x400 - add P3L, P3L, $r15 - slt $r15, P3L, $r15 + .text + .align 2 + .global __divsf3 + .type __divsf3, @function +__divsf3: +#ifdef __NDS32_REDUCE_REGS__ + smw.adm $r6, [$sp], $r9, 0 +#endif - #ADDC(P3H, $0x0) - add 
P3H, P3H, $r15 - srli $r6, P3H, #31 - j .LGlab8 - -.LGspecA: - #ADD(P3L, P3L) - move $r15, P3L - add P3L, P3L, P3L - slt $r15, P3L, $r15 - - #ADDC(P3H, P3H) - add P3H, P3H, P3H - add P3H, P3H, $r15 - bnez $r6, .Li33 - or $r10, P3H, P3L - beqz $r10, .Li33 - - - #NORMd($r4, P2H, P2L) - bnez P3H, .LL65 - bnez P3L, .LL66 - move $r6, #0 - j .LL67 -.LL66: - move P3H, P3L - move P3L, #0 - move P2H, #32 - sub $r6, $r6, P2H -.LL65: -#ifndef __big_endian__ -#ifdef __NDS32_PERF_EXT__ - clz $r3, $r5 + xor SQUOT, $r1, $r0 + move SIGN, #0x80000000 + and SQUOT, SQUOT, SIGN ! sign(A xor B) + slli VALUA, $r0, 1 ! A<<1 + slli VALUB, $r1, 1 ! B<<1 + srli EXPOA, VALUA, 24 ! exponent(A) + srli EXPOB, VALUB, 24 ! exponent(B) + slli MANTA, VALUA, 7 ! mantissa(A)<<8 + slli MANTB, VALUB, 7 ! mantissa(B)<<8 + beqz EXPOA, .LGzeroAexp ! exponent(A) is zero, goto .LGzeroAexp +#if defined(__NDS32_ISA_V3__)||defined(__NDS32_ISA_V3M__) + beqc EXPOA, 0xff, .LGinfnanA ! A is inf or NaN, goto .LGinfnanA #else - pushm $r0, $r2 - pushm $r4, $r5 - move $r0, $r5 - bal __clzsi2 - move $r3, $r0 - popm $r4, $r5 - popm $r0, $r2 + move W1, #0xff + beq W1, EXPOA, .LGinfnanA ! A is inf or NaN, goto .LGinfnanA #endif -#else /* __big_endian__ */ -#ifdef __NDS32_PERF_EXT__ - clz $r2, $r4 + or MANTA, MANTA, SIGN + +.LGlab1: + beqz EXPOB, .LGzeroB ! exponent(B) is zero, goto .LGzeroB +#if defined(__NDS32_ISA_V3__)||defined(__NDS32_ISA_V3M__) + beqc EXPOB, 0xff, .LGinfnanB ! B is inf or NaN, goto .LGinfnanB #else - pushm $r0, $r1 - pushm $r3, $r5 - move $r0, $r4 - bal __clzsi2 - move $r2, $r0 - popm $r3, $r5 - popm $r0, $r1 -#endif -#endif /* __big_endian_ */ - beqz P2H, .LL67 - sub $r6, $r6, P2H - subri P2L, P2H, #32 - srl P2L, P3L, P2L - sll P3L, P3L, P2H - sll P3H, P3H, P2H - or P3H, P3H, P2L -.LL67: - #NORMd End + beq W1, EXPOB, .LGinfnanB ! B is inf or NaN, goto .LGinfnanB +#endif + or MANTB, MANTB, SIGN - j .LGlab1 -.Li33: - bne $r6, $r9, .Li35 - slli $r10, O1H, #1 - or $r10, $r10, O1L - beqz $r10, .LGnan -.Li35: - subri $r15, $r9, #0x7ff - beqzs8 .LGspecB - beqz $r6, .LGret - or $r10, P3H, P3L - bnez $r10, .LGnan +.LGlab2: + slt $r15, MANTA, MANTB + bnez $r15, .LGlab3 + srli MANTA, MANTA, #1 + addi EXPOA, EXPOA, #1 -.LGinf: - move $r10, #0x7ff00000 - or P1H, P1H, $r10 - move P1L, #0 - j .LGret - -.LGspecB: - #ADD(O1L, O1L) - move $r15, O1L - add O1L, O1L, O1L - slt $r15, O1L, $r15 - - #ADDC(O1H, O1H) - add O1H, O1H, O1H - add O1H, O1H, $r15 - bnez $r9, .Li36 - or $r10, O1H, O1L - beqz $r10, .LGinf - - - #NORMd($r7, P2H, P2L) - bnez O1H, .LL68 - bnez O1L, .LL69 - move $r9, #0 - j .LL70 -.LL69: - move O1H, O1L - move O1L, #0 - move P2H, #32 - sub $r9, $r9, P2H -.LL68: -#ifndef __big_endian__ -#ifdef __NDS32_PERF_EXT__ - clz $r3, $r8 -#else - pushm $r0, $r2 - pushm $r4, $r5 - move $r0, $r8 - bal __clzsi2 - move $r3, $r0 - popm $r4, $r5 - popm $r0, $r2 + ! --------------------------------------------------------------------- + ! This is a 64-bit division. + ! high part of dividend, MANTA, is smaller than divisor MANTB. + ! 
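The prologue of __divsf3 above (like that of __mulsf3 earlier) unpacks both operands the same way: the result sign is taken from an XOR of the raw words, the exponent is extracted from a left-shifted copy, and the mantissa is left-aligned with the implicit 1 OR-ed into bit 31 for normal numbers. Roughly, in C (names invented for illustration):

#include <stdint.h>

struct unpacked_sf { uint32_t sign; int32_t exp; uint32_t mant; };

/* Illustrative only: unpack a normal single-precision value.  Zero,
   denormal, infinity and NaN take the labelled special paths in the
   assembly.  */
static struct unpacked_sf
unpack_sf (uint32_t a)
{
  struct unpacked_sf u;
  u.sign = a & 0x80000000u;
  u.exp  = (int32_t) ((a << 1) >> 24);   /* the slli/srli pair above */
  u.mant = (a << 8) | 0x80000000u;       /* mantissa<<8 with implicit 1 */
  return u;
}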
--------------------------------------------------------------------- +.LGlab3: + srli DHI, MANTB, #14 + andi DLO, MANTB, #0x3fff + divr QHI, REM, MANTA, DHI + mul MANTA, DLO, QHI + slli REM, REM, #14 + slt $r15, REM, MANTA + beqz $r15, .LGlab4 + addi QHI, QHI, #-1 + add REM, REM, MANTB + +.LGlab4: + sub REM, REM, MANTA + divr MANTA, REM, REM, DHI + mul DLO, DLO, MANTA + slli REM, REM, #14 + slt $r15, REM, DLO + beqz $r15, .LGlab5 + addi MANTA, MANTA, #-1 + add REM, REM, MANTB + +.LGlab5: + sub REM, REM, DLO + slli W3, QHI, #14 + add MANTA, MANTA, W3 + slli MANTA, MANTA, #4 + beqz REM, .LGlab6 + ori MANTA, MANTA, #1 + +.LGlab6: + subri W1, EXPOB, #0x7e + add EXPOA, EXPOA, W1 + blez EXPOA, .LGunder ! A/B underflow, goto .LGunder + slti $r15, EXPOA, #0xff + beqz $r15, .LGinf ! A/B overflow, goto .LGinf + + ! --------------------------------------------------------------------- + ! do rounding + ! --------------------------------------------------------------------- +.LGround: + addi MANTA, MANTA, #128 + slti $r15, MANTA, #128 + add EXPOA, EXPOA, $r15 + srli W1, MANTA, #8 + andi W1, W1, #1 + sub MANTA, MANTA, W1 + + ! --------------------------------------------------------------------- + ! pack result + ! --------------------------------------------------------------------- + slli MANTA, MANTA, #1 + srli MANTA, MANTA, #9 + slli $r0, EXPOA, #23 + or $r0, $r0, MANTA +.LGpack: + or $r0, $r0, SQUOT + +.LGret: +#ifdef __NDS32_REDUCE_REGS__ + lmw.bim $r6, [$sp], $r9, 0 #endif -#else /* __big_endian__ */ -#ifdef __NDS32_PERF_EXT__ - clz $r2, $r7 + ret5 $lp + + ! --------------------------------------------------------------------- + ! exponent(A) is 0x00 + ! --------------------------------------------------------------------- +.LGzeroAexp: +#ifdef __NDS32_EXT_PERF__ + add MANTA, MANTA, MANTA + beqz MANTA, .LGzeroA + clz $r15, MANTA + sub EXPOA, EXPOA, $r15 + sll MANTA, MANTA, $r15 #else - pushm $r0, $r1 - pushm $r3, $r5 - move $r0, $r7 - bal __clzsi2 - move $r2, $r0 - popm $r3, $r5 - popm $r0, $r1 + bnez MANTA, .LGloopA2 + b .LGzeroA +.LGloopA: + addi EXPOA, EXPOA, #-1 +.LGloopA2: + add MANTA, MANTA, MANTA + slt $r15, MANTA, SIGN + bnez $r15, .LGloopA #endif -#endif /* __big_endian__ */ - beqz P2H, .LL70 - sub $r9, $r9, P2H - subri P2L, P2H, #32 - srl P2L, O1L, P2L - sll O1L, O1L, P2H - sll O1H, O1H, P2H - or O1H, O1H, P2L -.LL70: - #NORMd End + b .LGlab1 + + ! A is 0.0f +.LGzeroA: + beqz VALUB, .LGnan ! 0.0f / 0.0f = NaN + move W1, 0xff000000 + slt $r15, W1, VALUB + bnez $r15, .LGnan ! 0.0f / NaN = NaN + +.LGzero: + move $r0, SQUOT + b .LGret + + ! --------------------------------------------------------------------- + ! exponent(A) is 0xff + ! --------------------------------------------------------------------- +.LGinfnanA: + bne MANTA, SIGN, .LGnan ! A is NaN: NaN / B = NaN + + ! A if inf + beq EXPOB, EXPOA, .LGnan ! no matter B is inf or NaN + +.LGinf: + move $r0, #0x7f800000 + or $r0, $r0, SQUOT + b .LGret - j .LGlab2 -.Li36: - or $r10, O1H, O1L - beqz $r10, .Li38 + ! --------------------------------------------------------------------- + ! exponent(B) is 0xff + ! --------------------------------------------------------------------- +.LGinfnanB: + beq MANTB, SIGN, .LGzero ! B is inf: A / inf = 0.0f .LGnan: - move P1H, #0xfff80000 -.Li38: - move P1L, #0 - j .LGret - .size __divdf3, .-__divdf3 -#endif /* L_div_df */ + move $r0, #0xffc00000 + b .LGret + + ! --------------------------------------------------------------------- + ! exponent(B) is 0x00 + ! 
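The .LGlab3-.LGlab6 sequence above produces the quotient in two 14-bit digits, estimating each digit with divr against the top 18 bits of the divisor and correcting by one where the estimate came out high. Up to those digit-estimation details, the value it computes can be written with 64-bit arithmetic in C (illustrative only, not the patch's algorithm verbatim):

#include <stdint.h>

/* Illustrative only: mant_a < mant_b, both bit-31 aligned.  Returns a
   28-bit quotient shifted up by 4, with a sticky bit for rounding.  */
static uint32_t
div_core (uint32_t mant_a, uint32_t mant_b)
{
  uint64_t num  = (uint64_t) mant_a << 28;
  uint32_t quot = (uint32_t) (num / mant_b);
  uint32_t rem  = (uint32_t) (num % mant_b);

  uint32_t result = quot << 4;          /* align below the rounding point */
  if (rem != 0)
    result |= 1;                        /* sticky bit */
  return result;
}

The two-step form avoids needing a 64-by-32 divide instruction; each divr only divides a 32-bit value by the 18-bit top of the divisor.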
--------------------------------------------------------------------- +.LGzeroB: +#ifdef __NDS32_EXT_PERF__ + add MANTB, MANTB, MANTB + beqz MANTB, .LGinf + clz $r15, MANTB + sub EXPOB, EXPOB, $r15 + sll MANTB, MANTB, $r15 +#else + bnez MANTB, .LGloopB2 + b .LGinf +.LGloopB: + addi EXPOB, EXPOB, #-1 +.LGloopB2: + add MANTB, MANTB, MANTB + slt $r15, MANTB, SIGN + bnez $r15, .LGloopB +#endif + b .LGlab2 + + ! --------------------------------------------------------------------- + ! A/B underflow + ! --------------------------------------------------------------------- +.LGunder: + subri W0, EXPOA, #1 + slti $r15, W0, #0x20 + beqzs8 .LGzero + subri W1, W0, #0x20 + sll EXPOA, MANTA, W1 + srl MANTA, MANTA, W0 + beqz EXPOA, .LGunder2 + ori MANTA, MANTA, #2 +.LGunder2: + addi W1, MANTA, #0x80 + sltsi EXPOA, W1, #0 + b .LGround + .size __divsf3, .-__divsf3 +#endif /* L_div_sf */ @@ -3553,13 +3439,8 @@ .global __negsf2 .type __negsf2, @function __negsf2: - push $lp - move $r1, #0x80000000 xor $r0, $r0, $r1 - -.LN999: - pop $lp ret5 $lp .size __negsf2, .-__negsf2 #endif /* L_negate_sf */ @@ -3578,13 +3459,8 @@ .global __negdf2 .type __negdf2, @function __negdf2: - push $lp - move $r2, #0x80000000 xor P1H, P1H, $r2 - -.LP999: - pop $lp ret5 $lp .size __negdf2, .-__negdf2 #endif /* L_negate_df */ @@ -3594,64 +3470,83 @@ #ifdef L_sf_to_df #ifndef __big_endian__ - #define O1L $r1 - #define O1H $r2 + #define P1L $r0 + #define P1H $r1 #else - #define O1H $r1 - #define O1L $r2 + #define P1H $r0 + #define P1L $r1 #endif +#define SIGN $r2 +#define EXPO $r3 +#define MANT $r4 .text .align 2 .global __extendsfdf2 .type __extendsfdf2, @function __extendsfdf2: - push $lp + slli $r5, $r0, 1 + beqz $r5, .LJzero ! A-in is zero, goto .LJzero - srli $r3, $r0, #23 - andi $r3, $r3, #0xff - move $r5, #0x80000000 - and O1H, $r0, $r5 - addi $r5, $r3, #-1 - slti $r15, $r5, #0xfe - beqzs8 .LJspec + srli EXPO, $r5, #24 ! exponent(A-in) + move $r1, #0x80000000 + and SIGN, $r1, $r0 ! sign(A-in) + slli MANT, $r5, #8 ! mantissa(A-in) + beqz EXPO, .LJdenorm ! exponent(A-in) is zerop, goto .LJdenorm +#ifndef __FAST_MATH__ +#if defined(__NDS32_ISA_V3__)||defined(__NDS32_ISA_V3M__) + beqc EXPO, #0xff, .LJinfnan ! exponent(A-in) is 0xff, goto .LJinfnan +#else + slti $r15, EXPO, #0xff + beqzs8 .LJinfnan ! exponent(A-in) is 0xff, goto .LJinfnan +#endif +#endif // end of __FAST_MATH__ .LJlab1: - addi $r3, $r3, #0x380 - slli $r5, $r0, #9 - srli $r5, $r5, #12 - or O1H, O1H, $r5 - slli O1L, $r0, #29 + addi EXPO, EXPO, #0x380 ! exponent(A-out) + slli P1L, MANT, #20 ! low 32-bit(A-out) + srli P1H, MANT, #12 ! high 20-bit mantissa(A-out) + or P1H, P1H, SIGN + slli EXPO, EXPO, #20 + or P1H, P1H, EXPO ! high 32-bit(-out) + ret5 $lp + +#ifdef __NDS32_EXT_PERF__ +.LJdenorm: + clz $r1, MANT + sub EXPO, EXPO, $r1 + sll MANT, MANT, $r1 +#else +.LJdenorm2: + addi EXPO, EXPO, #-1 + add MANT, MANT, MANT +.LJdenorm: + slt $r15, MANT, $r1 + bnezs8 .LJdenorm2 +#endif + slli MANT, MANT, 1 ! 
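For normal inputs, __extendsfdf2 above only has to re-bias the exponent by 0x380 (1023 - 127) and reposition the 23 mantissa bits within the double's 52-bit field. A C sketch of that normal-number path (illustrative; zero, denormals, infinities and NaNs go through the labelled special cases):

#include <stdint.h>

/* Illustrative only: widen the raw bits of a normal float to the raw
   bits of the equal double.  */
static uint64_t
extendsfdf2_sketch (uint32_t f)
{
  uint64_t sign = (uint64_t) (f & 0x80000000u) << 32;
  uint64_t exp  = (uint64_t) (((f >> 23) & 0xff) + 0x380) << 52;
  uint64_t mant = (uint64_t) (f & 0x007fffffu) << 29;   /* 52 - 23 */
  return sign | exp | mant;
}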
shift out implied 1 + b .LJlab1 +#ifndef __FAST_MATH__ +.LJinfnan: + beqz MANT, .LJinf + move P1H, 0xfff80000 + b .LJcont -.LJret: - slli $r5, $r3, #20 - or O1H, O1H, $r5 - move $r0, $r1 - move $r1, $r2 - -.LJ999: - pop $lp - ret5 $lp +.LJinf: + move $r5, 0x700000 +#ifdef __big_endian__ + or P1H, $r0, $r5 +#else + or $r0, $r0, $r5 +#endif +#endif // end of __FAST_MATH__ +.LJzero: +#ifndef __big_endian__ + move P1H, $r0 +#endif -.LJspec: - move O1L, #0 - add $r0, $r0, $r0 - beqz $r0, .LJret - bnez $r3, .Li42 - -.Lb43: - addi $r3, $r3, #-1 - add $r0, $r0, $r0 - move $r5, #0x800000 - slt $r15, $r0, $r5 - bnezs8 .Lb43 - j .LJlab1 -.Li42: - move $r3, #0x7ff - move $r5, #0xff000000 - slt $r15, $r5, $r0 - beqzs8 .LJret - move O1H, #0xfff80000 - j .LJret +.LJcont: + move P1L, 0 + ret5 $lp .size __extendsfdf2, .-__extendsfdf2 #endif /* L_sf_to_df */ @@ -3675,7 +3570,6 @@ .global __truncdfsf2 .type __truncdfsf2, @function __truncdfsf2: - push $lp pushm $r6, $r8 slli P2H, P1H, #11 @@ -3714,7 +3608,6 @@ .LK999: popm $r6, $r8 - pop $lp ret5 $lp .LKspec: @@ -3724,20 +3617,20 @@ or $r7, $r7, P2L beqz $r7, .Li46 move $r0, #0xffc00000 - j .LK999 + b .LK999 .Li46: sltsi $r15, $r4, #0xff bnezs8 .Li48 move $r7, #0x7f800000 or $r0, $r5, $r7 - j .LK999 + b .LK999 .Li48: subri $r6, $r4, #1 move $r7, #0x20 slt $r15, $r6, $r7 bnezs8 .Li49 move $r0, $r5 - j .LK999 + b .LK999 .Li49: subri $r8, $r6, #0x20 sll $r7, P2H, $r8 @@ -3746,13 +3639,13 @@ move $r4, #0 move $r7, #0x80000000 or P2H, P2H, $r7 - j .LKlab1 + b .LKlab1 .size __truncdfsf2, .-__truncdfsf2 #endif /* L_df_to_sf */ -#ifdef L_df_to_si +#ifdef L_fixdfsi #ifndef __big_endian__ #define P1L $r0 @@ -3764,20 +3657,25 @@ .global __fixdfsi .type __fixdfsi, @function __fixdfsi: - push $lp - pushm $r6, $r6 - +#if defined(__NDS32_EXT_FPU_DP) + fd2si.z $fs0, $fd0 + fmfsr $r0, $fs0 + ret5 $lp +#else +#if defined(__NDS32_EXT_FPU_SP) + fmfdr $r0, $fd0 +#endif slli $r3, P1H, #11 - srli $r6, P1L, #21 - or $r3, $r3, $r6 - move $r6, #0x80000000 - or $r3, $r3, $r6 - slli $r6, P1H, #1 - srli $r6, $r6, #21 - subri $r2, $r6, #0x41e + srli $r4, P1L, #21 + or $r3, $r3, $r4 + move $r4, #0x80000000 + or $r3, $r3, $r4 + slli $r4, P1H, #1 + srli $r4, $r4, #21 + subri $r2, $r4, #0x41e blez $r2, .LLnaninf - move $r6, #0x20 - slt $r15, $r2, $r6 + move $r4, #0x20 + slt $r15, $r2, $r4 bnezs8 .LL72 move $r3, #0 .LL72: @@ -3787,26 +3685,23 @@ subri $r3, $r3, #0 .Li50: move $r0, $r3 - -.LL999: - popm $r6, $r6 - pop $lp ret5 $lp .LLnaninf: beqz P1L, .Li51 ori P1H, P1H, #1 .Li51: - move $r6, #0x7ff00000 - slt $r15, $r6, P1H + move $r4, #0x7ff00000 + slt $r15, $r4, P1H beqzs8 .Li52 move $r0, #0x80000000 - j .LL999 + ret5 $lp .Li52: move $r0, #0x7fffffff - j .LL999 + ret5 $lp +#endif .size __fixdfsi, .-__fixdfsi -#endif /* L_df_to_si */ +#endif /* L_fixdfsi */ @@ -3824,8 +3719,9 @@ .global __fixsfdi .type __fixsfdi, @function __fixsfdi: - push $lp - +#if defined(__NDS32_EXT_FPU_SP) + fmfsr $r0, $fs0 +#endif srli $r3, $r0, #23 andi $r3, $r3, #0xff slli O1H, $r0, #8 @@ -3864,9 +3760,6 @@ .LCret: move $r0, $r1 move $r1, $r2 - -.LC999: - pop $lp ret5 $lp .LCinfnan: @@ -3879,11 +3772,11 @@ .LCret3: move O1H, #0x80000000 - j .LCret + b .LCret .Li7: move O1H, #0x7fffffff move O1L, #-1 - j .LCret + b .LCret .size __fixsfdi, .-__fixsfdi #endif /* L_fixsfdi */ @@ -3907,9 +3800,10 @@ .global __fixdfdi .type __fixdfdi, @function __fixdfdi: - push $lp pushm $r6, $r6 - +#if defined(__NDS32_EXT_FPU_SP) + fmfdr $r0, $fd0 +#endif slli $r5, P1H, #1 srli $r5, $r5, #21 slli O1H, P1H, #11 @@ -3950,10 +3844,7 @@ 
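When no double-precision FPU is configured, __fixdfsi above builds the top 32 mantissa bits with the implicit 1 at bit 31 and shifts them right by 0x41e minus the biased exponent (0x41e = 1023 + 31). A hedged C sketch of that path, with the saturation simplified and the names invented by me:

#include <stdint.h>

/* Illustrative only: truncate the raw bits of a double toward zero.
   Values of magnitude >= 2^31, infinities and NaNs saturate; the real
   code distinguishes a few of these cases slightly differently.  */
static int32_t
fixdfsi_sketch (uint32_t hi, uint32_t lo)
{
  uint32_t mant  = (hi << 11) | (lo >> 21) | 0x80000000u;
  int32_t  exp   = (int32_t) ((hi << 1) >> 21);
  int32_t  shift = 0x41e - exp;

  if (shift <= 0)
    return (hi & 0x80000000u) ? INT32_MIN : INT32_MAX;
  if (shift >= 32)
    return 0;                           /* |value| < 1 */
  uint32_t v = mant >> shift;
  return (hi & 0x80000000u) ? -(int32_t) v : (int32_t) v;
}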
.LCret: move P1L, O1L move P1H, O1H - -.LC999: popm $r6, $r6 - pop $lp ret5 $lp .LCnaninf: @@ -3968,56 +3859,48 @@ .LCret3: move O1H, #0x80000000 move O1L, #0 - j .LCret + b .LCret .Li5: move O1H, #0x7fffffff move O1L, #-1 - j .LCret + b .LCret .size __fixdfdi, .-__fixdfdi #endif /* L_fixdfdi */ #ifdef L_fixunssfsi - .global __fixunssfsi .type __fixunssfsi, @function __fixunssfsi: - push $lp - - slli $r1, $r0, #8 - move $r3, #0x80000000 - or $r1, $r1, $r3 - srli $r3, $r0, #23 - andi $r3, $r3, #0xff - subri $r2, $r3, #0x9e - sltsi $r15, $r2, #0 - bnezs8 .LLspec - sltsi $r15, $r2, #0x20 - bnezs8 .Li45 - move $r0, #0 - j .LL999 -.Li45: - srl $r1, $r1, $r2 - sltsi $r15, $r0, #0 - beqzs8 .Li46 - subri $r1, $r1, #0 -.Li46: - move $r0, $r1 - -.LL999: - pop $lp - ret5 $lp - -.LLspec: - move $r3, #0x7f800000 - slt $r15, $r3, $r0 - beqzs8 .Li47 - move $r0, #0x80000000 - j .LL999 -.Li47: - move $r0, #-1 - j .LL999 +#if defined(__NDS32_EXT_FPU_SP) + fs2ui.z $fs0, $fs0 + fmfsr $r0, $fs0 + ret5 $lp +#else + bltz $r0, .LZero /* negative, return 0 */ + srli $r3,$r0,#0x17 + addi $r3,$r3,#-127 + bltz $r3, .LZero /* too small, return 0 */ + sltsi $r15,$r3,#0x20 + beqzs8 .LMax /* too big, return MAX */ + slli $r0,$r0,#0x8 +#ifdef __NDS32_EXT_PERF__ + bset $r1,$r0,#0x1f +#else + sethi $r2,#0x80000 + or $r1,$r0,$r2 +#endif + subri $r0,$r3,#0x1f + srl $r0,$r1,$r0 + ret5 $lp +.LZero: + movi55 $r0,#0x0 + ret5 $lp +.LMax: + movi55 $r0,#-1 + ret5 $lp +#endif .size __fixunssfsi, .-__fixunssfsi #endif /* L_fixunssfsi */ @@ -4037,21 +3920,26 @@ .global __fixunsdfsi .type __fixunsdfsi, @function __fixunsdfsi: - push $lp - pushm $r6, $r6 - +#if defined(__NDS32_EXT_FPU_DP) + fd2ui.z $fs0, $fd0 + fmfsr $r0, $fs0 + ret5 $lp +#else +#if defined(__NDS32_EXT_FPU_SP) + fmfdr $r0, $fd0 +#endif slli $r3, P1H, #11 - srli $r6, P1L, #21 - or $r3, $r3, $r6 - move $r6, #0x80000000 - or $r3, $r3, $r6 - slli $r6, P1H, #1 - srli $r6, $r6, #21 - subri $r2, $r6, #0x41e + srli $r4, P1L, #21 + or $r3, $r3, $r4 + move $r4, #0x80000000 + or $r3, $r3, $r4 + slli $r4, P1H, #1 + srli $r4, $r4, #21 + subri $r2, $r4, #0x41e sltsi $r15, $r2, #0 bnezs8 .LNnaninf - move $r6, #0x20 - slt $r15, $r2, $r6 + move $r4, #0x20 + slt $r15, $r2, $r4 bnezs8 .LL73 move $r3, #0 .LL73: @@ -4061,92 +3949,86 @@ subri $r3, $r3, #0 .Li53: move $r0, $r3 - -.LN999: - popm $r6, $r6 - pop $lp ret5 $lp .LNnaninf: beqz P1L, .Li54 ori P1H, P1H, #1 .Li54: - move $r6, #0x7ff00000 - slt $r15, $r6, P1H + move $r4, #0x7ff00000 + slt $r15, $r4, P1H beqzs8 .Li55 move $r0, #0x80000000 - j .LN999 + ret5 $lp .Li55: move $r0, #-1 - j .LN999 + ret5 $lp +#endif .size __fixunsdfsi, .-__fixunsdfsi #endif /* L_fixunsdfsi */ #ifdef L_fixunssfdi - -#ifndef __big_endian__ - #define O1L $r1 - #define O1H $r2 -#else - #define O1H $r1 - #define O1L $r2 -#endif .text .align 2 .global __fixunssfdi .type __fixunssfdi, @function __fixunssfdi: - push $lp - - srli $r3, $r0, #23 - andi $r3, $r3, #0xff - slli O1H, $r0, #8 - move $r5, #0x80000000 - or O1H, O1H, $r5 - move O1L, #0 - sltsi $r15, $r3, #0xbe - beqzs8 .LDinfnan - subri $r3, $r3, #0xbe -.LL12: - move $r5, #0x20 - slt $r15, $r3, $r5 - bnezs8 .LL13 - move O1L, O1H - move O1H, #0 - addi $r3, $r3, #0xffffffe0 - bnez O1L, .LL12 -.LL13: - beqz $r3, .LL14 - move $r4, O1H - srl O1L, O1L, $r3 - srl O1H, O1H, $r3 - subri $r3, $r3, #0x20 - sll $r4, $r4, $r3 - or O1L, O1L, $r4 -.LL14: - sltsi $r15, $r0, #0 - beqzs8 .LDret - - subri O1H, O1H, #0 - beqz O1L, .LL15 - subri O1L, O1L, #0 - subi45 O1H, #1 -.LL15: - -.LDret: - move $r0, $r1 - move $r1, $r2 - 
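The new non-FPU body of __fixunssfsi above maps negative inputs and values below 1.0f to zero, saturates exponents of 32 and above, and otherwise shifts the implicit-1 mantissa right by 31 minus the unbiased exponent. In C, roughly (names invented for illustration):

#include <stdint.h>

/* Illustrative only: convert the raw bits of a float to an unsigned
   32-bit integer, truncating toward zero.  */
static uint32_t
fixunssfsi_sketch (uint32_t bits)
{
  if ((int32_t) bits < 0)
    return 0;                                   /* negative (incl. -0.0f) */
  int32_t exp = (int32_t) (bits >> 23) - 127;   /* unbiased exponent */
  if (exp < 0)
    return 0;                                   /* |value| < 1.0f */
  if (exp >= 32)
    return 0xffffffffu;                         /* too large: saturate */
  uint32_t mant = (bits << 8) | 0x80000000u;    /* the bset/sethi path */
  return mant >> (31 - exp);
}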
-.LD999: - pop $lp - ret5 $lp - -.LDinfnan: - move O1H, #0x80000000 - move O1L, #0 - j .LDret +#define INPUT $r0 +#define EXP $r2 +#define TMP $r3 +#define REAL_EXP $r2 +#ifndef __big_endian__ +#define MANL $r0 +#define MANH $r1 +#else +#define MANL $r1 +#define MANH $r0 +#endif +#if defined(__NDS32_EXT_FPU_SP) + fmfsr $r0, $fs0 +#endif + bltz INPUT, .LZero !negative, return 0 + + srli EXP,INPUT,#0x17 + addi REAL_EXP, EXP,#-127 + bltz REAL_EXP, .LZero ! too small, return 0 + + sltsi $r15,REAL_EXP,#0x40 ! too large, return Max + beqzs8 .LMax + + slli MANL,INPUT,#0x8 +#ifdef __NDS32_EXT_PERF__ + bset MANL,MANL,#0x1f +#else + sethi TMP,#0x80000 + or33 MANL,TMP +#endif + subri TMP,REAL_EXP,#0x1f + bltz TMP,.Lgt31 ! real_exp > 32 + + ! real_exp <= 31 + srl MANL,MANL,TMP + movi55 MANH,#0x0 + ret5 $lp + +.Lgt31: + subri REAL_EXP,REAL_EXP,#0x3f + neg33 TMP,TMP + srl MANH,MANL,REAL_EXP + sll MANL,MANL,TMP + beqc TMP, #0x20, .LClrL + ret5 $lp +.LZero: + movi55 MANH,#0x0 +.LClrL: + movi55 MANL,#0x0 + ret5 $lp +.LMax: + movi55 MANL,#-1 + movi55 MANH,#-1 + ret5 $lp .size __fixunssfdi, .-__fixunssfdi #endif /* L_fixunssfdi */ @@ -4170,9 +4052,10 @@ .global __fixunsdfdi .type __fixunsdfdi, @function __fixunsdfdi: - push $lp pushm $r6, $r6 - +#if defined(__NDS32_EXT_FPU_SP) + fmfdr $r0, $fd0 +#endif slli $r5, P1H, #1 srli $r5, $r5, #21 slli O1H, P1H, #11 @@ -4213,16 +4096,13 @@ .LDret: move P1L, O1L move P1H, O1H - -.LD999: popm $r6, $r6 - pop $lp ret5 $lp .LDnaninf: move O1H, #0x80000000 move O1L, #0 - j .LDret + b .LDret .size __fixunsdfdi, .-__fixunsdfdi #endif /* L_fixunsdfdi */ @@ -4230,54 +4110,59 @@ #ifdef L_si_to_sf +#define MANTA $r0 +#define EXPOA $r1 + .text .align 2 .global __floatsisf .type __floatsisf, @function __floatsisf: - push $lp - - move $r4, #0x80000000 - and $r2, $r0, $r4 - beqz $r0, .Li39 - sltsi $r15, $r0, #0 - beqzs8 .Li40 - subri $r0, $r0, #0 -.Li40: - move $r1, #0x9e -#ifdef __NDS32_PERF_EXT__ + beqz $r0, .LKzero ! A is zero + move $r4, #0x80000000 + and $r2, $r0, $r4 ! sign(A) + beqz $r2, .LKcont + subri $r0, $r0, #0 + + ! abs(A) +.LKcont: + move EXPOA, #0x9e +#ifdef __NDS32_EXT_PERF__ clz $r3, $r0 + sll MANTA, MANTA, $r3 + sub EXPOA, EXPOA, $r3 #else - pushm $r0, $r2 - pushm $r4, $r5 - bal __clzsi2 - move $r3, $r0 - popm $r4, $r5 - popm $r0, $r2 -#endif - sub $r1, $r1, $r3 - sll $r0, $r0, $r3 - - #ADD($r0, $0x80) - move $r15, #0x80 - add $r0, $r0, $r15 - slt $r15, $r0, $r15 - - #ADDC($r1, $0x0) - add $r1, $r1, $r15 - srai $r4, $r0, #8 - andi $r4, $r4, #1 - sub $r0, $r0, $r4 - slli $r0, $r0, #1 - srli $r0, $r0, #9 - slli $r4, $r1, #23 - or $r0, $r0, $r4 -.Li39: - or $r0, $r0, $r2 + move $r5, 16 + move $r3, 0 +.LKloop: + add $r3, $r3, $r5 + srl $r15, MANTA, $r3 + bnez $r15, .LKloop2 + sll MANTA, MANTA, $r5 + sub EXPOA, EXPOA, $r5 +.LKloop2: + srli $r5, $r5, #1 + bnez $r5, .LKloop +#endif + + ! do rounding + srli $r4, $r4, #24 ! 0x80 + add MANTA, MANTA, $r4 + slt $r15, MANTA, $r4 + add EXPOA, EXPOA, $r15 + srai $r4, MANTA, #8 + andi $r4, $r4, #1 + sub MANTA, MANTA, $r4 + slli MANTA, MANTA, #1 ! shift out implied 1 + + ! 
pack + srli MANTA, MANTA, #9 + slli $r4, EXPOA, #23 + or $r0, MANTA, $r4 + or $r0, $r0, $r2 -.LH999: - pop $lp - ret5 $lp +.LKzero: + ret5 $lp .size __floatsisf, .-__floatsisf #endif /* L_si_to_sf */ @@ -4301,8 +4186,11 @@ .global __floatsidf .type __floatsidf, @function __floatsidf: - push $lp - pushm $r6, $r6 +#ifdef __NDS32_EXT_PERF__ + smw.adm $r6, [$sp], $r6, 0 +#else + smw.adm $r6, [$sp], $r6, 2 +#endif move O1L, #0 move O2H, O1L @@ -4321,7 +4209,7 @@ .Li40: move $r3, #0x41e #ifndef __big_endian__ -#ifdef __NDS32_PERF_EXT__ +#ifdef __NDS32_EXT_PERF__ clz $r4, $r2 #else pushm $r0, $r3 @@ -4333,7 +4221,7 @@ popm $r0, $r3 #endif #else /* __big_endian__ */ -#ifdef __NDS32_PERF_EXT__ +#ifdef __NDS32_EXT_PERF__ clz $r5, $r1 #else pushm $r0, $r4 @@ -4357,9 +4245,11 @@ move $r0, $r4 move $r1, $r5 -.LH999: - popm $r6, $r6 - pop $lp +#ifdef __NDS32_EXT_PERF__ + lmw.bim $r6, [$sp], $r6, 0 +#else + lmw.bim $r6, [$sp], $r6, 2 +#endif ret5 $lp .size __floatsidf, .-__floatsidf #endif /* L_si_to_df */ @@ -4384,8 +4274,11 @@ .global __floatdisf .type __floatdisf, @function __floatdisf: - push $lp - pushm $r6, $r7 +#ifdef __NDS32_EXT_PERF__ + smw.adm $r6, [$sp], $r7, 0 +#else + smw.adm $r6, [$sp], $r7, 2 +#endif move $r7, #0x80000000 and $r5, P1H, $r7 @@ -4409,14 +4302,14 @@ bnez P2H, .LL2 bnez P2L, .LL3 move $r4, #0 - j .LL4 + b .LL4 .LL3: move P2H, P2L move P2L, #0 move $r6, #32 sub $r4, $r4, $r6 .LL2: -#ifdef __NDS32_PERF_EXT__ +#ifdef __NDS32_EXT_PERF__ clz $r6, P2H #else pushm $r0, $r5 @@ -4456,8 +4349,11 @@ or $r0, P2H, $r5 .LA999: - popm $r6, $r7 - pop $lp +#ifdef __NDS32_EXT_PERF__ + lmw.bim $r6, [$sp], $r7, 0 +#else + lmw.bim $r6, [$sp], $r7, 2 +#endif ret5 $lp .size __floatdisf, .-__floatdisf #endif /* L_floatdisf */ @@ -4486,8 +4382,11 @@ .global __floatdidf .type __floatdidf, @function __floatdidf: - push $lp - pushm $r6, $r8 +#ifdef __NDS32_EXT_PERF__ + smw.adm $r6, [$sp], $r8, 0 +#else + smw.adm $r6, [$sp], $r8, 2 +#endif move $r4, #0 move $r7, $r4 @@ -4511,16 +4410,16 @@ bnez P2H, .LL2 bnez P2L, .LL3 move $r4, #0 - j .LL4 + b .LL4 .LL3: move P2H, P2L move P2L, #0 move O1H, #32 sub $r4, $r4, O1H .LL2: -#ifdef __NDS32_PERF_EXT__ +#ifdef __NDS32_EXT_PERF__ clz O1H, P2H -#else /* not __NDS32_PERF_EXT__ */ +#else /* not __NDS32_EXT_PERF__ */ /* Replace clz with function call. clz O1H, P2H @@ -4540,7 +4439,7 @@ move $r5, $r0 popm $r0, $r4 #endif -#endif /* not __NDS32_PERF_EXT__ */ +#endif /* not __NDS32_EXT_PERF__ */ beqz O1H, .LL4 sub $r4, $r4, O1H subri O1L, O1H, #32 @@ -4581,8 +4480,11 @@ move P1H, O1H .LA999: - popm $r6, $r8 - pop $lp +#ifdef __NDS32_EXT_PERF__ + lmw.bim $r6, [$sp], $r8, 0 +#else + lmw.bim $r6, [$sp], $r8, 2 +#endif ret5 $lp .size __floatdidf, .-__floatdidf #endif /* L_floatdidf */ @@ -4591,48 +4493,51 @@ #ifdef L_floatunsisf +#define MANTA $r0 +#define EXPOA $r1 + .text .align 2 .global __floatunsisf .type __floatunsisf, @function __floatunsisf: - push $lp - - beqz $r0, .Li41 - move $r2, #0x9e -#ifdef __NDS32_PERF_EXT__ - clz $r1, $r0 -#else - push $r0 - pushm $r2, $r5 - bal __clzsi2 - move $r1, $r0 - popm $r2, $r5 - pop $r0 -#endif - - sub $r2, $r2, $r1 - sll $r0, $r0, $r1 + beqz $r0, .LKzero ! 
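__floatsisf earlier in this hunk takes the absolute value, normalizes it with a leading-zero count (or the 16/8/4/2/1 shift loop when clz is unavailable), rounds to nearest-even on the eight bits about to be discarded, and packs the result. A C sketch of the same flow, with invented names and __builtin_clz standing in for either normalization variant:

#include <stdint.h>

/* Illustrative only: convert a signed 32-bit integer to the raw bits of
   the nearest float, ties to even.  */
static uint32_t
floatsisf_sketch (int32_t x)
{
  if (x == 0)
    return 0;
  uint32_t sign = (uint32_t) x & 0x80000000u;
  uint32_t mant = (x < 0) ? -(uint32_t) x : (uint32_t) x;

  int shift = __builtin_clz (mant);
  mant <<= shift;                        /* leading 1 now in bit 31 */
  int32_t exp = 0x9e - shift;            /* 0x9e = 127 + 31 */

  uint32_t rounded = mant + 0x80;        /* round on the low 8 bits */
  if (rounded < 0x80)
    exp += 1;                            /* carried out: mantissa became 2.0 */
  rounded -= (rounded >> 8) & 1;         /* ties-to-even, as the srai/sub step */

  uint32_t frac = (rounded << 1) >> 9;   /* drop implicit 1 and the low 8 bits */
  return sign | ((uint32_t) exp << 23) | frac;
}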
A is zero - #ADD($r0, $0x80) - move $r15, #0x80 - add $r0, $r0, $r15 - slt $r15, $r0, $r15 - - #ADDC($r2, $0x0) - add $r2, $r2, $r15 - srli $r3, $r0, #8 - andi $r3, $r3, #1 - sub $r0, $r0, $r3 - slli $r0, $r0, #1 - srli $r0, $r0, #9 - slli $r3, $r2, #23 - or $r0, $r0, $r3 + move EXPOA, #0x9e +#ifdef __NDS32_EXT_PERF__ + clz $r5, $r0 + sll MANTA, MANTA, $r5 + sub EXPOA, EXPOA, $r5 +#else + move $r5, 16 + move $r3, 0 +.LKloop: + add $r3, $r3, $r5 + srl $r15, MANTA, $r3 + bnez $r15, .LKloop2 + sll MANTA, MANTA, $r5 + sub EXPOA, EXPOA, $r5 +.LKloop2: + srli $r5, $r5, #1 + bnez $r5, .LKloop +#endif + + ! do rounding + addi MANTA, MANTA, #128 + slti $r15, MANTA, #128 + add EXPOA, EXPOA, $r15 + srli $r4, MANTA, #8 + andi $r4, $r4, #1 + sub MANTA, MANTA, $r4 + slli MANTA, MANTA, #1 ! shift out implied 1 + + ! pack + srli MANTA, MANTA, #9 + slli $r4, EXPOA, #23 + or $r0, MANTA, $r4 -.Li41: -.LI999: - pop $lp - ret5 $lp +.LKzero: + ret5 $lp .size __floatunsisf, .-__floatunsisf #endif /* L_floatunsisf */ @@ -4656,8 +4561,11 @@ .global __floatunsidf .type __floatunsidf, @function __floatunsidf: - push $lp - pushm $r6, $r6 +#ifdef __NDS32_EXT_PERF__ + smw.adm $r6, [$sp], $r6, 0 +#else + smw.adm $r6, [$sp], $r6, 2 +#endif move O1L, #0 move $r3, O1L @@ -4665,7 +4573,7 @@ beqz O1H, .Li41 move $r3, #0x41e #ifndef __big_endian__ -#ifdef __NDS32_PERF_EXT__ +#ifdef __NDS32_EXT_PERF__ clz $r5, $r2 #else pushm $r0, $r4 @@ -4675,7 +4583,7 @@ popm $r0, $r4 #endif #else /* __big_endian__ */ -#ifdef __NDS32_PERF_EXT__ +#ifdef __NDS32_EXT_PERF__ clz $r4, $r1 #else pushm $r0, $r3 @@ -4700,9 +4608,11 @@ move $r0, $r4 move $r1, $r5 -.LI999: - popm $r6, $r6 - pop $lp +#ifdef __NDS32_EXT_PERF__ + lmw.bim $r6, [$sp], $r6, 0 +#else + lmw.bim $r6, [$sp], $r6, 2 +#endif ret5 $lp .size __floatunsidf, .-__floatunsidf #endif /* L_floatunsidf */ @@ -4727,8 +4637,11 @@ .global __floatundisf .type __floatundisf, @function __floatundisf: - push $lp - pushm $r6, $r6 +#ifdef __NDS32_EXT_PERF__ + smw.adm $r6, [$sp], $r6, 0 +#else + smw.adm $r6, [$sp], $r6, 2 +#endif move P2H, P1H move P2L, P1L @@ -4741,14 +4654,14 @@ bnez P2H, .LL5 bnez P2L, .LL6 move $r4, #0 - j .LL7 + b .LL7 .LL6: move P2H, P2L move P2L, #0 move $r5, #32 sub $r4, $r4, $r5 .LL5: -#ifdef __NDS32_PERF_EXT__ +#ifdef __NDS32_EXT_PERF__ clz $r5, P2H #else pushm $r0, $r4 @@ -4788,8 +4701,11 @@ move $r0, P2H .LB999: - popm $r6, $r6 - pop $lp +#ifdef __NDS32_EXT_PERF__ + lmw.bim $r6, [$sp], $r6, 0 +#else + lmw.bim $r6, [$sp], $r6, 2 +#endif ret5 $lp .size __floatundisf, .-__floatundisf #endif /* L_floatundisf */ @@ -4818,8 +4734,11 @@ .global __floatundidf .type __floatundidf, @function __floatundidf: - push $lp - pushm $r6, $r7 +#ifdef __NDS32_EXT_PERF__ + smw.adm $r6, [$sp], $r7, 0 +#else + smw.adm $r6, [$sp], $r7, 2 +#endif move $r4, #0 move P2H, P1H @@ -4833,16 +4752,16 @@ bnez P2H, .LL8 bnez P2L, .LL9 move $r4, #0 - j .LL10 + b .LL10 .LL9: move P2H, P2L move P2L, #0 move O1H, #32 sub $r4, $r4, O1H .LL8: -#ifdef __NDS32_PERF_EXT__ +#ifdef __NDS32_EXT_PERF__ clz O1H, P2H -#else /* not __NDS32_PERF_EXT__ */ +#else /* not __NDS32_EXT_PERF__ */ /* Replace clz with function call. 
clz O1H, P2H @@ -4862,7 +4781,7 @@ move $r5, $r0 popm $r0, $r4 #endif -#endif /* not __NDS32_PERF_EXT__ */ +#endif /* not __NDS32_EXT_PERF__ */ beqz O1H, .LL10 sub $r4, $r4, O1H subri O1L, O1H, #32 @@ -4902,8 +4821,11 @@ move P1H, O1H .LB999: - popm $r6, $r7 - pop $lp +#ifdef __NDS32_EXT_PERF__ + lmw.bim $r6, [$sp], $r7, 0 +#else + lmw.bim $r6, [$sp], $r7, 2 +#endif ret5 $lp .size __floatundidf, .-__floatundidf #endif /* L_floatundidf */ @@ -4914,78 +4836,121 @@ .text .align 2 - .global __cmpsf2 - .type __cmpsf2, @function -__cmpsf2: + .global __gtsf2 + .type __gtsf2, @function +__gtsf2: + ! --------------------------------------------------------------------- + ! int __gtsf2(float a, float b): + ! This function returns a value greater than zero if neither argument + ! is NaN and a is strictly greater than b. + ! --------------------------------------------------------------------- + .global __gesf2 + .type __gesf2, @function +__gesf2: + ! --------------------------------------------------------------------- + ! int __gesf2(float a, float b): + ! This function returns a value greater than or equal to zero if + ! neither argument is NaN and a is greater than or equal to b. + ! --------------------------------------------------------------------- + move $r4, #-1 + b .LA + .global __eqsf2 .type __eqsf2, @function __eqsf2: - .global __ltsf2 - .type __ltsf2, @function -__ltsf2: - .global __lesf2 - .type __lesf2, @function -__lesf2: + ! --------------------------------------------------------------------- + ! int __eqsf2(float a, float b): + ! This function returns zero value if neither argument is NaN, + ! and a and b are equal. + ! --------------------------------------------------------------------- .global __nesf2 .type __nesf2, @function __nesf2: + ! --------------------------------------------------------------------- + ! int __nesf2(float a, float b): + ! This function returns a nonzero value if either argument is NaN or if + ! a and b are unequal. + ! --------------------------------------------------------------------- + .global __lesf2 + .type __lesf2, @function +__lesf2: + ! --------------------------------------------------------------------- + ! int __lesf2(float a, float b): + ! This function returns a value less than or equal to zero if neither + ! argument is NaN and a is less than b. + ! --------------------------------------------------------------------- + .global __ltsf2 + .type __ltsf2, @function +__ltsf2: + ! --------------------------------------------------------------------- + ! int __ltsf2(float a, float b): + ! This function returns a value less than zero if neither argument is + ! NaN and a is strictly less than b. + ! --------------------------------------------------------------------- + .global __cmpsf2 + .type __cmpsf2, @function +__cmpsf2: + ! --------------------------------------------------------------------- + ! int __cmpsf2(float a, float b); + ! This function calculates a <=> b. That is, if a is less than b, it + ! returns -1; if a if greater than b, it returns 1; and if a and b are + ! equal, it returns 0. If either argument is NaN, it returns 1, But you + ! should not rely on this; If NaN is a possibility, use higher-level + ! comparison function __unordsf2(). + ! 
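Once NaNs are excluded, the comparison helpers documented above rely on the fact that IEEE-754 bit patterns of same-signed floats order exactly like integers, so an integer subtraction (reversed when both operands are negative) yields the <, ==, > answer; operands of different sign only need the +0.0f/-0.0f case. A C sketch of that logic, normalized to -1/0/1 for readability (the assembly returns any value of the right sign; the names are mine):

#include <stdint.h>

/* Illustrative only: three-way compare on raw float bits.  nan_result is
   what to return when either operand is NaN (1 or -1 depending on which
   entry point was used).  */
static int
cmpsf_sketch (uint32_t a, uint32_t b, int nan_result)
{
  if ((a << 1) > 0xff000000u || (b << 1) > 0xff000000u)
    return nan_result;                        /* at least one NaN */

  if ((int32_t) (a ^ b) < 0)                  /* signs differ */
    {
      if (((a | b) << 1) == 0)
        return 0;                             /* +0.0f == -0.0f */
      return ((int32_t) a < 0) ? -1 : 1;
    }
  if ((int32_t) a >= 0)                       /* both non-negative */
    return (a < b) ? -1 : (a > b) ? 1 : 0;
  return (b < a) ? -1 : (b > a) ? 1 : 0;      /* both negative: order flips */
}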
--------------------------------------------------------------------- move $r4, #1 - j .LA - .global __gesf2 - .type __gesf2, @function -__gesf2: - .global __gtsf2 - .type __gtsf2, @function -__gtsf2: - move $r4, #-1 + .align 2 .LA: - push $lp +#ifndef __FAST_MATH__ + move $r5, #0xff000000 + slli $r2, $r0, #1 + slt $r15, $r5, $r2 + bnez $r15, .LMnan ! a is NaN + slli $r3, $r1, #1 + slt $r15, $r5, $r3 + bnez $r15, .LMnan ! b is NaN +#endif + xor $r5, $r0, $r1 ! a and b have same sign? + bgez $r5, .LSameSign +.LDiffSign: +#ifdef __FAST_MATH__ slli $r2, $r0, #1 slli $r3, $r1, #1 - or $r5, $r2, $r3 - beqz $r5, .LMequ - move $r5, #0xff000000 - slt $r15, $r5, $r2 - bnezs8 .LMnan - slt $r15, $r5, $r3 - bnezs8 .LMnan - srli $r2, $r2, #1 - sltsi $r15, $r0, #0 - beqzs8 .Li48 - subri $r2, $r2, #0 -.Li48: - srli $r3, $r3, #1 - sltsi $r15, $r1, #0 - beqzs8 .Li49 - subri $r3, $r3, #0 -.Li49: - slts $r15, $r2, $r3 - beqzs8 .Li50 - move $r0, #-1 - j .LM999 -.Li50: - slts $r15, $r3, $r2 - beqzs8 .LMequ - move $r0, #1 - j .LM999 +#endif + or $r2, $r2, $r3 + beqz $r2, .LMequ ! 0.0f and -0.0f are equal + move $r2, #1 ! when a==0.0f, return 1 + cmovz $r0, $r2, $r0 ! otherwise, simply return a + ret5 $lp + +.LSameSign: + sltsi $r15, $r0, 0 ! a < 0 ? + bnez $r15, .LSameSignNeg +.LSameSignPos: + ! a >= 0 && b >= 0, return a - b + sub $r0, $r0, $r1 + ret5 $lp +.LSameSignNeg: + ! a < 0 && b < 0, return b - a + sub $r0, $r1, $r0 + ret5 $lp .LMequ: move $r0, #0 - -.LM999: - pop $lp ret5 $lp +#ifndef __FAST_MATH__ .LMnan: move $r0, $r4 - j .LM999 + ret5 $lp +#endif .size __cmpsf2, .-__cmpsf2 - .size __eqsf2, .-__eqsf2 .size __ltsf2, .-__ltsf2 .size __lesf2, .-__lesf2 .size __nesf2, .-__nesf2 + .size __eqsf2, .-__eqsf2 .size __gesf2, .-__gesf2 .size __gtsf2, .-__gtsf2 #endif /* L_compare_sf */ @@ -5005,125 +4970,199 @@ #define P2H $r3 #define P2L $r2 #endif +#define W1 $r5 +#define W0 $r4 +#ifdef __NDS32_REDUCE_REGS__ + #define W2 $r6 + #define W3 $r7 + #define W4 $r8 + #define W5 $r9 +#else + #define W2 $r16 + #define W3 $r17 + #define W4 $r18 + #define W5 $r19 +#endif + + .text .align 2 .globl __gtdf2 - .globl __gedf2 - .globl __ltdf2 - .globl __ledf2 - .globl __eqdf2 - .globl __nedf2 - .globl __cmpdf2 .type __gtdf2, @function - .type __gedf2, @function - .type __ltdf2, @function - .type __ledf2, @function - .type __eqdf2, @function - .type __nedf2, @function - .type __cmpdf2, @function __gtdf2: + ! --------------------------------------------------------------------- + ! int __gtdf2(double a, double b): + ! This function returns a value greater than zero if neither argument + ! is NaN and a is strictly greater than b. + ! --------------------------------------------------------------------- + .globl __gedf2 + .type __gedf2, @function __gedf2: - movi $r4, -1 - b .L1 + ! --------------------------------------------------------------------- + ! int __gedf2(double a, double b): + ! This function returns a value greater than or equal to zero if + ! neither argument is NaN and a is greater than or equal to b. + ! --------------------------------------------------------------------- + move $r4, #-1 + b .LA -__ltdf2: + .globl __eqdf2 + .type __eqdf2, @function +__eqdf2: + ! --------------------------------------------------------------------- + ! int __eqdf2(double a, double b): + ! This function returns zero value if neither argument is NaN and and b + ! are equal. + ! --------------------------------------------------------------------- + .globl __nedf2 + .type __nedf2, @function +__nedf2: + ! 
--------------------------------------------------------------------- + ! int __nedf2(double a, double b): + ! This function returns a nonzero value if either argument is NaN or if + ! a and b are unequal. + ! --------------------------------------------------------------------- + .globl __ledf2 + .type __ledf2, @function __ledf2: + ! --------------------------------------------------------------------- + ! int __ledf2(double a, double b): + ! This function returns a value less than or equal to zero if neither + ! argument is NaN and a is less than b. + ! --------------------------------------------------------------------- + .globl __ltdf2 + .type __ltdf2, @function +__ltdf2: + ! --------------------------------------------------------------------- + ! int __ltdf2(double a, double b): + ! This function returns a value less than zero if neither argument is + ! NaN and a is strictly less than b. + ! --------------------------------------------------------------------- + .globl __cmpdf2 + .type __cmpdf2, @function __cmpdf2: -__nedf2: -__eqdf2: - movi $r4, 1 -.L1: -#if defined (__NDS32_ISA_V3M__) - push25 $r10, 0 -#else + ! --------------------------------------------------------------------- + ! int __cmpdf2(double a, double b); + ! This function calculates a <=> b. That is, if a is less than b, it + ! returns -1; if a if greater than b, it returns 1; and if a and b are + ! equal, it returns 0. If either argument is NaN, it returns 1, But you + ! should not rely on this; If NaN is a possibility, use higher-level + ! comparison function __unordsf2(). + ! --------------------------------------------------------------------- + move $r4, #1 + +.LA: + move W1, #0 +#ifdef __NDS32_REDUCE_REGS__ smw.adm $r6, [$sp], $r9, 0 #endif - sethi $r5, 0x7ff00 - and $r6, P1H, $r5 ! r6=aExp - and $r7, P2H, $r5 ! r7=bExp - slli $r8, P1H, 12 ! r8=aSig0 - slli $r9, P2H, 12 ! r9=bSig0 - beq $r6, $r5, .L11 ! aExp==0x7ff - beq $r7, $r5, .L12 ! bExp==0x7ff -.L2: - slli $ta, P1H, 1 ! ta=ahigh<<1 - or $ta, P1L, $ta ! - xor $r5, P1H, P2H ! r5=ahigh^bhigh - beqz $ta, .L3 ! if(ahigh<<1)==0,go .L3 - !------------------------------- - ! (ahigh<<1)!=0 || (bhigh<<1)!=0 - !------------------------------- -.L4: - beqz $r5, .L5 ! ahigh==bhigh, go .L5 - !-------------------- - ! a != b - !-------------------- -.L6: - bltz $r5, .L7 ! if(aSign!=bSign), go .L7 - !-------------------- - ! aSign==bSign - !-------------------- - slt $ta, $r6, $r7 ! ta=(aExp|b|), go .L10 - nor $r0, P2H, P2H ! if(|a|<|b|),return (~yh) -.L14: -#if defined (__NDS32_ISA_V3M__) - pop25 $r10, 0 + move W4, #0xffe00000 + slli W2, P1H, #1 + slt $r15, W1, P1L + add W5, W2, $r15 + slt $r15, W4, W5 + bnez $r15, .LMnan ! a is NaN + slli W3, P2H, #1 + slt $r15, W1, P2L + add W5, W3, $r15 + slt $r15, W4, W5 + bnez $r15, .LMnan ! b is NaN + xor W0, P1H, P2H + bltz W0, .LMdiff ! a and b same sign? + + ! same sign + sltsi $r15, P1H, 0 ! a<0? + bnez $r15, .LMsame + + sub W0, P1L, P2L + slt $r15, P1L, W0 + sub $r0, P1H, P2H + sub $r0, $r0, $r15 ! return a-b + slt $r15, W1, W0 + cmovz $r0, $r15, $r0 +#ifdef __NDS32_REDUCE_REGS__ + b .LMret +#else + ret5 $lp +#endif + +.LMsame: + sub W0, P2L, P1L + slt $r15, P2L, W0 + sub $r0, P2H, P1H + sub $r0, $r0, $r15 ! return b-a + slt $r15, W1, W0 + cmovz $r0, $r15, $r0 +#ifdef __NDS32_REDUCE_REGS__ + b .LMret +#else + ret5 $lp + + .align 2 +#endif + ! different sign +.LMdiff: +#ifdef __NDS32_REDUCE_REGS__ + or W2, W2, W3 ! 0.0f and -0.0f are equal +#else + or W1, W2, W3 ! 
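The double-precision comparisons above (and __unorddf2 further down) share one NaN test: shift the sign bit out of the high word, fold a nonzero low word into bit 0, and check whether the result exceeds 0xffe00000, i.e. whether the exponent is all ones and the mantissa is nonzero. In C, with illustrative names of my own:

#include <stdint.h>

/* Illustrative only: NaN test on the two raw 32-bit halves of a double.  */
static int
isnan_df_sketch (uint32_t hi, uint32_t lo)
{
  uint32_t top = (hi << 1) + (lo != 0);
  return top > 0xffe00000u;
}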
0.0f and -0.0f are equal +#endif + or W0, P1L, P2L +#ifdef __NDS32_REDUCE_REGS__ + or W2, W2, W0 +#else + or W1, W1, W0 +#endif +#ifdef __big_endian__ +#ifdef __NDS32_REDUCE_REGS__ + beqz W2, .LMequ +#else + beqz W1, .LMequ +#endif + + movi $r2, #1 ! when high-part(a) is 0, return 1 + cmovz $r0, $r2, P1H ! otherwise, simply return high-part(a) +#else +#ifdef __NDS32_REDUCE_REGS__ + beqz W2, .LMret #else + beqz W1, .LMret +#endif + + movi $r0, #1 ! when high-part(a) is 0, return 1 + cmovn $r0, P1H, P1H ! otherwise, simply return high-part(a) +#endif + +.LMret: +#ifdef __NDS32_REDUCE_REGS__ lmw.bim $r6, [$sp], $r9, 0 - ret #endif -.L10: - ori $r0, P2H, 1 ! return (yh|1) - b .L14 - !-------------------- - ! (ahigh<<1)=0 - !-------------------- -.L3: - slli $ta, P2H, 1 ! ta=bhigh<<1 - or $ta, P2L, $ta ! - bnez $ta, .L4 ! ta=(bhigh<<1)!=0,go .L4 -.L5: - xor $ta, P1L, P2L ! ta=alow^blow - bnez $ta, .L6 ! alow!=blow,go .L6 - movi $r0, 0 ! a==b, return 0 - b .L14 - !-------------------- - ! aExp=0x7ff; - !-------------------- -.L11: - or P1L, P1L, $r8 ! x1=(aSig0|aSig1) - bnez P1L, .L13 ! if(a=nan), go.L13 - xor $ta, $r7, $r5 ! ta=(bExp^0x7ff) - bnez $ta, .L2 ! if(bExp!=0x7ff), go .L2 - !-------------------- - ! bExp=0x7ff; - !-------------------- -.L12: - or $ta, P2L, $r9 ! ta=(bSig0|bSig1) - beqz $ta, .L2 ! if(b!=nan), go .L2 -.L13: - move $r0, $r4 - b .L14 - !-------------------- - ! aSign!=bSign - !-------------------- -.L7: - ori $r0, P1H, 1 ! if(aSign!=bSign), return (ahigh|1) - b .L14 + ret5 $lp - .size __gtdf2, .-__gtdf2 - .size __gedf2, .-__gedf2 +#ifdef __big_endian__ +.LMequ: + movi $r0, #0 +#ifdef __NDS32_REDUCE_REGS__ + b .LMret +#else + ret5 $lp +#endif +#endif + +.LMnan: + move $r0, $r4 +#ifdef __NDS32_REDUCE_REGS__ + b .LMret +#else + ret5 $lp +#endif + .size __cmpdf2, .-__cmpdf2 .size __ltdf2, .-__ltdf2 .size __ledf2, .-__ledf2 - .size __eqdf2, .-__eqdf2 .size __nedf2, .-__nedf2 - .size __cmpdf2, .-__cmpdf2 + .size __eqdf2, .-__eqdf2 + .size __gedf2, .-__gedf2 + .size __gtdf2, .-__gtdf2 #endif /* L_compare_df */ @@ -5135,27 +5174,21 @@ .global __unordsf2 .type __unordsf2, @function __unordsf2: - push $lp - - slli $r2, $r0, #1 - move $r3, #0xff000000 - slt $r15, $r3, $r2 - beqzs8 .Li52 - move $r0, #1 - j .LP999 -.Li52: - slli $r2, $r1, #1 - move $r3, #0xff000000 - slt $r15, $r3, $r2 - beqzs8 .Li53 - move $r0, #1 - j .LP999 -.Li53: - move $r0, #0 + ! --------------------------------------------------------------------- + ! int __unordsf2(float a, float b): + ! This function returns 1 if either argument is NaN, otherwise 0. + ! --------------------------------------------------------------------- + ! Is a NaN? + slli $r0, $r0, #1 + move $r3, #0xff000000 + slt $r0, $r3, $r0 + bnez $r0, .Li67 + ! a is not NaN. Is b NaN? + slli $r1, $r1, #1 + slt $r0, $r3, $r1 -.LP999: - pop $lp - ret5 $lp +.Li67: + ret5 $lp .size __unordsf2, .-__unordsf2 #endif /* L_unord_sf */ @@ -5163,50 +5196,47 @@ #ifdef L_unord_df -#ifndef __big_endian__ - #define P1L $r0 - #define P1H $r1 - #define P2L $r2 - #define P2H $r3 +#ifdef __big_endian__ + #define P1H $r0 + #define P1L $r1 + #define P2H $r2 + #define P2L $r3 #else - #define P1H $r0 - #define P1L $r1 - #define P2H $r2 - #define P2L $r3 + #define P1H $r1 + #define P1L $r0 + #define P2H $r3 + #define P2L $r2 #endif +#define W1 $r5 +#define W0 $r4 + .text .align 2 .global __unorddf2 .type __unorddf2, @function __unorddf2: - push $lp + ! --------------------------------------------------------------------- + ! int __unorddf2(double a, double b): + ! 
This function returns 1 if either argument is NaN, otherwise 0. + ! --------------------------------------------------------------------- + ! Is a NaN? + slli P1H, P1H, #1 + move W0, #0 + slt $r15, W0, P1L + add $r0, P1H, $r15 + move W1, #0xffe00000 + slt $r0, W1, $r0 + bnez $r0, .Li69 ! it is NaN + + ! a is not NaN. Is b NaN? + slli P2H, P2H, #1 + slt $r15, W0, P2L + add P2H, P2H, $r15 + slt $r0, W1, P2H - slli $r4, P1H, #1 - beqz P1L, .Li66 - addi $r4, $r4, #1 -.Li66: - move $r5, #0xffe00000 - slt $r15, $r5, $r4 - beqzs8 .Li67 - move $r0, #1 - j .LR999 -.Li67: - slli $r4, P2H, #1 - beqz P2L, .Li68 - addi $r4, $r4, #1 -.Li68: - move $r5, #0xffe00000 - slt $r15, $r5, $r4 - beqzs8 .Li69 - move $r0, #1 - j .LR999 .Li69: - move $r0, #0 - -.LR999: - pop $lp - ret5 $lp - .size __unorddf2, .-__unorddf2 + ret5 $lp + .size __unorddf2, .-__unorddf2 #endif /* L_unord_df */ /* ------------------------------------------- */ /* DPBIT floating point operations for libgcc */ diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/lib1asmsrc-newlib.S gcc-4.9.4/libgcc/config/nds32/lib1asmsrc-newlib.S --- gcc-4.9.4.orig/libgcc/config/nds32/lib1asmsrc-newlib.S 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/lib1asmsrc-newlib.S 2016-08-08 20:37:53.750589269 +0200 @@ -1,5 +1,5 @@ /* newlib libgcc routines of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/lib2csrc-mculib/_clzdi2.c gcc-4.9.4/libgcc/config/nds32/lib2csrc-mculib/_clzdi2.c --- gcc-4.9.4.orig/libgcc/config/nds32/lib2csrc-mculib/_clzdi2.c 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/lib2csrc-mculib/_clzdi2.c 1970-01-01 01:00:00.000000000 +0100 @@ -1,38 +0,0 @@ -/* mculib libgcc routines of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . 
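
For readers unfamiliar with the soft-float comparison interface used above: the compiler lowers an ordered double comparison to one of these libcalls and then tests the sign (or zero-ness) of the returned integer, while __unordsf2/__unorddf2 report whether either operand is NaN. The NaN test itself is the bit trick visible in the assembly: shift the sign bit out and compare what remains, as an unsigned value, against the encoding of infinity. A minimal C sketch of both ideas follows; the prototypes and helper names in the sketch are illustrative only, not part of the patch.

/* Illustrative sketch only: how callers typically consume the
   comparison helpers documented above, and the NaN test they use.  */
#include <stdint.h>
#include <string.h>

extern int __ltdf2 (double, double);    /* < 0  iff neither is NaN and a < b  */
extern int __eqdf2 (double, double);    /* == 0 iff neither is NaN and a == b */
extern int __unorddf2 (double, double); /* != 0 iff either argument is NaN    */

static int lt_p (double a, double b)    { return __ltdf2 (a, b) < 0; }
static int eq_p (double a, double b)    { return __eqdf2 (a, b) == 0; }
static int unord_p (double a, double b) { return __unorddf2 (a, b) != 0; }

/* The single-precision NaN check used by __unordsf2: after dropping the
   sign bit, the value is NaN exactly when its bits exceed 0xff000000
   (all-ones exponent with a non-zero mantissa).  The double version
   folds a non-zero low word into the same comparison against 0xffe00000.  */
static int sf_is_nan (float f)
{
  uint32_t bits;
  memcpy (&bits, &f, sizeof bits);
  return (bits << 1) > 0xff000000u;
}
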
*/ - -extern int __clzsi2 (int val); -int -__clzdi2 (long long val) -{ - if (val >> 32) - { - return __clzsi2 (val >> 32); - } - else - { - return __clzsi2 (val) + 32; - } -} diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/lib2csrc-mculib/_clzsi2.c gcc-4.9.4/libgcc/config/nds32/lib2csrc-mculib/_clzsi2.c --- gcc-4.9.4.orig/libgcc/config/nds32/lib2csrc-mculib/_clzsi2.c 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/lib2csrc-mculib/_clzsi2.c 1970-01-01 01:00:00.000000000 +0100 @@ -1,49 +0,0 @@ -/* mculib libgcc routines of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -int -__clzsi2 (int val) -{ - int i = 32; - int j = 16; - int temp; - - for (; j; j >>= 1) - { - if (temp = val >> j) - { - if (j == 1) - { - return (i - 2); - } - else - { - i -= j; - val = temp; - } - } - } - return (i - val); -} diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/lib2src-mculib/_clzdi2.c gcc-4.9.4/libgcc/config/nds32/lib2src-mculib/_clzdi2.c --- gcc-4.9.4.orig/libgcc/config/nds32/lib2src-mculib/_clzdi2.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/lib2src-mculib/_clzdi2.c 2016-08-08 20:37:53.750589269 +0200 @@ -0,0 +1,39 @@ +/* mculib libgcc routines of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2015 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
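
The __clzdi2 that follows (replacing the deleted loop-based version above) counts leading zeros of the high 32-bit word when it is non-zero and otherwise adds 32 to the count of the low word. A quick illustrative check of the expected results, assuming a 32-bit int; an input of zero is left out because __builtin_clz(0), on which the new code relies, is undefined.

/* Illustrative expectations only, not part of the patch.  */
#include <assert.h>

extern int __clzdi2 (long long);

int main (void)
{
  assert (__clzdi2 (1LL) == 63);        /* only bit 0 of the low word set  */
  assert (__clzdi2 (1LL << 32) == 31);  /* only bit 0 of the high word set */
  assert (__clzdi2 (-1LL) == 0);        /* top bit set                     */
  return 0;
}
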
*/ + +int +__clzdi2 (long long val) +{ + unsigned int hi = (unsigned int)(val >> 32); + + if (hi) + { + return __builtin_clz (hi); + } + else + { + return __builtin_clz ((unsigned int)val) + 32; + } +} diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/lib2src-mculib/_clzsi2.S gcc-4.9.4/libgcc/config/nds32/lib2src-mculib/_clzsi2.S --- gcc-4.9.4.orig/libgcc/config/nds32/lib2src-mculib/_clzsi2.S 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/lib2src-mculib/_clzsi2.S 2016-08-08 20:37:53.750589269 +0200 @@ -0,0 +1,47 @@ + .text + .align 2 + .global __clzsi2 + .type __clzsi2, @function +/* +# int n = 31; +# int shift = 16; +# +# if (x != 0) +# { +# do +# { +# if ((x >> shift)) +# { +# n -= shift; +# x >>= shift; +# } +# shift >>= 1; +# } +# while (shift > 0); +# +# return n; +# } +# else +# return (32); +*/ +__clzsi2: + beqz38 $r0, .Lzero + /* Handel general case. */ + movi $r1, #16 + movi $r3, #31 +.Lloop: + sub333 $r4, $r3, $r1 + srl $r2, $r0, $r1 + cmovn $r3, $r4, $r2 + cmovn $r0, $r2, $r2 + srli45 $r1, #1 + bnez38 $r1, .Lloop + + move $r0, $r3 + ret5 + +.Lzero: + /* Handel corner case. (Input value is zero) */ + movi $r0, #32 + ret5 + .size __clzsi2, .-__clzsi2 diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/linux-atomic.c gcc-4.9.4/libgcc/config/nds32/linux-atomic.c --- gcc-4.9.4.orig/libgcc/config/nds32/linux-atomic.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/linux-atomic.c 2016-08-08 20:37:53.750589269 +0200 @@ -0,0 +1,282 @@ +/* Linux-specific atomic operations for NDS32 Linux. + Copyright (C) 2012-2015 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* We implement byte, short and int versions of each atomic operation + using the kernel helper defined below. There is no support for + 64-bit operations yet. */ + +/* This function copy form NDS32 Linux-kernal. 
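
The linux-atomic.c code below derives every __sync_* entry point from a single word-sized compare-and-swap helper: each operation re-reads the word, computes the desired new value, and retries the exchange until no other writer has intervened. Byte and halfword variants work on the aligned 32-bit word containing the target, isolating it with a shift and mask (the INVERT_MASK_* constants move the shift to the correct lane on big-endian). A short sketch of that index arithmetic and of the old-value/new-value return conventions follows; subword_shift and demo are illustrative names, not additions to the patch.

/* Illustrative sketch, not part of the patch.  */
#include <stdint.h>

/* Where a 1- or 2-byte object lives inside its aligned 32-bit word:
   the stored value is (word >> shift) & mask, exactly as computed by
   the SUBWORD_* macros below.  */
static unsigned
subword_shift (const void *ptr, unsigned width, int big_endian)
{
  unsigned shift = ((uintptr_t) ptr & 3) << 3;  /* byte offset, in bits */
  if (big_endian)                               /* INVERT_MASK_1 / _2   */
    shift ^= (width == 1 ? 24 : 16);
  return shift;
}

/* Return conventions of the word-sized helpers defined below:
   fetch-and-op yields the old value, op-and-fetch the new one.  */
extern int __sync_fetch_and_add_4 (int *, int);
extern int __sync_add_and_fetch_4 (int *, int);

static int demo (void)
{
  int counter = 5;
  int old_val = __sync_fetch_and_add_4 (&counter, 3); /* old_val == 5,  counter == 8  */
  int new_val = __sync_add_and_fetch_4 (&counter, 3); /* new_val == 11, counter == 11 */
  return old_val + new_val;
}
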
*/ +static inline int +__kernel_cmpxchg (int oldval, int newval, int *mem) +{ + int temp1, temp2, temp3, offset; + + asm volatile ("msync\tall\n" + "movi\t%0, #0\n" + "1:\n" + "\tllw\t%1, [%4+%0]\n" + "\tsub\t%3, %1, %6\n" + "\tcmovz\t%2, %5, %3\n" + "\tcmovn\t%2, %1, %3\n" + "\tscw\t%2, [%4+%0]\n" + "\tbeqz\t%2, 1b\n" + : "=&r" (offset), "=&r" (temp3), "=&r" (temp2), "=&r" (temp1) + : "r" (mem), "r" (newval), "r" (oldval)); + + return temp2; +} + +#define HIDDEN __attribute__ ((visibility ("hidden"))) + +#ifdef __NDS32_EL__ +#define INVERT_MASK_1 0 +#define INVERT_MASK_2 0 +#else +#define INVERT_MASK_1 24 +#define INVERT_MASK_2 16 +#endif + +#define MASK_1 0xffu +#define MASK_2 0xffffu + +#define FETCH_AND_OP_WORD(OP, PFX_OP, INF_OP) \ + int HIDDEN \ + __sync_fetch_and_##OP##_4 (int *ptr, int val) \ + { \ + int success, tmp; \ + \ + do { \ + tmp = *ptr; \ + success = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr); \ + } while (success == 0); \ + \ + return tmp; \ + } + +FETCH_AND_OP_WORD (add, , +) +FETCH_AND_OP_WORD (sub, , -) +FETCH_AND_OP_WORD (or, , |) +FETCH_AND_OP_WORD (and, , &) +FETCH_AND_OP_WORD (xor, , ^) +FETCH_AND_OP_WORD (nand, ~, &) + +#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH +#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH + +/* Implement both __sync__and_fetch and __sync_fetch_and_ for + subword-sized quantities. */ + +#define SUBWORD_SYNC_OP(OP, PFX_OP, INF_OP, TYPE, WIDTH, RETURN) \ + TYPE HIDDEN \ + NAME##_##RETURN (OP, WIDTH) (TYPE *ptr, TYPE val) \ + { \ + int *wordptr = (int *) ((unsigned long) ptr & ~3); \ + unsigned int mask, shift, oldval, newval; \ + int success; \ + \ + shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ + mask = MASK_##WIDTH << shift; \ + \ + do { \ + oldval = *wordptr; \ + newval = ((PFX_OP (((oldval & mask) >> shift) \ + INF_OP (unsigned int) val)) << shift) & mask; \ + newval |= oldval & ~mask; \ + success = __kernel_cmpxchg (oldval, newval, wordptr); \ + } while (success == 0); \ + \ + return (RETURN & mask) >> shift; \ + } + + +SUBWORD_SYNC_OP (add, , +, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (sub, , -, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (or, , |, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (and, , &, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (xor, , ^, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, oldval) + +SUBWORD_SYNC_OP (add, , +, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (sub, , -, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (or, , |, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (and, , &, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (xor, , ^, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, oldval) + +#define OP_AND_FETCH_WORD(OP, PFX_OP, INF_OP) \ + int HIDDEN \ + __sync_##OP##_and_fetch_4 (int *ptr, int val) \ + { \ + int tmp, success; \ + \ + do { \ + tmp = *ptr; \ + success = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr); \ + } while (success == 0); \ + \ + return PFX_OP (tmp INF_OP val); \ + } + +OP_AND_FETCH_WORD (add, , +) +OP_AND_FETCH_WORD (sub, , -) +OP_AND_FETCH_WORD (or, , |) +OP_AND_FETCH_WORD (and, , &) +OP_AND_FETCH_WORD (xor, , ^) +OP_AND_FETCH_WORD (nand, ~, &) + +SUBWORD_SYNC_OP (add, , +, unsigned short, 2, newval) +SUBWORD_SYNC_OP (sub, , -, unsigned short, 2, newval) +SUBWORD_SYNC_OP (or, , |, unsigned short, 2, newval) +SUBWORD_SYNC_OP (and, , &, unsigned short, 2, newval) +SUBWORD_SYNC_OP (xor, , ^, unsigned short, 2, newval) +SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, 
newval) + +SUBWORD_SYNC_OP (add, , +, unsigned char, 1, newval) +SUBWORD_SYNC_OP (sub, , -, unsigned char, 1, newval) +SUBWORD_SYNC_OP (or, , |, unsigned char, 1, newval) +SUBWORD_SYNC_OP (and, , &, unsigned char, 1, newval) +SUBWORD_SYNC_OP (xor, , ^, unsigned char, 1, newval) +SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, newval) + +int HIDDEN +__sync_val_compare_and_swap_4 (int *ptr, int oldval, int newval) +{ + int actual_oldval, succ; + + while (1) + { + actual_oldval = *ptr; + + if (oldval != actual_oldval) + return actual_oldval; + + succ = __kernel_cmpxchg (actual_oldval, newval, ptr); + + if (succ) + return oldval; + } +} + +#define SUBWORD_VAL_CAS(TYPE, WIDTH) \ + TYPE HIDDEN \ + __sync_val_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, \ + TYPE newval) \ + { \ + int *wordptr = (int *)((unsigned long) ptr & ~3), succ; \ + unsigned int mask, shift, actual_oldval, actual_newval; \ + \ + shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ + mask = MASK_##WIDTH << shift; \ + \ + while (1) \ + { \ + actual_oldval = *wordptr; \ + \ + if (((actual_oldval & mask) >> shift) != (unsigned int) oldval) \ + return (actual_oldval & mask) >> shift; \ + \ + actual_newval = (actual_oldval & ~mask) \ + | (((unsigned int) newval << shift) & mask); \ + \ + succ = __kernel_cmpxchg (actual_oldval, actual_newval, \ + wordptr); \ + \ + if (succ) \ + return oldval; \ + } \ + } + +SUBWORD_VAL_CAS (unsigned short, 2) +SUBWORD_VAL_CAS (unsigned char, 1) + +typedef unsigned char bool; + +bool HIDDEN +__sync_bool_compare_and_swap_4 (int *ptr, int oldval, int newval) +{ + int success = __kernel_cmpxchg (oldval, newval, ptr); + return (success == 0); +} + +#define SUBWORD_BOOL_CAS(TYPE, WIDTH) \ + bool HIDDEN \ + __sync_bool_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, \ + TYPE newval) \ + { \ + TYPE actual_oldval \ + = __sync_val_compare_and_swap_##WIDTH (ptr, oldval, newval); \ + return (oldval == actual_oldval); \ + } + +SUBWORD_BOOL_CAS (unsigned short, 2) +SUBWORD_BOOL_CAS (unsigned char, 1) + +int HIDDEN +__sync_lock_test_and_set_4 (int *ptr, int val) +{ + int success, oldval; + + do { + oldval = *ptr; + success = __kernel_cmpxchg (oldval, val, ptr); + } while (success == 0); + + return oldval; +} + +#define SUBWORD_TEST_AND_SET(TYPE, WIDTH) \ + TYPE HIDDEN \ + __sync_lock_test_and_set_##WIDTH (TYPE *ptr, TYPE val) \ + { \ + int success; \ + unsigned int oldval, newval, shift, mask; \ + int *wordptr = (int *) ((unsigned long) ptr & ~3); \ + \ + shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ + mask = MASK_##WIDTH << shift; \ + \ + do { \ + oldval = *wordptr; \ + newval = (oldval & ~mask) \ + | (((unsigned int) val << shift) & mask); \ + success = __kernel_cmpxchg (oldval, newval, wordptr); \ + } while (success == 0); \ + \ + return (oldval & mask) >> shift; \ + } + +SUBWORD_TEST_AND_SET (unsigned short, 2) +SUBWORD_TEST_AND_SET (unsigned char, 1) + +#define SYNC_LOCK_RELEASE(TYPE, WIDTH) \ + void HIDDEN \ + __sync_lock_release_##WIDTH (TYPE *ptr) \ + { \ + /* All writes before this point must be seen before we release \ + the lock itself. 
*/ \ + __builtin_nds32_msync_all (); \ + *ptr = 0; \ + } + +SYNC_LOCK_RELEASE (int, 4) +SYNC_LOCK_RELEASE (short, 2) +SYNC_LOCK_RELEASE (char, 1) diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/linux-unwind.h gcc-4.9.4/libgcc/config/nds32/linux-unwind.h --- gcc-4.9.4.orig/libgcc/config/nds32/linux-unwind.h 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/linux-unwind.h 2016-08-08 20:37:53.750589269 +0200 @@ -0,0 +1,151 @@ +/* DWARF2 EH unwinding support for NDS32 Linux signal frame. + Copyright (C) 2014-2015 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef inhibit_libc + +/* Do code reading to identify a signal frame, and set the frame + state data appropriately. See unwind-dw2.c for the structs. + The corresponding bits in the Linux kernel are in + arch/nds32/kernel/signal.c. */ + +#include +#include + +/* Exactly the same layout as the kernel structures, unique names. */ + +/* arch/nds32/kernel/signal.c */ +struct _sigframe { + struct ucontext uc; + unsigned long retcode; +}; + +struct _rt_sigframe { + siginfo_t info; + struct _sigframe sig; +}; + +#define MD_FALLBACK_FRAME_STATE_FOR nds32_fallback_frame_state + +/* This function is supposed to be invoked by uw_frame_state_for() + when there is no unwind data available. + + Generally, given the _Unwind_Context CONTEXT for a stack frame, + we need to look up its caller and decode information into FS. + However, if the exception handling happens within a signal handler, + the return address of signal handler is a special module, which + contains signal return syscall and has no FDE in the .eh_frame section. + We need to implement MD_FALLBACK_FRAME_STATE_FOR so that we can + unwind through signal frames. */ +static _Unwind_Reason_Code +nds32_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + u_int32_t *pc = (u_int32_t *) context->ra; + struct sigcontext *sc_; + _Unwind_Ptr new_cfa; + +#ifdef __NDS32_EB__ +#error "Signal handler is not supported for force unwind." +#endif + + /* Check if we are going through a signal handler. + See arch/nds32/kernel/signal.c implementation. + SWI_SYS_SIGRETURN -> (0xeb0e0a64) + SWI_SYS_RT_SIGRETURN -> (0xab150a64) + FIXME: Currently we only handle little endian (EL) case. */ + if (pc[0] == 0xeb0e0a64) + { + /* Using '_sigfame' memory address to locate kernal's sigcontext. + The sigcontext structures in arch/nds32/include/asm/sigcontext.h. */ + struct _sigframe *rt_; + rt_ = context->cfa; + sc_ = &rt_->uc.uc_mcontext; + } + else if (pc[0] == 0xab150a64) + { + /* Using '_sigfame' memory address to locate kernal's sigcontext. 
*/ + struct _rt_sigframe *rt_; + rt_ = context->cfa; + sc_ = &rt_->sig.uc.uc_mcontext; + } + else + return _URC_END_OF_STACK; + + /* Update cfa from sigcontext. */ + new_cfa = (_Unwind_Ptr) sc_; + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = STACK_POINTER_REGNUM; + fs->regs.cfa_offset = new_cfa - (_Unwind_Ptr) context->cfa; + +#define NDS32_PUT_FS_REG(NUM, NAME) \ + (fs->regs.reg[NUM].how = REG_SAVED_OFFSET, \ + fs->regs.reg[NUM].loc.offset = (_Unwind_Ptr) &(sc_->NAME) - new_cfa) + + /* Restore all registers value. */ + NDS32_PUT_FS_REG (0, nds32_r0); + NDS32_PUT_FS_REG (1, nds32_r1); + NDS32_PUT_FS_REG (2, nds32_r2); + NDS32_PUT_FS_REG (3, nds32_r3); + NDS32_PUT_FS_REG (4, nds32_r4); + NDS32_PUT_FS_REG (5, nds32_r5); + NDS32_PUT_FS_REG (6, nds32_r6); + NDS32_PUT_FS_REG (7, nds32_r7); + NDS32_PUT_FS_REG (8, nds32_r8); + NDS32_PUT_FS_REG (9, nds32_r9); + NDS32_PUT_FS_REG (10, nds32_r10); + NDS32_PUT_FS_REG (11, nds32_r11); + NDS32_PUT_FS_REG (12, nds32_r12); + NDS32_PUT_FS_REG (13, nds32_r13); + NDS32_PUT_FS_REG (14, nds32_r14); + NDS32_PUT_FS_REG (15, nds32_r15); + NDS32_PUT_FS_REG (16, nds32_r16); + NDS32_PUT_FS_REG (17, nds32_r17); + NDS32_PUT_FS_REG (18, nds32_r18); + NDS32_PUT_FS_REG (19, nds32_r19); + NDS32_PUT_FS_REG (20, nds32_r20); + NDS32_PUT_FS_REG (21, nds32_r21); + NDS32_PUT_FS_REG (22, nds32_r22); + NDS32_PUT_FS_REG (23, nds32_r23); + NDS32_PUT_FS_REG (24, nds32_r24); + NDS32_PUT_FS_REG (25, nds32_r25); + + NDS32_PUT_FS_REG (28, nds32_fp); + NDS32_PUT_FS_REG (29, nds32_gp); + NDS32_PUT_FS_REG (30, nds32_lr); + NDS32_PUT_FS_REG (31, nds32_sp); + + /* Restore PC, point to trigger signal instruction. */ + NDS32_PUT_FS_REG (32, nds32_ipc); + +#undef NDS32_PUT_FS_REG + + /* The retaddr is PC, use PC to find FDE. */ + fs->retaddr_column = 32; + fs->signal_frame = 1; + + return _URC_NO_REASON; +} + +#endif diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/sfp-machine.h gcc-4.9.4/libgcc/config/nds32/sfp-machine.h --- gcc-4.9.4.orig/libgcc/config/nds32/sfp-machine.h 2014-02-13 00:24:49.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/sfp-machine.h 2016-08-08 20:37:53.750589269 +0200 @@ -1,6 +1,6 @@ /* Machine settings for software floating-point emulation of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. + Copyright (C) 2012-2015 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GNU C Library. diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/t-crtstuff gcc-4.9.4/libgcc/config/nds32/t-crtstuff --- gcc-4.9.4.orig/libgcc/config/nds32/t-crtstuff 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/t-crtstuff 2016-08-08 20:37:53.750589269 +0200 @@ -0,0 +1,5 @@ +# crtend*.o cannot be compiled without -fno-asynchronous-unwind-tables, +# because then __FRAME_END__ might not be the last thing in .eh_frame +# section. +CRTSTUFF_T_CFLAGS += -fno-asynchronous-unwind-tables +CRTSTUFF_T_CFLAGS_S += -fno-asynchronous-unwind-tables diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/t-nds32 gcc-4.9.4/libgcc/config/nds32/t-nds32 --- gcc-4.9.4.orig/libgcc/config/nds32/t-nds32 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/t-nds32 2016-08-08 20:37:53.750589269 +0200 @@ -1,5 +1,5 @@ # Rules of libgcc and crtstuff of Andes NDS32 cpu for GNU compiler -# Copyright (C) 2012-2014 Free Software Foundation, Inc. +# Copyright (C) 2012-2015 Free Software Foundation, Inc. # Contributed by Andes Technology Corporation. # # This file is part of GCC. 
@@ -26,33 +26,22 @@ # Make sure the linker script include these two objects # for building .ctors/.dtors sections. -# Use -DCRT_BEGIN to create beginning parts of .init and .fini content -# Make sure you are building crtbegin1.o with -O0 optimization, -# otherwise the static function will be optimized out +# Use -DCRT_BEGIN to create beginning parts of .init and .fini content. crtbegin1.o: $(srcdir)/config/nds32/initfini.c $(GCC_PASSES) $(CONFIG_H) $(GCC_FOR_TARGET) $(INCLUDES) \ $(CFLAGS) \ -DCRT_BEGIN \ -finhibit-size-directive -fno-inline-functions \ - -O0 -c $(srcdir)/config/nds32/initfini.c -o crtbegin1.o + -fno-toplevel-reorder \ + -Os -c $(srcdir)/config/nds32/initfini.c -o crtbegin1.o -# Use -DCRT_END to create ending parts of .init and .fini content -# Make sure you are building crtend1.o with -O0 optimization, -# otherwise the static function will be optimized out +# Use -DCRT_END to create ending parts of .init and .fini content. crtend1.o: $(srcdir)/config/nds32/initfini.c $(GCC_PASSES) $(CONFIG_H) $(GCC_FOR_TARGET) $(INCLUDES) \ $(CFLAGS) \ -DCRT_END \ -finhibit-size-directive -fno-inline-functions \ - -O0 -c $(srcdir)/config/nds32/initfini.c -o crtend1.o - -# Use this rule if and only if your crt0.o does not come from library -# Also, be sure to add 'crtzero.o' in extra_parts in libgcc/config.host -# and change STARTFILE_SPEC in nds32.h -# -#crtzero.o: $(srcdir)/config/nds32/crtzero.S $(GCC_PASSES) $(CONFIG_H) -# $(GCC_FOR_TARGET) $(INCLUDES) \ -# -c $(srcdir)/config/nds32/crtzero.S -o crtzero.o - + -fno-toplevel-reorder \ + -Os -c $(srcdir)/config/nds32/initfini.c -o crtend1.o # ------------------------------------------------------------------------ diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/t-nds32-glibc gcc-4.9.4/libgcc/config/nds32/t-nds32-glibc --- gcc-4.9.4.orig/libgcc/config/nds32/t-nds32-glibc 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/t-nds32-glibc 2016-08-08 20:37:53.750589269 +0200 @@ -0,0 +1,34 @@ +# Rules of glibc library makefile of Andes NDS32 cpu for GNU compiler +# Copyright (C) 2012-2015 Free Software Foundation, Inc. +# Contributed by Andes Technology Corporation. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 3, or (at your +# option) any later version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +# License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# Compiler flags to use when compiling 'libgcc2.c' +HOST_LIBGCC2_CFLAGS = -O2 -fPIC +LIB2ADD += $(srcdir)/config/nds32/linux-atomic.c + +#LIB1ASMSRC = nds32/lib1asmsrc-newlib.S +#LIB1ASMFUNCS = _divsi3 _modsi3 _udivsi3 _umodsi3 + +# List of functions not to build from libgcc2.c. +#LIB2FUNCS_EXCLUDE = _clzsi2 + +# List of extra C and assembler files(*.S) to add to static libgcc2. 
+#LIB2ADD_ST += $(srcdir)/config/nds32/lib2csrc-newlib/_clzsi2.c + +# ------------------------------------------------------------------------ diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/t-nds32-isr gcc-4.9.4/libgcc/config/nds32/t-nds32-isr --- gcc-4.9.4.orig/libgcc/config/nds32/t-nds32-isr 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/t-nds32-isr 2016-08-08 20:37:53.750589269 +0200 @@ -1,5 +1,5 @@ # Rules of c-isr library stuff of Andes NDS32 cpu for GNU compiler -# Copyright (C) 2012-2014 Free Software Foundation, Inc. +# Copyright (C) 2012-2015 Free Software Foundation, Inc. # Contributed by Andes Technology Corporation. # # This file is part of GCC. @@ -23,11 +23,15 @@ # Makfile fragment rules for libnds32_isr.a to support ISR attribute extension ############################################################################### -# basic flags setting -ISR_CFLAGS = $(CFLAGS) -c +# Basic flags setting. +ifneq ($(filter -mext-dsp,$(CFLAGS)),) +ISR_CFLAGS = $(CFLAGS) -mno-force-no-ext-zol -mext-zol -c +else +ISR_CFLAGS = $(CFLAGS) -mno-force-no-ext-zol -c +endif -# the object files we would like to create -LIBNDS32_ISR_16B_OBJS = \ +# The object files we would like to create. +LIBNDS32_ISR_VEC_OBJS = \ vec_vid00.o vec_vid01.o vec_vid02.o vec_vid03.o \ vec_vid04.o vec_vid05.o vec_vid06.o vec_vid07.o \ vec_vid08.o vec_vid09.o vec_vid10.o vec_vid11.o \ @@ -46,40 +50,9 @@ vec_vid60.o vec_vid61.o vec_vid62.o vec_vid63.o \ vec_vid64.o vec_vid65.o vec_vid66.o vec_vid67.o \ vec_vid68.o vec_vid69.o vec_vid70.o vec_vid71.o \ - vec_vid72.o \ - excp_isr_ps_nn.o excp_isr_ps_ns.o excp_isr_ps_nr.o \ - excp_isr_sa_nn.o excp_isr_sa_ns.o excp_isr_sa_nr.o \ - intr_isr_ps_nn.o intr_isr_ps_ns.o intr_isr_ps_nr.o \ - intr_isr_sa_nn.o intr_isr_sa_ns.o intr_isr_sa_nr.o \ - reset.o - -LIBNDS32_ISR_4B_OBJS = \ - vec_vid00_4b.o vec_vid01_4b.o vec_vid02_4b.o vec_vid03_4b.o \ - vec_vid04_4b.o vec_vid05_4b.o vec_vid06_4b.o vec_vid07_4b.o \ - vec_vid08_4b.o vec_vid09_4b.o vec_vid10_4b.o vec_vid11_4b.o \ - vec_vid12_4b.o vec_vid13_4b.o vec_vid14_4b.o vec_vid15_4b.o \ - vec_vid16_4b.o vec_vid17_4b.o vec_vid18_4b.o vec_vid19_4b.o \ - vec_vid20_4b.o vec_vid21_4b.o vec_vid22_4b.o vec_vid23_4b.o \ - vec_vid24_4b.o vec_vid25_4b.o vec_vid26_4b.o vec_vid27_4b.o \ - vec_vid28_4b.o vec_vid29_4b.o vec_vid30_4b.o vec_vid31_4b.o \ - vec_vid32_4b.o vec_vid33_4b.o vec_vid34_4b.o vec_vid35_4b.o \ - vec_vid36_4b.o vec_vid37_4b.o vec_vid38_4b.o vec_vid39_4b.o \ - vec_vid40_4b.o vec_vid41_4b.o vec_vid42_4b.o vec_vid43_4b.o \ - vec_vid44_4b.o vec_vid45_4b.o vec_vid46_4b.o vec_vid47_4b.o \ - vec_vid48_4b.o vec_vid49_4b.o vec_vid50_4b.o vec_vid51_4b.o \ - vec_vid52_4b.o vec_vid53_4b.o vec_vid54_4b.o vec_vid55_4b.o \ - vec_vid56_4b.o vec_vid57_4b.o vec_vid58_4b.o vec_vid59_4b.o \ - vec_vid60_4b.o vec_vid61_4b.o vec_vid62_4b.o vec_vid63_4b.o \ - vec_vid64_4b.o vec_vid65_4b.o vec_vid66_4b.o vec_vid67_4b.o \ - vec_vid68_4b.o vec_vid69_4b.o vec_vid70_4b.o vec_vid71_4b.o \ - vec_vid72_4b.o \ - excp_isr_ps_nn_4b.o excp_isr_ps_ns_4b.o excp_isr_ps_nr_4b.o \ - excp_isr_sa_nn_4b.o excp_isr_sa_ns_4b.o excp_isr_sa_nr_4b.o \ - intr_isr_ps_nn_4b.o intr_isr_ps_ns_4b.o intr_isr_ps_nr_4b.o \ - intr_isr_sa_nn_4b.o intr_isr_sa_ns_4b.o intr_isr_sa_nr_4b.o \ - reset_4b.o + vec_vid72.o -LIBNDS32_ISR_COMMON_OBJS = \ +LIBNDS32_ISR_JMP_OBJS = \ jmptbl_vid00.o jmptbl_vid01.o jmptbl_vid02.o jmptbl_vid03.o \ jmptbl_vid04.o jmptbl_vid05.o jmptbl_vid06.o jmptbl_vid07.o \ jmptbl_vid08.o jmptbl_vid09.o jmptbl_vid10.o jmptbl_vid11.o \ @@ -98,29 +71,32 @@ 
jmptbl_vid60.o jmptbl_vid61.o jmptbl_vid62.o jmptbl_vid63.o \ jmptbl_vid64.o jmptbl_vid65.o jmptbl_vid66.o jmptbl_vid67.o \ jmptbl_vid68.o jmptbl_vid69.o jmptbl_vid70.o jmptbl_vid71.o \ - jmptbl_vid72.o \ + jmptbl_vid72.o + +LIBNDS32_ISR_COMMON_OBJS = \ + excp_isr_ps_nn.o excp_isr_ps_ns.o excp_isr_ps_nr.o \ + excp_isr_sa_nn.o excp_isr_sa_ns.o excp_isr_sa_nr.o \ + intr_isr_ps_nn.o intr_isr_ps_ns.o intr_isr_ps_nr.o \ + intr_isr_sa_nn.o intr_isr_sa_ns.o intr_isr_sa_nr.o \ + reset.o \ nmih.o \ wrh.o -LIBNDS32_ISR_COMPLETE_OBJS = $(LIBNDS32_ISR_16B_OBJS) $(LIBNDS32_ISR_4B_OBJS) $(LIBNDS32_ISR_COMMON_OBJS) - +LIBNDS32_ISR_COMPLETE_OBJS = $(LIBNDS32_ISR_VEC_OBJS) $(LIBNDS32_ISR_JMP_OBJS) $(LIBNDS32_ISR_COMMON_OBJS) -# Build common objects for ISR library -nmih.o: $(srcdir)/config/nds32/isr-library/nmih.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/nmih.S -o nmih.o -wrh.o: $(srcdir)/config/nds32/isr-library/wrh.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/wrh.S -o wrh.o -jmptbl_vid%.o: $(srcdir)/config/nds32/isr-library/jmptbl_vid%.S +# Build vector vid objects for ISR library. +vec_vid%.o: $(srcdir)/config/nds32/isr-library/vec_vid%.S $(GCC_FOR_TARGET) $(ISR_CFLAGS) $< -o $@ - -# Build 16b version objects for ISR library. (no "_4b" postfix string) -vec_vid%.o: $(srcdir)/config/nds32/isr-library/vec_vid%.S +# Build jump table objects for ISR library. +jmptbl_vid%.o: $(srcdir)/config/nds32/isr-library/jmptbl_vid%.S $(GCC_FOR_TARGET) $(ISR_CFLAGS) $< -o $@ + +# Build commen objects for ISR library. excp_isr_ps_nn.o: $(srcdir)/config/nds32/isr-library/excp_isr.S $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/excp_isr.S -o excp_isr_ps_nn.o @@ -160,48 +136,12 @@ reset.o: $(srcdir)/config/nds32/isr-library/reset.S $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/reset.S -o reset.o -# Build 4b version objects for ISR library. 
-vec_vid%_4b.o: $(srcdir)/config/nds32/isr-library/vec_vid%_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) $< -o $@ - -excp_isr_ps_nn_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_ps_nn_4b.o - -excp_isr_ps_ns_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_ps_ns_4b.o - -excp_isr_ps_nr_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_ps_nr_4b.o - -excp_isr_sa_nn_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_sa_nn_4b.o - -excp_isr_sa_ns_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_sa_ns_4b.o - -excp_isr_sa_nr_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_sa_nr_4b.o - -intr_isr_ps_nn_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_ps_nn_4b.o - -intr_isr_ps_ns_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_ps_ns_4b.o - -intr_isr_ps_nr_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_ps_nr_4b.o - -intr_isr_sa_nn_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_sa_nn_4b.o - -intr_isr_sa_ns_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_sa_ns_4b.o +nmih.o: $(srcdir)/config/nds32/isr-library/nmih.S + $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/nmih.S -o nmih.o -intr_isr_sa_nr_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_sa_nr_4b.o +wrh.o: $(srcdir)/config/nds32/isr-library/wrh.S + $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/wrh.S -o wrh.o -reset_4b.o: $(srcdir)/config/nds32/isr-library/reset_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/reset_4b.S -o reset_4b.o # The rule to create libnds32_isr.a file diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/t-nds32-mculib gcc-4.9.4/libgcc/config/nds32/t-nds32-mculib --- gcc-4.9.4.orig/libgcc/config/nds32/t-nds32-mculib 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/t-nds32-mculib 1970-01-01 01:00:00.000000000 +0100 @@ -1,77 +0,0 @@ -# Rules of mculib library makefile of Andes NDS32 cpu for GNU compiler -# Copyright (C) 2012-2014 Free Software Foundation, Inc. -# Contributed by Andes Technology Corporation. -# -# This file is part of GCC. 
-# -# GCC is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published -# by the Free Software Foundation; either version 3, or (at your -# option) any later version. -# -# GCC is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -# License for more details. -# -# You should have received a copy of the GNU General Public License -# along with GCC; see the file COPYING3. If not see -# . - -# Compiler flags to use when compiling 'libgcc2.c' -HOST_LIBGCC2_CFLAGS = -Os - - -LIB1ASMSRC = nds32/lib1asmsrc-mculib.S - -LIB1ASMFUNCS = \ - _addsub_sf \ - _sf_to_si \ - _divsi3 \ - _divdi3 \ - _modsi3 \ - _moddi3 \ - _mulsi3 \ - _udivsi3 \ - _udivdi3 \ - _udivmoddi4 \ - _umodsi3 \ - _umoddi3 \ - _muldi3 \ - _addsub_df \ - _mul_sf \ - _mul_df \ - _div_sf \ - _div_df \ - _negate_sf \ - _negate_df \ - _sf_to_df \ - _df_to_sf \ - _df_to_si \ - _fixsfdi \ - _fixdfdi \ - _fixunssfsi \ - _fixunsdfsi \ - _fixunssfdi \ - _fixunsdfdi \ - _si_to_sf \ - _si_to_df \ - _floatdisf \ - _floatdidf \ - _floatunsisf \ - _floatunsidf \ - _floatundisf \ - _floatundidf \ - _compare_sf \ - _compare_df \ - _unord_sf \ - _unord_df - -# List of functions not to build from libgcc2.c. -LIB2FUNCS_EXCLUDE = _clzsi2 _clzdi2 - -# List of extra C and assembler files(*.S) to add to static libgcc2. -LIB2ADD_ST += $(srcdir)/config/nds32/lib2csrc-mculib/_clzsi2.c -LIB2ADD_ST += $(srcdir)/config/nds32/lib2csrc-mculib/_clzdi2.c - -# ------------------------------------------------------------------------ diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/t-nds32-mculib-generic gcc-4.9.4/libgcc/config/nds32/t-nds32-mculib-generic --- gcc-4.9.4.orig/libgcc/config/nds32/t-nds32-mculib-generic 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/t-nds32-mculib-generic 2016-08-08 20:37:53.750589269 +0200 @@ -0,0 +1,78 @@ +# Rules of mculib library makefile of Andes NDS32 cpu for GNU compiler +# Copyright (C) 2012-2015 Free Software Foundation, Inc. +# Contributed by Andes Technology Corporation. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 3, or (at your +# option) any later version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +# License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . 
+ +# Compiler flags to use when compiling 'libgcc2.c' +HOST_LIBGCC2_CFLAGS = -Os + + +LIB1ASMSRC = nds32/lib1asmsrc-mculib.S + +LIB1ASMFUNCS = \ + _addsub_sf \ + _divsi3 \ + _divdi3 \ + _modsi3 \ + _moddi3 \ + _mulsi3 \ + _udivsi3 \ + _udivdi3 \ + _umul_ppmm \ + _udivmoddi4 \ + _umodsi3 \ + _umoddi3 \ + _muldi3 \ + _addsub_df \ + _mul_sf \ + _mul_df \ + _div_sf \ + _div_df \ + _negate_sf \ + _negate_df \ + _sf_to_df \ + _df_to_sf \ + _fixsfdi \ + _fixsfsi \ + _fixdfdi \ + _fixdfsi \ + _fixunssfsi \ + _fixunsdfsi \ + _fixunssfdi \ + _fixunsdfdi \ + _si_to_sf \ + _si_to_df \ + _floatdisf \ + _floatdidf \ + _floatunsisf \ + _floatunsidf \ + _floatundisf \ + _floatundidf \ + _compare_sf \ + _compare_df \ + _unord_sf \ + _unord_df + +# List of functions not to build from libgcc2.c. +LIB2FUNCS_EXCLUDE = _clzdi2 _clzsi2 + +# List of extra C and assembler files(*.S) to add to static libgcc2. +LIB2ADD_ST += $(srcdir)/config/nds32/lib2src-mculib/_clzdi2.c +LIB2ADD_ST += $(srcdir)/config/nds32/lib2src-mculib/_clzsi2.S + +# ------------------------------------------------------------------------ diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/t-nds32-mculib-softfp gcc-4.9.4/libgcc/config/nds32/t-nds32-mculib-softfp --- gcc-4.9.4.orig/libgcc/config/nds32/t-nds32-mculib-softfp 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/t-nds32-mculib-softfp 2016-08-08 20:37:53.750589269 +0200 @@ -0,0 +1,56 @@ +# Rules of mculib library makefile of Andes NDS32 cpu for GNU compiler +# Copyright (C) 2012-2015 Free Software Foundation, Inc. +# Contributed by Andes Technology Corporation. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 3, or (at your +# option) any later version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +# License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# Compiler flags to use when compiling 'libgcc2.c' +HOST_LIBGCC2_CFLAGS = -Os + +LIB1ASMSRC = nds32/lib1asmsrc-mculib.S + +LIB1ASMFUNCS = \ + _addsub_sf \ + _divsi3 \ + _divdi3 \ + _modsi3 \ + _moddi3 \ + _mulsi3 \ + _udivsi3 \ + _udivdi3 \ + _umul_ppmm \ + _udivmoddi4 \ + _umodsi3 \ + _umoddi3 \ + _muldi3 \ + _fixsfdi \ + _fixdfdi \ + _fixsfsi \ + _fixdfsi \ + _fixunssfsi \ + _fixunsdfsi \ + _fixunssfdi \ + _fixunsdfdi + +# List of functions not to build from libgcc2.c. +LIB2FUNCS_EXCLUDE = _clzdi2 _clzsi2 + +# List of extra C and assembler files(*.S) to add to static libgcc2. +LIB2ADD_ST += $(srcdir)/config/nds32/lib2src-mculib/_clzdi2.c +LIB2ADD_ST += $(srcdir)/config/nds32/lib2src-mculib/_clzsi2.S + +# ------------------------------------------------------------------------ diff -Nur gcc-4.9.4.orig/libgcc/config/nds32/t-nds32-newlib gcc-4.9.4/libgcc/config/nds32/t-nds32-newlib --- gcc-4.9.4.orig/libgcc/config/nds32/t-nds32-newlib 2014-01-02 23:25:22.000000000 +0100 +++ gcc-4.9.4/libgcc/config/nds32/t-nds32-newlib 2016-08-08 20:37:53.750589269 +0200 @@ -1,5 +1,5 @@ # Rules of newlib library makefile of Andes NDS32 cpu for GNU compiler -# Copyright (C) 2012-2014 Free Software Foundation, Inc. +# Copyright (C) 2012-2015 Free Software Foundation, Inc. 
# Contributed by Andes Technology Corporation. # # This file is part of GCC. diff -Nur gcc-4.9.4.orig/libgcc/config.host gcc-4.9.4/libgcc/config.host --- gcc-4.9.4.orig/libgcc/config.host 2016-05-17 08:22:28.000000000 +0200 +++ gcc-4.9.4/libgcc/config.host 2016-08-08 20:37:53.754589424 +0200 @@ -874,6 +874,23 @@ msp430*-*-elf) tmake_file="$tm_file t-crtstuff t-fdpbit msp430/t-msp430" ;; +nds32*-linux*) + # Basic makefile fragment and extra_parts for crt stuff. + # We also append c-isr library implementation. + tmake_file="${tmake_file} t-slibgcc-libgcc" + tmake_file="${tmake_file} nds32/t-nds32-glibc nds32/t-crtstuff t-softfp-sfdf t-softfp" + # The header file of defining MD_FALLBACK_FRAME_STATE_FOR. + md_unwind_header=nds32/linux-unwind.h + # Append library definition makefile fragment according to --with-nds32-lib=X setting. + case "${with_nds32_lib}" in + "" ) + ;; + *) + echo "Cannot accept --with-nds32-lib= for linux toolchain" 1>&2 + exit 1 + ;; + esac + ;; nds32*-elf*) # Basic makefile fragment and extra_parts for crt stuff. # We also append c-isr library implementation. @@ -887,9 +904,19 @@ tmake_file="${tmake_file} nds32/t-nds32-newlib t-softfp-sfdf t-softfp" ;; mculib) - # Append library definition makefile fragment t-nds32-mculib. + case "${with_arch}" in + "" | v2 | v2j | v3 | v3j | v3m) + # Append library definition makefile fragment t-nds32-mculib-generic. # The software floating point library is included in mculib. - tmake_file="${tmake_file} nds32/t-nds32-mculib" + tmake_file="${tmake_file} nds32/t-nds32-mculib-generic" + ;; + v3f | v3s) + # Append library definition makefile fragment t-nds32-mculib-softfp. + # Append mculib do not support ABI2FP_PLUS, + # so using'soft-fp' software floating point make rule fragment provided by gcc. + tmake_file="${tmake_file} nds32/t-nds32-mculib-softfp t-softfp-sfdf t-softfp" + ;; + esac ;; *) echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: newlib mculib" 1>&2 diff -Nur gcc-4.9.4.orig/libiberty/config.status gcc-4.9.4/libiberty/config.status --- gcc-4.9.4.orig/libiberty/config.status 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/libiberty/config.status 2016-08-08 20:37:53.866593761 +0200 @@ -0,0 +1,1200 @@ +#! /bin/sh +# Generated by configure. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false + +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. 
+if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + + +# as_fn_error ERROR [LINENO LOG_FD] +# --------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with status $?, using 1 if that was 0. +as_fn_error () +{ + as_status=$?; test $as_status -eq 0 && as_status=1 + if test "$3"; then + as_lineno=${as_lineno-"$2"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $1" >&$3 + fi + $as_echo "$as_me: error: $1" >&2 + as_fn_exit $as_status +} # as_fn_error + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. 
+as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -p'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -p' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -p' + fi +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. 
+as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +if test -x / >/dev/null 2>&1; then + as_test_x='test -x' +else + if ls -dL / >/dev/null 2>&1; then + as_ls_L_option=L + else + as_ls_L_option= + fi + as_test_x=' + eval sh -c '\'' + if test -d "$1"; then + test -d "$1/."; + else + case $1 in #( + -*)set "./$1";; + esac; + case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #(( + ???[sx]*):;;*)false;;esac;fi + '\'' sh + ' +fi +as_executable_p=$as_test_x + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. ## +## ----------------------------------- ## +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by $as_me, which was +generated by GNU Autoconf 2.64. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +# Files that config.status was made for. +config_files=" Makefile testsuite/Makefile" +config_headers=" config.h:config.in" +config_commands=" default" + +ac_cs_usage="\ +\`$as_me' instantiates files and other configuration actions +from templates according to the current configuration. Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... + + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Configuration commands: +$config_commands + +Report bugs to the package provider." 
+ +ac_cs_version="\ +config.status +configured by ./configure, generated by GNU Autoconf 2.64, + with options \"'--host=nds32le-linux' '--prefix=/home/users/kito/toolchain/nds32le-linux-glibc-v3/nds32le-linux/sysroot/usr' 'CC=/home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-gcc' 'LD=/home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-gcc' 'RANLIB=/home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-ranlib' 'AR=/home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-ar' '--enable-install-libiberty' 'host_alias=nds32le-linux'\" + +Copyright (C) 2009 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='/home/users/kito/build-system-3/source-packages/gcc-4.9.3/libiberty' +srcdir='.' +INSTALL='/usr/bin/install -c' +test -n "$AWK" || AWK=awk +# The default lists apply if the user does not specify any file. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + $as_echo "$ac_cs_version"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append CONFIG_HEADERS " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + as_fn_error "ambiguous option: \`$1' +Try \`$0 --help' for more information.";; + --help | --hel | -h ) + $as_echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error "unrecognized option: \`$1' +Try \`$0 --help' for more information." ;; + + *) as_fn_append ac_config_targets " $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +if $ac_cs_recheck; then + set X '/bin/sh' './configure' '--host=nds32le-linux' '--prefix=/home/users/kito/toolchain/nds32le-linux-glibc-v3/nds32le-linux/sysroot/usr' 'CC=/home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-gcc' 'LD=/home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-gcc' 'RANLIB=/home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-ranlib' 'AR=/home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-ar' '--enable-install-libiberty' 'host_alias=nds32le-linux' $ac_configure_extra_args --no-create --no-recursion + shift + $as_echo "running CONFIG_SHELL=/bin/sh $*" >&6 + CONFIG_SHELL='/bin/sh' + export CONFIG_SHELL + exec "$@" +fi + +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. 
## +_ASBOX + $as_echo "$ac_log" +} >&5 + +# +# INIT-COMMANDS +# +srcdir=. +host=nds32le-unknown-linux-gnu +target= +with_target_subdir= +with_multisubdir= +ac_configure_args="--enable-multilib '--host=nds32le-linux' '--prefix=/home/users/kito/toolchain/nds32le-linux-glibc-v3/nds32le-linux/sysroot/usr' 'CC=/home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-gcc' 'LD=/home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-gcc' 'RANLIB=/home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-ranlib' 'AR=/home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-ar' '--enable-install-libiberty' 'host_alias=nds32le-linux'" +CONFIG_SHELL=/bin/sh +ORIGINAL_LD_FOR_MULTILIBS="/home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-gcc" +libiberty_topdir=./.. + + + +# Handling of arguments. +for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h:config.in" ;; + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; + "testsuite/Makefile") CONFIG_FILES="$CONFIG_FILES testsuite/Makefile" ;; + "default") CONFIG_COMMANDS="$CONFIG_COMMANDS default" ;; + + *) as_fn_error "invalid argument: \`$ac_config_target'" "$LINENO" 5;; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers + test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= + trap 'exit_status=$? + { test -z "$tmp" || test ! -d "$tmp" || rm -fr "$tmp"; } && exit $exit_status +' 0 + trap 'as_fn_exit 1' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -n "$tmp" && test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || as_fn_error "cannot create a temporary directory in ." "$LINENO" 5 + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with `./config.status config.h'. +if test -n "$CONFIG_FILES"; then + +if $AWK 'BEGIN { getline <"/dev/null" }' /dev/null; then + ac_cs_awk_getline=: + ac_cs_awk_pipe_init= + ac_cs_awk_read_file=' + while ((getline aline < (F[key])) > 0) + print(aline) + close(F[key])' + ac_cs_awk_pipe_fini= +else + ac_cs_awk_getline=false + ac_cs_awk_pipe_init="print \"cat <<'|#_!!_#|' &&\"" + ac_cs_awk_read_file=' + print "|#_!!_#|" + print "cat " F[key] " &&" + '$ac_cs_awk_pipe_init + # The final `:' finishes the AND list. + ac_cs_awk_pipe_fini='END { print "|#_!!_#|"; print ":" }' +fi +ac_cr=`echo X | tr X '\015'` +# On cygwin, bash can eat \r inside `` if the user requested igncr. 
+# But we know of no other shell where ac_cr would be empty at this +# point, so we can use a bashism as a fallback. +if test "x$ac_cr" = x; then + eval ac_cr=\$\'\\r\' +fi +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$tmp/subs1.awk" && +cat >>"$tmp/subs1.awk" <<\_ACAWK && +F["host_makefile_frag"]="xhost-mkfrag" +_ACAWK +cat >>"$tmp/subs1.awk" <<\_ACAWK && +S["LTLIBOBJS"]=" ${LIBOBJDIR}./setproctitle$U.lo" +S["INSTALL_DEST"]="libdir" +S["pexecute"]="pex-unix" +S["target_header_dir"]="libiberty" +S["CHECK"]="really-check" +S["LIBOBJS"]=" ${LIBOBJDIR}./setproctitle$U.o" +S["PICFLAG"]="" +S["INSTALL_DATA"]="${INSTALL} -m 644" +S["INSTALL_SCRIPT"]="${INSTALL}" +S["INSTALL_PROGRAM"]="${INSTALL}" +S["EGREP"]="/usr/bin/grep -E" +S["GREP"]="/usr/bin/grep" +S["OUTPUT_OPTION"]="-o $@" +S["NO_MINUS_C_MINUS_O"]="" +S["ac_libiberty_warn_cflags"]="-W -Wall -Wwrite-strings -Wc++-compat -Wstrict-prototypes -pedantic " +S["CPP"]="/home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-gcc -E" +S["OBJEXT"]="o" +S["EXEEXT"]="" +S["ac_ct_CC"]="" +S["CPPFLAGS"]="" +S["LDFLAGS"]="" +S["CFLAGS"]="-g -O2" +S["CC"]="/home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-gcc" +S["RANLIB"]="/home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-ranlib" +S["AR"]="/home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-ar" +S["host_os"]="linux-gnu" +S["host_vendor"]="unknown" +S["host_cpu"]="nds32le" +S["host"]="nds32le-unknown-linux-gnu" +S["build_os"]="linux-gnu" +S["build_vendor"]="unknown" +S["build_cpu"]="x86_64" +S["build"]="x86_64-unknown-linux-gnu" +S["HAVE_PERL"]="" +S["PERL"]="perl" +S["BUILD_INFO"]="info" +S["MAKEINFO"]="makeinfo" +S["NOTMAINT"]="" +S["MAINT"]="#" +S["libiberty_topdir"]="./.." 
+S["target_alias"]="" +S["host_alias"]="nds32le-linux" +S["build_alias"]="" +S["LIBS"]="" +S["ECHO_T"]="" +S["ECHO_N"]="-n" +S["ECHO_C"]="" +S["DEFS"]="-DHAVE_CONFIG_H" +S["mandir"]="${datarootdir}/man" +S["localedir"]="${datarootdir}/locale" +S["libdir"]="${exec_prefix}/lib" +S["psdir"]="${docdir}" +S["pdfdir"]="${docdir}" +S["dvidir"]="${docdir}" +S["htmldir"]="${docdir}" +S["infodir"]="${datarootdir}/info" +S["docdir"]="${datarootdir}/doc/${PACKAGE}" +S["oldincludedir"]="/usr/include" +S["includedir"]="${prefix}/include" +S["localstatedir"]="${prefix}/var" +S["sharedstatedir"]="${prefix}/com" +S["sysconfdir"]="${prefix}/etc" +S["datadir"]="${datarootdir}" +S["datarootdir"]="${prefix}/share" +S["libexecdir"]="${exec_prefix}/libexec" +S["sbindir"]="${exec_prefix}/sbin" +S["bindir"]="${exec_prefix}/bin" +S["program_transform_name"]="s,x,x," +S["prefix"]="/home/users/kito/toolchain/nds32le-linux-glibc-v3/nds32le-linux/sysroot/usr" +S["exec_prefix"]="${prefix}" +S["PACKAGE_URL"]="" +S["PACKAGE_BUGREPORT"]="" +S["PACKAGE_STRING"]="" +S["PACKAGE_VERSION"]="" +S["PACKAGE_TARNAME"]="" +S["PACKAGE_NAME"]="" +S["PATH_SEPARATOR"]=":" +S["SHELL"]="/bin/sh" +_ACAWK +cat >>"$tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + $ac_cs_awk_pipe_init +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + if (nfields == 3 && !substed) { + key = field[2] + if (F[key] != "" && line ~ /^[ ]*@.*@[ ]*$/) { + $ac_cs_awk_read_file + next + } + } + print line +} +$ac_cs_awk_pipe_fini +_ACAWK +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$tmp/subs1.awk" > "$tmp/subs.awk" \ + || as_fn_error "could not setup config files machinery" "$LINENO" 5 +fi # test -n "$CONFIG_FILES" + +# Set up the scripts for CONFIG_HEADERS section. +# No need to generate them if there are no CONFIG_HEADERS. +# This happens for instance with `./config.status Makefile'. 
+if test -n "$CONFIG_HEADERS"; then +cat >"$tmp/defines.awk" <<\_ACAWK || +BEGIN { +D["PACKAGE_NAME"]=" \"\"" +D["PACKAGE_TARNAME"]=" \"\"" +D["PACKAGE_VERSION"]=" \"\"" +D["PACKAGE_STRING"]=" \"\"" +D["PACKAGE_BUGREPORT"]=" \"\"" +D["PACKAGE_URL"]=" \"\"" +D["_FILE_OFFSET_BITS"]=" 64" +D["STDC_HEADERS"]=" 1" +D["HAVE_SYS_TYPES_H"]=" 1" +D["HAVE_SYS_STAT_H"]=" 1" +D["HAVE_STDLIB_H"]=" 1" +D["HAVE_STRING_H"]=" 1" +D["HAVE_MEMORY_H"]=" 1" +D["HAVE_STRINGS_H"]=" 1" +D["HAVE_INTTYPES_H"]=" 1" +D["HAVE_STDINT_H"]=" 1" +D["HAVE_UNISTD_H"]=" 1" +D["HAVE_SYS_FILE_H"]=" 1" +D["HAVE_SYS_PARAM_H"]=" 1" +D["HAVE_LIMITS_H"]=" 1" +D["HAVE_STDLIB_H"]=" 1" +D["HAVE_MALLOC_H"]=" 1" +D["HAVE_STRING_H"]=" 1" +D["HAVE_UNISTD_H"]=" 1" +D["HAVE_STRINGS_H"]=" 1" +D["HAVE_SYS_TIME_H"]=" 1" +D["HAVE_TIME_H"]=" 1" +D["HAVE_SYS_RESOURCE_H"]=" 1" +D["HAVE_SYS_STAT_H"]=" 1" +D["HAVE_SYS_MMAN_H"]=" 1" +D["HAVE_FCNTL_H"]=" 1" +D["HAVE_ALLOCA_H"]=" 1" +D["HAVE_SYS_SYSINFO_H"]=" 1" +D["HAVE_SYS_SYSCTL_H"]=" 1" +D["HAVE_STDINT_H"]=" 1" +D["HAVE_STDIO_EXT_H"]=" 1" +D["HAVE_SYS_PRCTL_H"]=" 1" +D["HAVE_SYS_WAIT_H"]=" 1" +D["TIME_WITH_SYS_TIME"]=" 1" +D["SIZEOF_INT"]=" 4" +D["UNSIGNED_64BIT_TYPE"]=" uint64_t" +D["HAVE_INTPTR_T"]=" 1" +D["HAVE_UINTPTR_T"]=" 1" +D["HAVE_UINTPTR_T"]=" 1" +D["HAVE_ASPRINTF"]=" 1" +D["HAVE_ATEXIT"]=" 1" +D["HAVE_BASENAME"]=" 1" +D["HAVE_BCMP"]=" 1" +D["HAVE_BCOPY"]=" 1" +D["HAVE_BSEARCH"]=" 1" +D["HAVE_BZERO"]=" 1" +D["HAVE_CALLOC"]=" 1" +D["HAVE_CLOCK"]=" 1" +D["HAVE_FFS"]=" 1" +D["HAVE_GETCWD"]=" 1" +D["HAVE_GETPAGESIZE"]=" 1" +D["HAVE_GETTIMEOFDAY"]=" 1" +D["HAVE_INDEX"]=" 1" +D["HAVE_INSQUE"]=" 1" +D["HAVE_MEMCHR"]=" 1" +D["HAVE_MEMCMP"]=" 1" +D["HAVE_MEMCPY"]=" 1" +D["HAVE_MEMMEM"]=" 1" +D["HAVE_MEMMOVE"]=" 1" +D["HAVE_MEMPCPY"]=" 1" +D["HAVE_MEMSET"]=" 1" +D["HAVE_MKSTEMPS"]=" 1" +D["HAVE_PUTENV"]=" 1" +D["HAVE_RANDOM"]=" 1" +D["HAVE_RENAME"]=" 1" +D["HAVE_RINDEX"]=" 1" +D["HAVE_SETENV"]=" 1" +D["HAVE_SNPRINTF"]=" 1" +D["HAVE_SIGSETMASK"]=" 1" +D["HAVE_STPCPY"]=" 1" +D["HAVE_STPNCPY"]=" 1" +D["HAVE_STRCASECMP"]=" 1" +D["HAVE_STRCHR"]=" 1" +D["HAVE_STRDUP"]=" 1" +D["HAVE_STRNCASECMP"]=" 1" +D["HAVE_STRNDUP"]=" 1" +D["HAVE_STRNLEN"]=" 1" +D["HAVE_STRRCHR"]=" 1" +D["HAVE_STRSTR"]=" 1" +D["HAVE_STRTOD"]=" 1" +D["HAVE_STRTOL"]=" 1" +D["HAVE_STRTOUL"]=" 1" +D["HAVE_STRVERSCMP"]=" 1" +D["HAVE_TMPNAM"]=" 1" +D["HAVE_VASPRINTF"]=" 1" +D["HAVE_VFPRINTF"]=" 1" +D["HAVE_VPRINTF"]=" 1" +D["HAVE_VSNPRINTF"]=" 1" +D["HAVE_VSPRINTF"]=" 1" +D["HAVE_WAITPID"]=" 1" +D["STACK_DIRECTION"]=" 0" +D["HAVE_FORK"]=" 1" +D["HAVE_VFORK"]=" 1" +D["HAVE_WORKING_VFORK"]=" 1" +D["HAVE_WORKING_FORK"]=" 1" +D["HAVE_SYS_ERRLIST"]=" 1" +D["HAVE_SYS_NERR"]=" 1" +D["HAVE_SYS_SIGLIST"]=" 1" +D["HAVE___FSETLOCKING"]=" 1" +D["HAVE_CANONICALIZE_FILE_NAME"]=" 1" +D["HAVE_DUP3"]=" 1" +D["HAVE_GETRLIMIT"]=" 1" +D["HAVE_GETRUSAGE"]=" 1" +D["HAVE_GETTIMEOFDAY"]=" 1" +D["HAVE_ON_EXIT"]=" 1" +D["HAVE_PSIGNAL"]=" 1" +D["HAVE_REALPATH"]=" 1" +D["HAVE_SETRLIMIT"]=" 1" +D["HAVE_SBRK"]=" 1" +D["HAVE_STRERROR"]=" 1" +D["HAVE_STRSIGNAL"]=" 1" +D["HAVE_SYSCONF"]=" 1" +D["HAVE_SYSCTL"]=" 1" +D["HAVE_TIMES"]=" 1" +D["HAVE_WAIT3"]=" 1" +D["HAVE_WAIT4"]=" 1" +D["HAVE_DECL_BASENAME"]=" 0" +D["HAVE_DECL_FFS"]=" 1" +D["HAVE_DECL_ASPRINTF"]=" 0" +D["HAVE_DECL_VASPRINTF"]=" 0" +D["HAVE_DECL_SNPRINTF"]=" 1" +D["HAVE_DECL_VSNPRINTF"]=" 1" +D["HAVE_DECL_CALLOC"]=" 1" +D["HAVE_DECL_GETENV"]=" 1" +D["HAVE_DECL_GETOPT"]=" 1" +D["HAVE_DECL_MALLOC"]=" 1" +D["HAVE_DECL_REALLOC"]=" 1" +D["HAVE_DECL_SBRK"]=" 1" +D["HAVE_DECL_STRVERSCMP"]=" 0" 
+D["NEED_DECLARATION_CANONICALIZE_FILE_NAME"]=" 1" +D["HAVE_STDLIB_H"]=" 1" +D["HAVE_UNISTD_H"]=" 1" +D["HAVE_GETPAGESIZE"]=" 1" + for (key in D) D_is_set[key] = 1 + FS = "" +} +/^[\t ]*#[\t ]*(define|undef)[\t ]+[_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ][_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789]*([\t (]|$)/ { + line = $ 0 + split(line, arg, " ") + if (arg[1] == "#") { + defundef = arg[2] + mac1 = arg[3] + } else { + defundef = substr(arg[1], 2) + mac1 = arg[2] + } + split(mac1, mac2, "(") #) + macro = mac2[1] + prefix = substr(line, 1, index(line, defundef) - 1) + if (D_is_set[macro]) { + # Preserve the white space surrounding the "#". + print prefix "define", macro P[macro] D[macro] + next + } else { + # Replace #undef with comments. This is necessary, for example, + # in the case of _POSIX_SOURCE, which is predefined and required + # on some systems where configure will not decide to define it. + if (defundef == "undef") { + print "/*", prefix defundef, macro, "*/" + next + } + } +} +{ print } +_ACAWK + as_fn_error "could not setup config headers machinery" "$LINENO" 5 +fi # test -n "$CONFIG_HEADERS" + + +eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :C $CONFIG_COMMANDS" +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error "invalid tag \`$ac_tag'" "$LINENO" 5;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error "cannot find input file: \`$ac_f'" "$LINENO" 5;; + esac + case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +$as_echo "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. + case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`$as_echo "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$tmp/stdin" \ + || as_fn_error "could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + + case $INSTALL in + [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; + *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; + esac +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. +ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} + ac_datarootdir_hack=' + s&@datadir@&${datarootdir}&g + s&@docdir@&${datarootdir}/doc/${PACKAGE}&g + s&@infodir@&${datarootdir}/info&g + s&@localedir@&${datarootdir}/locale&g + s&@mandir@&${datarootdir}/man&g + s&\${datarootdir}&${prefix}/share&g' ;; +esac +ac_sed_extra="/^[ ]*VPATH[ ]*=/{ +s/:*\$(srcdir):*/:/ +s/:*\${srcdir}:*/:/ +s/:*@srcdir@:*/:/ +s/^\([^=]*=[ ]*\):*/\1/ +s/:*$// +s/^[^=]*=[ ]*$// +} + +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +s&@INSTALL@&$ac_INSTALL&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | +if $ac_cs_awk_getline; then + $AWK -f "$tmp/subs.awk" +else + $AWK -f "$tmp/subs.awk" | $SHELL +fi >$tmp/out \ + || as_fn_error "could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' "$tmp/out"`; test -z "$ac_out"; } && + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a 
reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined." >&5 +$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined." >&2;} + + rm -f "$tmp/stdin" + case $ac_file in + -) cat "$tmp/out" && rm -f "$tmp/out";; + *) rm -f "$ac_file" && mv "$tmp/out" "$ac_file";; + esac \ + || as_fn_error "could not create $ac_file" "$LINENO" 5 + ;; + :H) + # + # CONFIG_HEADER + # + if test x"$ac_file" != x-; then + { + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$tmp/defines.awk"' "$ac_file_inputs" + } >"$tmp/config.h" \ + || as_fn_error "could not create $ac_file" "$LINENO" 5 + if diff "$ac_file" "$tmp/config.h" >/dev/null 2>&1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 +$as_echo "$as_me: $ac_file is unchanged" >&6;} + else + rm -f "$ac_file" + mv "$tmp/config.h" "$ac_file" \ + || as_fn_error "could not create $ac_file" "$LINENO" 5 + fi + else + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$tmp/defines.awk"' "$ac_file_inputs" \ + || as_fn_error "could not create -" "$LINENO" 5 + fi + ;; + + :C) { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 +$as_echo "$as_me: executing $ac_file commands" >&6;} + ;; + esac + + + case $ac_file$ac_mode in + "default":C) test -z "$CONFIG_HEADERS" || echo timestamp > stamp-h +if test -n "$CONFIG_FILES"; then + if test -n "${with_target_subdir}"; then + # FIXME: We shouldn't need to set ac_file + ac_file=Makefile + LD="${ORIGINAL_LD_FOR_MULTILIBS}" + . ${libiberty_topdir}/config-ml.in + fi +fi ;; + + esac +done # for ac_tag + + +as_fn_exit 0 diff -Nur gcc-4.9.4.orig/libiberty/Makefile gcc-4.9.4/libiberty/Makefile --- gcc-4.9.4.orig/libiberty/Makefile 1970-01-01 01:00:00.000000000 +0100 +++ gcc-4.9.4/libiberty/Makefile 2016-08-08 20:37:53.866593761 +0200 @@ -0,0 +1,1282 @@ +# Makefile for the libiberty library. +# Originally written by K. Richard Pixley . +# +# Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, +# 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 +# Free Software Foundation +# +# This file is part of the libiberty library. +# Libiberty is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# Libiberty is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with libiberty; see the file COPYING.LIB. If not, +# write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor, +# Boston, MA 02110-1301, USA. + +libiberty_topdir = ./.. +srcdir = . + +prefix = /home/users/kito/toolchain/nds32le-linux-glibc-v3/nds32le-linux/sysroot/usr + +exec_prefix = ${prefix} +bindir = ${exec_prefix}/bin +libdir = ${exec_prefix}/lib +includedir = ${prefix}/include +target_header_dir = libiberty +objext = o + +SHELL = /bin/sh + +# Multilib support variables. 
+MULTISRCTOP = +MULTIBUILDTOP = +MULTIDIRS = +MULTISUBDIR = +MULTIDO = true +MULTICLEAN = true + +INSTALL = /usr/bin/install -c +INSTALL_PROGRAM = ${INSTALL} +INSTALL_DATA = ${INSTALL} -m 644 +mkinstalldirs = $(SHELL) $(libiberty_topdir)/mkinstalldirs + +# Some compilers can't handle cc -c blah.c -o foo/blah.o. +OUTPUT_OPTION = -o $@ + +AR = /home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-ar +AR_FLAGS = rc + +CC = /home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-gcc +CFLAGS = -g -O2 +CPPFLAGS = +RANLIB = /home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-ranlib +MAKEINFO = makeinfo +PERL = perl + +PICFLAG = + +MAKEOVERRIDES = + +TARGETLIB = ./libiberty.a +TESTLIB = ./testlib.a + +LIBOBJS = ${LIBOBJDIR}./setproctitle$U.o + +# A configuration can specify extra .o files that should be included, +# even if they are in libc. (Perhaps the libc version is buggy.) +EXTRA_OFILES = + +# Flags to pass to a recursive make. +FLAGS_TO_PASS = \ + "AR=$(AR)" \ + "AR_FLAGS=$(AR_FLAGS)" \ + "CC=$(CC)" \ + "CFLAGS=$(CFLAGS)" \ + "CPPFLAGS=$(CPPFLAGS)" \ + "DESTDIR=$(DESTDIR)" \ + "EXTRA_OFILES=$(EXTRA_OFILES)" \ + "HDEFINES=$(HDEFINES)" \ + "INSTALL=$(INSTALL)" \ + "INSTALL_DATA=$(INSTALL_DATA)" \ + "INSTALL_PROGRAM=$(INSTALL_PROGRAM)" \ + "LDFLAGS=$(LDFLAGS)" \ + "LOADLIBES=$(LOADLIBES)" \ + "RANLIB=$(RANLIB)" \ + "SHELL=$(SHELL)" \ + "prefix=$(prefix)" \ + "exec_prefix=$(exec_prefix)" \ + "libdir=$(libdir)" \ + "libsubdir=$(libsubdir)" \ + "tooldir=$(tooldir)" + +# Subdirectories to recurse into. We need to override this during cleaning +SUBDIRS = testsuite + +# FIXME: add info once we're sure it works for everyone. +all: stamp-picdir $(TARGETLIB) required-list all-subdir + @: $(MAKE) ; $(MULTIDO) $(FLAGS_TO_PASS) multi-do DO=all + +.PHONY: check installcheck +check: check-subdir +installcheck: installcheck-subdir + +# Warning: this fragment is automatically generated +enable_shared = no + +INCDIR=$(srcdir)/$(MULTISRCTOP)../include + +COMPILE.c = $(CC) -c -DHAVE_CONFIG_H $(CFLAGS) $(CPPFLAGS) -I. -I$(INCDIR) $(HDEFINES) -W -Wall -Wwrite-strings -Wc++-compat -Wstrict-prototypes -pedantic + +# Just to make sure we don't use a built-in rule with VPATH +.c.$(objext): + false + +# NOTE: If you add new files to the library, add them to this list +# (alphabetical), and add them to REQUIRED_OFILES, or +# CONFIGURED_OFILES and funcs in configure.ac. Also run "make maint-deps" +# to build the new rules. 
+CFILES = alloca.c argv.c asprintf.c atexit.c \ + basename.c bcmp.c bcopy.c bsearch.c bzero.c \ + calloc.c choose-temp.c clock.c concat.c cp-demangle.c \ + cp-demint.c cplus-dem.c crc32.c \ + dwarfnames.c dyn-string.c \ + fdmatch.c ffs.c fibheap.c filename_cmp.c floatformat.c \ + fnmatch.c fopen_unlocked.c \ + getcwd.c getopt.c getopt1.c getpagesize.c getpwd.c getruntime.c \ + gettimeofday.c \ + hashtab.c hex.c \ + index.c insque.c \ + lbasename.c \ + lrealpath.c \ + make-relative-prefix.c \ + make-temp-file.c md5.c memchr.c memcmp.c memcpy.c memmem.c \ + memmove.c mempcpy.c memset.c mkstemps.c \ + objalloc.c obstack.c \ + partition.c pexecute.c \ + pex-common.c pex-djgpp.c pex-msdos.c pex-one.c \ + pex-unix.c pex-win32.c \ + physmem.c putenv.c \ + random.c regex.c rename.c rindex.c \ + safe-ctype.c setenv.c setproctitle.c sha1.c sigsetmask.c \ + simple-object.c simple-object-coff.c simple-object-elf.c \ + simple-object-mach-o.c simple-object-xcoff.c \ + snprintf.c sort.c \ + spaces.c splay-tree.c stack-limit.c stpcpy.c stpncpy.c \ + strcasecmp.c strchr.c strdup.c strerror.c strncasecmp.c \ + strncmp.c strrchr.c strsignal.c strstr.c strtod.c strtol.c \ + strtoul.c strndup.c strnlen.c strverscmp.c \ + timeval-utils.c tmpnam.c \ + unlink-if-ordinary.c \ + vasprintf.c vfork.c vfprintf.c vprintf.c vsnprintf.c vsprintf.c \ + waitpid.c \ + xatexit.c xexit.c xmalloc.c xmemdup.c xstrdup.c xstrerror.c \ + xstrndup.c + +# These are always included in the library. The first four are listed +# first and by compile time to optimize parallel builds. +REQUIRED_OFILES = \ + ./regex.$(objext) ./cplus-dem.$(objext) ./cp-demangle.$(objext) \ + ./md5.$(objext) ./sha1.$(objext) ./alloca.$(objext) \ + ./argv.$(objext) \ + ./choose-temp.$(objext) ./concat.$(objext) \ + ./cp-demint.$(objext) ./crc32.$(objext) \ + ./dwarfnames.$(objext) ./dyn-string.$(objext) \ + ./fdmatch.$(objext) ./fibheap.$(objext) \ + ./filename_cmp.$(objext) ./floatformat.$(objext) \ + ./fnmatch.$(objext) ./fopen_unlocked.$(objext) \ + ./getopt.$(objext) ./getopt1.$(objext) ./getpwd.$(objext) \ + ./getruntime.$(objext) ./hashtab.$(objext) ./hex.$(objext) \ + ./lbasename.$(objext) ./lrealpath.$(objext) \ + ./make-relative-prefix.$(objext) ./make-temp-file.$(objext) \ + ./objalloc.$(objext) \ + ./obstack.$(objext) \ + ./partition.$(objext) ./pexecute.$(objext) ./physmem.$(objext) \ + ./pex-common.$(objext) ./pex-one.$(objext) \ + ./pex-unix.$(objext) \ + ./safe-ctype.$(objext) \ + ./simple-object.$(objext) ./simple-object-coff.$(objext) \ + ./simple-object-elf.$(objext) ./simple-object-mach-o.$(objext) \ + ./simple-object-xcoff.$(objext) \ + ./sort.$(objext) ./spaces.$(objext) \ + ./splay-tree.$(objext) ./stack-limit.$(objext) \ + ./strerror.$(objext) ./strsignal.$(objext) \ + ./timeval-utils.$(objext) ./unlink-if-ordinary.$(objext) \ + ./xatexit.$(objext) ./xexit.$(objext) ./xmalloc.$(objext) \ + ./xmemdup.$(objext) ./xstrdup.$(objext) ./xstrerror.$(objext) \ + ./xstrndup.$(objext) + +# These are all the objects that configure may add to the library via +# $funcs or EXTRA_OFILES. This list exists here only for "make +# maint-missing" and "make check". 
+CONFIGURED_OFILES = ./asprintf.$(objext) ./atexit.$(objext) \ + ./basename.$(objext) ./bcmp.$(objext) ./bcopy.$(objext) \ + ./bsearch.$(objext) ./bzero.$(objext) \ + ./calloc.$(objext) ./clock.$(objext) ./copysign.$(objext) \ + ./_doprnt.$(objext) \ + ./ffs.$(objext) \ + ./getcwd.$(objext) ./getpagesize.$(objext) \ + ./gettimeofday.$(objext) \ + ./index.$(objext) ./insque.$(objext) \ + ./memchr.$(objext) ./memcmp.$(objext) ./memcpy.$(objext) \ + ./memmem.$(objext) ./memmove.$(objext) \ + ./mempcpy.$(objext) ./memset.$(objext) ./mkstemps.$(objext) \ + ./pex-djgpp.$(objext) ./pex-msdos.$(objext) \ + ./pex-unix.$(objext) ./pex-win32.$(objext) \ + ./putenv.$(objext) \ + ./random.$(objext) ./rename.$(objext) ./rindex.$(objext) \ + ./setenv.$(objext) \ + ./setproctitle.$(objext) \ + ./sigsetmask.$(objext) ./snprintf.$(objext) \ + ./stpcpy.$(objext) ./stpncpy.$(objext) ./strcasecmp.$(objext) \ + ./strchr.$(objext) ./strdup.$(objext) ./strncasecmp.$(objext) \ + ./strncmp.$(objext) ./strndup.$(objext) ./strnlen.$(objext) \ + ./strrchr.$(objext) ./strstr.$(objext) ./strtod.$(objext) \ + ./strtol.$(objext) ./strtoul.$(objext) ./strverscmp.$(objext) \ + ./tmpnam.$(objext) \ + ./vasprintf.$(objext) ./vfork.$(objext) ./vfprintf.$(objext) \ + ./vprintf.$(objext) ./vsnprintf.$(objext) ./vsprintf.$(objext) \ + ./waitpid.$(objext) + +# These files are installed if the library has been configured to do so. +INSTALLED_HEADERS = \ + $(INCDIR)/ansidecl.h \ + $(INCDIR)/demangle.h \ + $(INCDIR)/dyn-string.h \ + $(INCDIR)/fibheap.h \ + $(INCDIR)/floatformat.h \ + $(INCDIR)/hashtab.h \ + $(INCDIR)/libiberty.h \ + $(INCDIR)/objalloc.h \ + $(INCDIR)/partition.h \ + $(INCDIR)/safe-ctype.h \ + $(INCDIR)/sort.h \ + $(INCDIR)/splay-tree.h \ + $(INCDIR)/timeval-utils.h + +$(TARGETLIB): $(REQUIRED_OFILES) $(EXTRA_OFILES) $(LIBOBJS) + -rm -f $(TARGETLIB) pic/$(TARGETLIB) + $(AR) $(AR_FLAGS) $(TARGETLIB) \ + $(REQUIRED_OFILES) $(EXTRA_OFILES) $(LIBOBJS) + $(RANLIB) $(TARGETLIB) + if [ x"$(PICFLAG)" != x ]; then \ + cd pic; \ + $(AR) $(AR_FLAGS) $(TARGETLIB) \ + $(REQUIRED_OFILES) $(EXTRA_OFILES) $(LIBOBJS); \ + $(RANLIB) $(TARGETLIB); \ + cd ..; \ + else true; fi + +$(TESTLIB): $(REQUIRED_OFILES) $(CONFIGURED_OFILES) + -rm -f $(TESTLIB) + $(AR) $(AR_FLAGS) $(TESTLIB) \ + $(REQUIRED_OFILES) $(CONFIGURED_OFILES) + $(RANLIB) $(TESTLIB) + +info: libiberty.info info-subdir +install-info: install-info-subdir +clean-info: clean-info-subdir +dvi: libiberty.dvi dvi-subdir + +LIBIBERTY_PDFFILES = libiberty.pdf + +pdf: $(LIBIBERTY_PDFFILES) pdf-subdir + +.PHONY: install-pdf + +pdf__strip_dir = `echo $$p | sed -e 's|^.*/||'`; + +install-pdf: $(LIBIBERTY_PDFFILES) + @$(NORMAL_INSTALL) + test -z "$(pdfdir)" || $(mkinstalldirs) "$(DESTDIR)$(pdfdir)" + @list='$(LIBIBERTY_PDFFILES)'; for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + f=$(pdf__strip_dir) \ + echo " $(INSTALL_DATA) '$$d$$p' '$(DESTDIR)$(pdfdir)/$$f'"; \ + $(INSTALL_DATA) "$$d$$p" "$(DESTDIR)$(pdfdir)/$$f"; \ + done + +# html, install-html targets +HTMLS = libiberty.html + +html: $(HTMLS) + +.PHONY: install-html install-html-am + +NORMAL_INSTALL = : +mkdir_p = mkdir -p -- + +html__strip_dir = `echo $$p | sed -e 's|^.*/||'`; + +install-html: install-html-am + +install-html-am: $(HTMLS) + @$(NORMAL_INSTALL) + test -z "$(htmldir)" || $(mkdir_p) "$(DESTDIR)$(htmldir)" + @list='$(HTMLS)'; for p in $$list; do \ + if test -f "$$p" || test -d "$$p"; then d=""; else d="$(srcdir)/"; fi; \ + f=$(html__strip_dir) \ + if test -d "$$d$$p"; then \ + echo " 
$(mkdir_p) '$(DESTDIR)$(htmldir)/$$f'"; \ + $(mkdir_p) "$(DESTDIR)$(htmldir)/$$f" || exit 1; \ + echo " $(INSTALL_DATA) '$$d$$p'/* '$(DESTDIR)$(htmldir)/$$f'"; \ + $(INSTALL_DATA) "$$d$$p"/* "$(DESTDIR)$(htmldir)/$$f"; \ + else \ + echo " $(INSTALL_DATA) '$$d$$p' '$(DESTDIR)$(htmldir)/$$f'"; \ + $(INSTALL_DATA) "$$d$$p" "$(DESTDIR)$(htmldir)/$$f"; \ + fi; \ + done + +TEXISRC = \ + $(srcdir)/libiberty.texi \ + $(srcdir)/copying-lib.texi \ + $(srcdir)/obstacks.texi \ + $(srcdir)/functions.texi + +# Additional files that have texi snippets that need to be collected +# and sorted. Some are here because the sources are imported from +# elsewhere. Others represent headers in ../include. +TEXIFILES = fnmatch.txh pexecute.txh simple-object.txh + +libiberty.info : $(srcdir)/libiberty.texi $(TEXISRC) + $(MAKEINFO) -I$(srcdir) $(srcdir)/libiberty.texi + +libiberty.dvi : $(srcdir)/libiberty.texi $(TEXISRC) + texi2dvi $(srcdir)/libiberty.texi + +libiberty.pdf : $(srcdir)/libiberty.texi $(TEXISRC) + texi2pdf $(srcdir)/libiberty.texi + +libiberty.html : $(srcdir)/libiberty.texi $(TEXISRC) + $(MAKEINFO) --no-split --html -I$(srcdir) -o $@ $< + +#$(srcdir)/functions.texi : stamp-functions +# @true + +#stamp-functions : $(CFILES:%=$(srcdir)/%) $(TEXIFILES:%=$(srcdir)/%) $(srcdir)/gather-docs Makefile +# $(PERL) $(srcdir)/gather-docs $(srcdir) $(srcdir)/functions.texi $(CFILES) $(TEXIFILES) +# echo stamp > stamp-functions + +INSTALL_DEST = libdir +install: install_to_$(INSTALL_DEST) install-subdir +install-strip: install + +.PHONY: install install-strip + +# This is tricky. Even though CC in the Makefile contains +# multilib-specific flags, it's overridden by FLAGS_TO_PASS from the +# default multilib, so we have to take CFLAGS into account as well, +# since it will be passed the multilib flags. +MULTIOSDIR = `$(CC) $(CFLAGS) -print-multi-os-directory` +install_to_libdir: all + if test -n "${target_header_dir}"; then \ + ${mkinstalldirs} $(DESTDIR)$(libdir)/$(MULTIOSDIR); \ + $(INSTALL_DATA) $(TARGETLIB) $(DESTDIR)$(libdir)/$(MULTIOSDIR)/$(TARGETLIB)n; \ + ( cd $(DESTDIR)$(libdir)/$(MULTIOSDIR) ; chmod 644 $(TARGETLIB)n ;$(RANLIB) $(TARGETLIB)n ); \ + mv -f $(DESTDIR)$(libdir)/$(MULTIOSDIR)/$(TARGETLIB)n $(DESTDIR)$(libdir)/$(MULTIOSDIR)/$(TARGETLIB); \ + case "${target_header_dir}" in \ + /*) thd=${target_header_dir};; \ + *) thd=${includedir}/${target_header_dir};; \ + esac; \ + ${mkinstalldirs} $(DESTDIR)$${thd}; \ + for h in ${INSTALLED_HEADERS}; do \ + ${INSTALL_DATA} $$h $(DESTDIR)$${thd}; \ + done; \ + fi + @$(MULTIDO) $(FLAGS_TO_PASS) multi-do DO=install + +install_to_tooldir: all + ${mkinstalldirs} $(DESTDIR)$(tooldir)/lib/$(MULTIOSDIR) + $(INSTALL_DATA) $(TARGETLIB) $(DESTDIR)$(tooldir)/lib/$(MULTIOSDIR)/$(TARGETLIB)n + ( cd $(DESTDIR)$(tooldir)/lib/$(MULTIOSDIR) ; chmod 644 $(TARGETLIB)n; $(RANLIB) $(TARGETLIB)n ) + mv -f $(DESTDIR)$(tooldir)/lib/$(MULTIOSDIR)/$(TARGETLIB)n $(DESTDIR)$(tooldir)/lib/$(MULTIOSDIR)/$(TARGETLIB) + @$(MULTIDO) $(FLAGS_TO_PASS) multi-do DO=install + +# required-list was used when building a shared bfd/opcodes/libiberty +# library. I don't know if it used by anything currently. +required-list: Makefile + echo $(REQUIRED_OFILES) > required-list + +stamp-picdir: + if [ x"$(PICFLAG)" != x ] && [ ! 
-d pic ]; then \ + mkdir pic; \ + else true; fi + touch stamp-picdir + +.PHONY: all etags tags ls clean stage1 stage2 + +etags tags: TAGS etags-subdir + +TAGS: $(CFILES) + etags `for i in $(CFILES); do echo $(srcdir)/$$i ; done` + +# The standalone demangler (c++filt) has been moved to binutils. +# But make this target work anyway for demangler hacking. +demangle: $(ALL) $(srcdir)/cp-demangle.c + @echo "The standalone demangler, now named c++filt, is now" + @echo "a part of binutils." + $(CC) -DHAVE_CONFIG_H $(CFLAGS) $(CPPFLAGS) -I. -I$(INCDIR) $(HDEFINES) \ + $(srcdir)/cp-demangle.c -DSTANDALONE_DEMANGLER $(TARGETLIB) -o $@ + +ls: + @echo Makefile $(CFILES) + +# Various targets for maintainers. + +maint-missing : + @$(PERL) $(srcdir)/maint-tool -s $(srcdir) missing $(CFILES) $(REQUIRED_OFILES) $(CONFIGURED_OFILES) + +maint-buildall : $(REQUIRED_OFILES) $(CONFIGURED_OFILES) + @true + +maint-undoc : $(srcdir)/functions.texi + @$(PERL) $(srcdir)/maint-tool -s $(srcdir) undoc + +maint-deps : + @$(PERL) $(srcdir)/maint-tool -s $(srcdir) deps $(INCDIR) + +# Need to deal with profiled libraries, too. + +# Cleaning has to be done carefully to ensure that we don't clean our SUBDIRS +# multiple times, hence our explicit recursion with an empty SUBDIRS. +mostlyclean: mostlyclean-subdir + -rm -rf *.$(objext) pic core errs \#* *.E a.out + -rm -f errors dummy config.h stamp-* + -rm -f $(CONFIG_H) stamp-picdir + -rm -f libiberty.aux libiberty.cp libiberty.cps libiberty.fn libiberty.ky + -rm -f libiberty.log libiberty.tmp libiberty.tps libiberty.pg + -rm -f libiberty.pgs libiberty.toc libiberty.tp libiberty.tpl libiberty.vr + -rm -f libtexi.stamp + @$(MULTICLEAN) multi-clean DO=mostlyclean +clean: clean-subdir + $(MAKE) SUBDIRS="" mostlyclean + -rm -f *.a required-list tmpmulti.out + -rm -f libiberty.dvi libiberty.pdf libiberty.info* libiberty.html + @$(MULTICLEAN) multi-clean DO=clean +distclean: distclean-subdir + $(MAKE) SUBDIRS="" clean + @$(MULTICLEAN) multi-clean DO=distclean + -rm -f *~ Makefile config.cache config.status xhost-mkfrag TAGS multilib.out + -rm -f config.log + -rmdir testsuite 2>/dev/null +maintainer-clean realclean: maintainer-clean-subdir + $(MAKE) SUBDIRS="" distclean + +force: + +Makefile: $(srcdir)/Makefile.in config.status + CONFIG_FILES=Makefile CONFIG_HEADERS= $(SHELL) ./config.status + +# Depending on Makefile makes sure that config.status has been re-run +# if needed. This prevents problems with parallel builds. +config.h: stamp-h ; @true +stamp-h: $(srcdir)/config.in config.status Makefile + CONFIG_FILES= CONFIG_HEADERS=config.h:$(srcdir)/config.in $(SHELL) ./config.status + +config.status: $(srcdir)/configure + $(SHELL) ./config.status --recheck + +AUTOCONF = autoconf +configure_deps = $(srcdir)/aclocal.m4 \ + $(srcdir)/../config/acx.m4 \ + $(srcdir)/../config/no-executables.m4 \ + $(srcdir)/../config/override.m4 \ + $(srcdir)/../config/warnings.m4 \ + +$(srcdir)/configure: # $(srcdir)/configure.ac $(configure_deps) + cd $(srcdir) && $(AUTOCONF) + +# Depending on config.h makes sure that config.status has been re-run +# if needed. This prevents problems with parallel builds, in case +# subdirectories need to run config.status also. 
+all-subdir check-subdir installcheck-subdir info-subdir \ +install-info-subdir clean-info-subdir dvi-subdir pdf-subdir install-subdir \ +etags-subdir mostlyclean-subdir clean-subdir distclean-subdir \ +maintainer-clean-subdir: config.h + @subdirs='$(SUBDIRS)'; \ + target=`echo $@ | sed -e 's/-subdir//'`; \ + for dir in $$subdirs ; do \ + cd $$dir && $(MAKE) $(FLAGS_TO_PASS) $$target; \ + done + +$(REQUIRED_OFILES) $(EXTRA_OFILES) $(LIBOBJS): stamp-picdir +$(CONFIGURED_OFILES): stamp-picdir + +# Don't export variables to the environment, in order to not confuse +# configure. +.NOEXPORT: + +# The dependencies in the remainder of this file are automatically +# generated by "make maint-deps". Manual edits will be lost. + +./_doprnt.$(objext): $(srcdir)/_doprnt.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/safe-ctype.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/_doprnt.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/_doprnt.c $(OUTPUT_OPTION) + +./alloca.$(objext): $(srcdir)/alloca.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/alloca.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/alloca.c $(OUTPUT_OPTION) + +./argv.$(objext): $(srcdir)/argv.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h $(INCDIR)/safe-ctype.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/argv.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/argv.c $(OUTPUT_OPTION) + +./asprintf.$(objext): $(srcdir)/asprintf.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/asprintf.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/asprintf.c $(OUTPUT_OPTION) + +./atexit.$(objext): $(srcdir)/atexit.c config.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/atexit.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/atexit.c $(OUTPUT_OPTION) + +./basename.$(objext): $(srcdir)/basename.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h $(INCDIR)/safe-ctype.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/basename.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/basename.c $(OUTPUT_OPTION) + +./bcmp.$(objext): $(srcdir)/bcmp.c + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/bcmp.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/bcmp.c $(OUTPUT_OPTION) + +./bcopy.$(objext): $(srcdir)/bcopy.c + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/bcopy.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/bcopy.c $(OUTPUT_OPTION) + +./bsearch.$(objext): $(srcdir)/bsearch.c config.h $(INCDIR)/ansidecl.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/bsearch.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/bsearch.c $(OUTPUT_OPTION) + +./bzero.$(objext): $(srcdir)/bzero.c + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/bzero.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/bzero.c $(OUTPUT_OPTION) + +./calloc.$(objext): $(srcdir)/calloc.c $(INCDIR)/ansidecl.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/calloc.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/calloc.c $(OUTPUT_OPTION) + +./choose-temp.$(objext): $(srcdir)/choose-temp.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/choose-temp.c -o pic/$@; \ + 
else true; fi + $(COMPILE.c) $(srcdir)/choose-temp.c $(OUTPUT_OPTION) + +./clock.$(objext): $(srcdir)/clock.c config.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/clock.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/clock.c $(OUTPUT_OPTION) + +./concat.$(objext): $(srcdir)/concat.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/concat.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/concat.c $(OUTPUT_OPTION) + +./copysign.$(objext): $(srcdir)/copysign.c $(INCDIR)/ansidecl.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/copysign.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/copysign.c $(OUTPUT_OPTION) + +./cp-demangle.$(objext): $(srcdir)/cp-demangle.c config.h $(INCDIR)/ansidecl.h \ + $(srcdir)/cp-demangle.h $(INCDIR)/demangle.h \ + $(INCDIR)/dyn-string.h $(INCDIR)/getopt.h $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/cp-demangle.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/cp-demangle.c $(OUTPUT_OPTION) + +./cp-demint.$(objext): $(srcdir)/cp-demint.c config.h $(INCDIR)/ansidecl.h \ + $(srcdir)/cp-demangle.h $(INCDIR)/demangle.h \ + $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/cp-demint.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/cp-demint.c $(OUTPUT_OPTION) + +./cplus-dem.$(objext): $(srcdir)/cplus-dem.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/demangle.h $(INCDIR)/libiberty.h \ + $(INCDIR)/safe-ctype.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/cplus-dem.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/cplus-dem.c $(OUTPUT_OPTION) + +./crc32.$(objext): $(srcdir)/crc32.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/crc32.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/crc32.c $(OUTPUT_OPTION) + +./dwarfnames.$(objext): $(srcdir)/dwarfnames.c $(INCDIR)/dwarf2.def \ + $(INCDIR)/dwarf2.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/dwarfnames.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/dwarfnames.c $(OUTPUT_OPTION) + +./dyn-string.$(objext): $(srcdir)/dyn-string.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/dyn-string.h $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/dyn-string.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/dyn-string.c $(OUTPUT_OPTION) + +./fdmatch.$(objext): $(srcdir)/fdmatch.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/fdmatch.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/fdmatch.c $(OUTPUT_OPTION) + +./ffs.$(objext): $(srcdir)/ffs.c + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/ffs.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/ffs.c $(OUTPUT_OPTION) + +./fibheap.$(objext): $(srcdir)/fibheap.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/fibheap.h $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/fibheap.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/fibheap.c $(OUTPUT_OPTION) + +./filename_cmp.$(objext): $(srcdir)/filename_cmp.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/filenames.h $(INCDIR)/hashtab.h \ + $(INCDIR)/safe-ctype.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) 
$(srcdir)/filename_cmp.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/filename_cmp.c $(OUTPUT_OPTION) + +./floatformat.$(objext): $(srcdir)/floatformat.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/floatformat.h $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/floatformat.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/floatformat.c $(OUTPUT_OPTION) + +./fnmatch.$(objext): $(srcdir)/fnmatch.c config.h $(INCDIR)/fnmatch.h \ + $(INCDIR)/safe-ctype.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/fnmatch.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/fnmatch.c $(OUTPUT_OPTION) + +./fopen_unlocked.$(objext): $(srcdir)/fopen_unlocked.c config.h \ + $(INCDIR)/ansidecl.h $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/fopen_unlocked.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/fopen_unlocked.c $(OUTPUT_OPTION) + +./getcwd.$(objext): $(srcdir)/getcwd.c config.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/getcwd.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/getcwd.c $(OUTPUT_OPTION) + +./getopt.$(objext): $(srcdir)/getopt.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/getopt.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/getopt.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/getopt.c $(OUTPUT_OPTION) + +./getopt1.$(objext): $(srcdir)/getopt1.c config.h $(INCDIR)/getopt.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/getopt1.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/getopt1.c $(OUTPUT_OPTION) + +./getpagesize.$(objext): $(srcdir)/getpagesize.c config.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/getpagesize.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/getpagesize.c $(OUTPUT_OPTION) + +./getpwd.$(objext): $(srcdir)/getpwd.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/getpwd.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/getpwd.c $(OUTPUT_OPTION) + +./getruntime.$(objext): $(srcdir)/getruntime.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/getruntime.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/getruntime.c $(OUTPUT_OPTION) + +./gettimeofday.$(objext): $(srcdir)/gettimeofday.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/gettimeofday.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/gettimeofday.c $(OUTPUT_OPTION) + +./hashtab.$(objext): $(srcdir)/hashtab.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/hashtab.h $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/hashtab.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/hashtab.c $(OUTPUT_OPTION) + +./hex.$(objext): $(srcdir)/hex.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h $(INCDIR)/safe-ctype.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/hex.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/hex.c $(OUTPUT_OPTION) + +./index.$(objext): $(srcdir)/index.c + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/index.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/index.c $(OUTPUT_OPTION) + +./insque.$(objext): $(srcdir)/insque.c + if [ x"$(PICFLAG)" != x ]; then \ + 
$(COMPILE.c) $(PICFLAG) $(srcdir)/insque.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/insque.c $(OUTPUT_OPTION) + +./lbasename.$(objext): $(srcdir)/lbasename.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/filenames.h $(INCDIR)/hashtab.h $(INCDIR)/libiberty.h \ + $(INCDIR)/safe-ctype.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/lbasename.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/lbasename.c $(OUTPUT_OPTION) + +./lrealpath.$(objext): $(srcdir)/lrealpath.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/lrealpath.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/lrealpath.c $(OUTPUT_OPTION) + +./make-relative-prefix.$(objext): $(srcdir)/make-relative-prefix.c config.h \ + $(INCDIR)/ansidecl.h $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/make-relative-prefix.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/make-relative-prefix.c $(OUTPUT_OPTION) + +./make-temp-file.$(objext): $(srcdir)/make-temp-file.c config.h \ + $(INCDIR)/ansidecl.h $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/make-temp-file.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/make-temp-file.c $(OUTPUT_OPTION) + +./md5.$(objext): $(srcdir)/md5.c config.h $(INCDIR)/ansidecl.h $(INCDIR)/md5.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/md5.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/md5.c $(OUTPUT_OPTION) + +./memchr.$(objext): $(srcdir)/memchr.c $(INCDIR)/ansidecl.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/memchr.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/memchr.c $(OUTPUT_OPTION) + +./memcmp.$(objext): $(srcdir)/memcmp.c $(INCDIR)/ansidecl.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/memcmp.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/memcmp.c $(OUTPUT_OPTION) + +./memcpy.$(objext): $(srcdir)/memcpy.c $(INCDIR)/ansidecl.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/memcpy.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/memcpy.c $(OUTPUT_OPTION) + +./memmem.$(objext): $(srcdir)/memmem.c config.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/memmem.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/memmem.c $(OUTPUT_OPTION) + +./memmove.$(objext): $(srcdir)/memmove.c $(INCDIR)/ansidecl.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/memmove.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/memmove.c $(OUTPUT_OPTION) + +./mempcpy.$(objext): $(srcdir)/mempcpy.c $(INCDIR)/ansidecl.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/mempcpy.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/mempcpy.c $(OUTPUT_OPTION) + +./memset.$(objext): $(srcdir)/memset.c $(INCDIR)/ansidecl.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/memset.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/memset.c $(OUTPUT_OPTION) + +./mkstemps.$(objext): $(srcdir)/mkstemps.c config.h $(INCDIR)/ansidecl.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/mkstemps.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/mkstemps.c $(OUTPUT_OPTION) + +./msdos.$(objext): $(srcdir)/msdos.c + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/msdos.c -o pic/$@; \ + else true; fi + $(COMPILE.c) 
$(srcdir)/msdos.c $(OUTPUT_OPTION) + +./objalloc.$(objext): $(srcdir)/objalloc.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/objalloc.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/objalloc.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/objalloc.c $(OUTPUT_OPTION) + +./obstack.$(objext): $(srcdir)/obstack.c config.h $(INCDIR)/obstack.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/obstack.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/obstack.c $(OUTPUT_OPTION) + +./partition.$(objext): $(srcdir)/partition.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h $(INCDIR)/partition.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/partition.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/partition.c $(OUTPUT_OPTION) + +./pex-common.$(objext): $(srcdir)/pex-common.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h $(srcdir)/pex-common.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/pex-common.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/pex-common.c $(OUTPUT_OPTION) + +./pex-djgpp.$(objext): $(srcdir)/pex-djgpp.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h $(srcdir)/pex-common.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/pex-djgpp.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/pex-djgpp.c $(OUTPUT_OPTION) + +./pex-msdos.$(objext): $(srcdir)/pex-msdos.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h $(srcdir)/pex-common.h \ + $(INCDIR)/safe-ctype.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/pex-msdos.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/pex-msdos.c $(OUTPUT_OPTION) + +./pex-one.$(objext): $(srcdir)/pex-one.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/pex-one.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/pex-one.c $(OUTPUT_OPTION) + +./pex-unix.$(objext): $(srcdir)/pex-unix.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h $(srcdir)/pex-common.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/pex-unix.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/pex-unix.c $(OUTPUT_OPTION) + +./pex-win32.$(objext): $(srcdir)/pex-win32.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h $(srcdir)/pex-common.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/pex-win32.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/pex-win32.c $(OUTPUT_OPTION) + +./pexecute.$(objext): $(srcdir)/pexecute.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/pexecute.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/pexecute.c $(OUTPUT_OPTION) + +./physmem.$(objext): $(srcdir)/physmem.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/libiberty.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/physmem.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/physmem.c $(OUTPUT_OPTION) + +./putenv.$(objext): $(srcdir)/putenv.c config.h $(INCDIR)/ansidecl.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/putenv.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/putenv.c $(OUTPUT_OPTION) + +./random.$(objext): $(srcdir)/random.c $(INCDIR)/ansidecl.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/random.c -o pic/$@; \ + else true; fi + $(COMPILE.c) 
$(srcdir)/random.c $(OUTPUT_OPTION) + +./regex.$(objext): $(srcdir)/regex.c config.h $(INCDIR)/ansidecl.h \ + $(INCDIR)/xregex.h $(INCDIR)/xregex2.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/regex.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/regex.c $(OUTPUT_OPTION) + +./rename.$(objext): $(srcdir)/rename.c config.h $(INCDIR)/ansidecl.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/rename.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/rename.c $(OUTPUT_OPTION) + +./rindex.$(objext): $(srcdir)/rindex.c + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/rindex.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/rindex.c $(OUTPUT_OPTION) + +./safe-ctype.$(objext): $(srcdir)/safe-ctype.c $(INCDIR)/ansidecl.h \ + $(INCDIR)/safe-ctype.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/safe-ctype.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/safe-ctype.c $(OUTPUT_OPTION) + +./setenv.$(objext): $(srcdir)/setenv.c config.h $(INCDIR)/ansidecl.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/setenv.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/setenv.c $(OUTPUT_OPTION) + +./setproctitle.$(objext): $(srcdir)/setproctitle.c config.h $(INCDIR)/ansidecl.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/setproctitle.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/setproctitle.c $(OUTPUT_OPTION) + +./sha1.$(objext): $(srcdir)/sha1.c config.h $(INCDIR)/ansidecl.h $(INCDIR)/sha1.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/sha1.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/sha1.c $(OUTPUT_OPTION) + +./sigsetmask.$(objext): $(srcdir)/sigsetmask.c $(INCDIR)/ansidecl.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/sigsetmask.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/sigsetmask.c $(OUTPUT_OPTION) + +./simple-object-coff.$(objext): $(srcdir)/simple-object-coff.c config.h \ + $(INCDIR)/ansidecl.h $(INCDIR)/libiberty.h \ + $(srcdir)/simple-object-common.h $(INCDIR)/simple-object.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/simple-object-coff.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/simple-object-coff.c $(OUTPUT_OPTION) + +./simple-object-elf.$(objext): $(srcdir)/simple-object-elf.c config.h \ + $(INCDIR)/ansidecl.h $(INCDIR)/libiberty.h \ + $(srcdir)/simple-object-common.h $(INCDIR)/simple-object.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/simple-object-elf.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/simple-object-elf.c $(OUTPUT_OPTION) + +./simple-object-mach-o.$(objext): $(srcdir)/simple-object-mach-o.c config.h \ + $(INCDIR)/ansidecl.h $(INCDIR)/libiberty.h \ + $(srcdir)/simple-object-common.h $(INCDIR)/simple-object.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/simple-object-mach-o.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/simple-object-mach-o.c $(OUTPUT_OPTION) + +./simple-object-xcoff.$(objext): $(srcdir)/simple-object-xcoff.c config.h \ + $(INCDIR)/ansidecl.h $(INCDIR)/libiberty.h \ + $(srcdir)/simple-object-common.h $(INCDIR)/simple-object.h + if [ x"$(PICFLAG)" != x ]; then \ + $(COMPILE.c) $(PICFLAG) $(srcdir)/simple-object-xcoff.c -o pic/$@; \ + else true; fi + $(COMPILE.c) $(srcdir)/simple-object-xcoff.c $(OUTPUT_OPTION) + +./simple-object.$(objext): $(srcdir)/simple-object.c config.h \ + 
+	$(INCDIR)/ansidecl.h $(INCDIR)/libiberty.h \
+	$(srcdir)/simple-object-common.h $(INCDIR)/simple-object.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/simple-object.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/simple-object.c $(OUTPUT_OPTION)
+
+./snprintf.$(objext): $(srcdir)/snprintf.c $(INCDIR)/ansidecl.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/snprintf.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/snprintf.c $(OUTPUT_OPTION)
+
+./sort.$(objext): $(srcdir)/sort.c config.h $(INCDIR)/ansidecl.h \
+	$(INCDIR)/libiberty.h $(INCDIR)/sort.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/sort.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/sort.c $(OUTPUT_OPTION)
+
+./spaces.$(objext): $(srcdir)/spaces.c config.h $(INCDIR)/ansidecl.h \
+	$(INCDIR)/libiberty.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/spaces.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/spaces.c $(OUTPUT_OPTION)
+
+./splay-tree.$(objext): $(srcdir)/splay-tree.c config.h $(INCDIR)/ansidecl.h \
+	$(INCDIR)/libiberty.h $(INCDIR)/splay-tree.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/splay-tree.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/splay-tree.c $(OUTPUT_OPTION)
+
+./stack-limit.$(objext): $(srcdir)/stack-limit.c config.h $(INCDIR)/ansidecl.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/stack-limit.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/stack-limit.c $(OUTPUT_OPTION)
+
+./stpcpy.$(objext): $(srcdir)/stpcpy.c $(INCDIR)/ansidecl.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/stpcpy.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/stpcpy.c $(OUTPUT_OPTION)
+
+./stpncpy.$(objext): $(srcdir)/stpncpy.c $(INCDIR)/ansidecl.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/stpncpy.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/stpncpy.c $(OUTPUT_OPTION)
+
+./strcasecmp.$(objext): $(srcdir)/strcasecmp.c $(INCDIR)/ansidecl.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/strcasecmp.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/strcasecmp.c $(OUTPUT_OPTION)
+
+./strchr.$(objext): $(srcdir)/strchr.c $(INCDIR)/ansidecl.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/strchr.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/strchr.c $(OUTPUT_OPTION)
+
+./strdup.$(objext): $(srcdir)/strdup.c $(INCDIR)/ansidecl.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/strdup.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/strdup.c $(OUTPUT_OPTION)
+
+./strerror.$(objext): $(srcdir)/strerror.c config.h $(INCDIR)/ansidecl.h \
+	$(INCDIR)/libiberty.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/strerror.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/strerror.c $(OUTPUT_OPTION)
+
+./strncasecmp.$(objext): $(srcdir)/strncasecmp.c $(INCDIR)/ansidecl.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/strncasecmp.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/strncasecmp.c $(OUTPUT_OPTION)
+
+./strncmp.$(objext): $(srcdir)/strncmp.c $(INCDIR)/ansidecl.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/strncmp.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/strncmp.c $(OUTPUT_OPTION)
+
+./strndup.$(objext): $(srcdir)/strndup.c $(INCDIR)/ansidecl.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/strndup.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/strndup.c $(OUTPUT_OPTION)
+
+./strnlen.$(objext): $(srcdir)/strnlen.c config.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/strnlen.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/strnlen.c $(OUTPUT_OPTION)
+
+./strrchr.$(objext): $(srcdir)/strrchr.c $(INCDIR)/ansidecl.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/strrchr.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/strrchr.c $(OUTPUT_OPTION)
+
+./strsignal.$(objext): $(srcdir)/strsignal.c config.h $(INCDIR)/ansidecl.h \
+	$(INCDIR)/libiberty.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/strsignal.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/strsignal.c $(OUTPUT_OPTION)
+
+./strstr.$(objext): $(srcdir)/strstr.c
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/strstr.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/strstr.c $(OUTPUT_OPTION)
+
+./strtod.$(objext): $(srcdir)/strtod.c $(INCDIR)/ansidecl.h \
+	$(INCDIR)/safe-ctype.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/strtod.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/strtod.c $(OUTPUT_OPTION)
+
+./strtol.$(objext): $(srcdir)/strtol.c config.h $(INCDIR)/safe-ctype.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/strtol.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/strtol.c $(OUTPUT_OPTION)
+
+./strtoul.$(objext): $(srcdir)/strtoul.c config.h $(INCDIR)/ansidecl.h \
+	$(INCDIR)/safe-ctype.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/strtoul.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/strtoul.c $(OUTPUT_OPTION)
+
+./strverscmp.$(objext): $(srcdir)/strverscmp.c $(INCDIR)/ansidecl.h \
+	$(INCDIR)/libiberty.h $(INCDIR)/safe-ctype.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/strverscmp.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/strverscmp.c $(OUTPUT_OPTION)
+
+./timeval-utils.$(objext): $(srcdir)/timeval-utils.c config.h \
+	$(INCDIR)/timeval-utils.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/timeval-utils.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/timeval-utils.c $(OUTPUT_OPTION)
+
+./tmpnam.$(objext): $(srcdir)/tmpnam.c
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/tmpnam.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/tmpnam.c $(OUTPUT_OPTION)
+
+./unlink-if-ordinary.$(objext): $(srcdir)/unlink-if-ordinary.c config.h \
+	$(INCDIR)/ansidecl.h $(INCDIR)/libiberty.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/unlink-if-ordinary.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/unlink-if-ordinary.c $(OUTPUT_OPTION)
+
+./vasprintf.$(objext): $(srcdir)/vasprintf.c config.h $(INCDIR)/ansidecl.h \
+	$(INCDIR)/libiberty.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/vasprintf.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/vasprintf.c $(OUTPUT_OPTION)
+
+./vfork.$(objext): $(srcdir)/vfork.c $(INCDIR)/ansidecl.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/vfork.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/vfork.c $(OUTPUT_OPTION)
+
+./vfprintf.$(objext): $(srcdir)/vfprintf.c $(INCDIR)/ansidecl.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/vfprintf.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/vfprintf.c $(OUTPUT_OPTION)
+
+./vprintf.$(objext): $(srcdir)/vprintf.c $(INCDIR)/ansidecl.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/vprintf.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/vprintf.c $(OUTPUT_OPTION)
+
+./vsnprintf.$(objext): $(srcdir)/vsnprintf.c config.h $(INCDIR)/ansidecl.h \
+	$(INCDIR)/libiberty.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/vsnprintf.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/vsnprintf.c $(OUTPUT_OPTION)
+
+./vsprintf.$(objext): $(srcdir)/vsprintf.c $(INCDIR)/ansidecl.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/vsprintf.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/vsprintf.c $(OUTPUT_OPTION)
+
+./waitpid.$(objext): $(srcdir)/waitpid.c config.h $(INCDIR)/ansidecl.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/waitpid.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/waitpid.c $(OUTPUT_OPTION)
+
+./xatexit.$(objext): $(srcdir)/xatexit.c config.h $(INCDIR)/ansidecl.h \
+	$(INCDIR)/libiberty.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/xatexit.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/xatexit.c $(OUTPUT_OPTION)
+
+./xexit.$(objext): $(srcdir)/xexit.c config.h $(INCDIR)/ansidecl.h \
+	$(INCDIR)/libiberty.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/xexit.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/xexit.c $(OUTPUT_OPTION)
+
+./xmalloc.$(objext): $(srcdir)/xmalloc.c config.h $(INCDIR)/ansidecl.h \
+	$(INCDIR)/libiberty.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/xmalloc.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/xmalloc.c $(OUTPUT_OPTION)
+
+./xmemdup.$(objext): $(srcdir)/xmemdup.c config.h $(INCDIR)/ansidecl.h \
+	$(INCDIR)/libiberty.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/xmemdup.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/xmemdup.c $(OUTPUT_OPTION)
+
+./xstrdup.$(objext): $(srcdir)/xstrdup.c config.h $(INCDIR)/ansidecl.h \
+	$(INCDIR)/libiberty.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/xstrdup.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/xstrdup.c $(OUTPUT_OPTION)
+
+./xstrerror.$(objext): $(srcdir)/xstrerror.c config.h $(INCDIR)/ansidecl.h \
+	$(INCDIR)/libiberty.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/xstrerror.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/xstrerror.c $(OUTPUT_OPTION)
+
+./xstrndup.$(objext): $(srcdir)/xstrndup.c config.h $(INCDIR)/ansidecl.h \
+	$(INCDIR)/libiberty.h
+	if [ x"$(PICFLAG)" != x ]; then \
+	  $(COMPILE.c) $(PICFLAG) $(srcdir)/xstrndup.c -o pic/$@; \
+	else true; fi
+	$(COMPILE.c) $(srcdir)/xstrndup.c $(OUTPUT_OPTION)
+
diff -Nur gcc-4.9.4.orig/libiberty/required-list gcc-4.9.4/libiberty/required-list
--- gcc-4.9.4.orig/libiberty/required-list 1970-01-01 01:00:00.000000000 +0100
+++ gcc-4.9.4/libiberty/required-list 2016-08-08 20:37:53.866593761 +0200
@@ -0,0 +1 @@
+./regex.o ./cplus-dem.o ./cp-demangle.o ./md5.o ./sha1.o ./alloca.o ./argv.o ./choose-temp.o ./concat.o ./cp-demint.o ./crc32.o ./dwarfnames.o ./dyn-string.o ./fdmatch.o ./fibheap.o ./filename_cmp.o ./floatformat.o ./fnmatch.o ./fopen_unlocked.o ./getopt.o ./getopt1.o ./getpwd.o ./getruntime.o ./hashtab.o ./hex.o ./lbasename.o ./lrealpath.o ./make-relative-prefix.o ./make-temp-file.o ./objalloc.o ./obstack.o ./partition.o ./pexecute.o ./physmem.o ./pex-common.o ./pex-one.o ./pex-unix.o ./safe-ctype.o ./simple-object.o ./simple-object-coff.o ./simple-object-elf.o ./simple-object-mach-o.o ./simple-object-xcoff.o ./sort.o ./spaces.o ./splay-tree.o ./stack-limit.o ./strerror.o ./strsignal.o ./timeval-utils.o ./unlink-if-ordinary.o ./xatexit.o ./xexit.o ./xmalloc.o ./xmemdup.o ./xstrdup.o ./xstrerror.o ./xstrndup.o
diff -Nur gcc-4.9.4.orig/libiberty/stamp-h gcc-4.9.4/libiberty/stamp-h
--- gcc-4.9.4.orig/libiberty/stamp-h 1970-01-01 01:00:00.000000000 +0100
+++ gcc-4.9.4/libiberty/stamp-h 2016-08-08 20:37:53.866593761 +0200
@@ -0,0 +1 @@
+timestamp
diff -Nur gcc-4.9.4.orig/libiberty/testsuite/Makefile gcc-4.9.4/libiberty/testsuite/Makefile
--- gcc-4.9.4.orig/libiberty/testsuite/Makefile 1970-01-01 01:00:00.000000000 +0100
+++ gcc-4.9.4/libiberty/testsuite/Makefile 2016-08-08 20:37:53.870593915 +0200
@@ -0,0 +1,92 @@
+#
+# Makefile
+# Copyright (C) 1999, 2002, 2006
+# Free Software Foundation
+#
+# This file is part of the libiberty library.
+# Libiberty is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Library General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# Libiberty is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Library General Public License for more details.
+#
+# You should have received a copy of the GNU Library General Public
+# License along with libiberty; see the file COPYING.LIB. If not,
+# write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+
+# This file was written by Tom Tromey .
+
+#
+# Makefile for libiberty/testsuite directory
+#
+
+srcdir = .
+
+
+SHELL = /bin/sh
+
+CC = /home/users/kito/toolchain/nds32le-linux-glibc-v3/bin/nds32le-linux-gcc
+CFLAGS = -g -O2
+LIBCFLAGS = $(CFLAGS)
+
+# Multilib support variables.
+MULTISRCTOP =
+
+INCDIR=$(srcdir)/../$(MULTISRCTOP)../include
+
+all:
+
+# CHECK is set to "really_check" or the empty string by configure.
+check: really-check
+
+really-check: check-cplus-dem check-pexecute check-expandargv
+
+# Run some tests of the demangler.
+check-cplus-dem: test-demangle $(srcdir)/demangle-expected
+	./test-demangle < $(srcdir)/demangle-expected
+
+# Check the pexecute code.
+check-pexecute: test-pexecute
+	./test-pexecute
+
+# Check the expandargv functionality
+check-expandargv: test-expandargv
+	./test-expandargv
+
+TEST_COMPILE = $(CC) -DHAVE_CONFIG_H $(LIBCFLAGS) -I.. -I$(INCDIR) $(HDEFINES)
+test-demangle: $(srcdir)/test-demangle.c ../libiberty.a
+	$(TEST_COMPILE) -o test-demangle \
+		$(srcdir)/test-demangle.c ../libiberty.a
+
+test-pexecute: $(srcdir)/test-pexecute.c ../libiberty.a
+	$(TEST_COMPILE) -DHAVE_CONFIG_H -I.. -o test-pexecute \
+		$(srcdir)/test-pexecute.c ../libiberty.a
+
+test-expandargv: $(srcdir)/test-expandargv.c ../libiberty.a
+	$(TEST_COMPILE) -DHAVE_CONFIG_H -I.. -o test-expandargv \
+		$(srcdir)/test-expandargv.c ../libiberty.a
+
+# Standard (either GNU or Cygnus) rules we don't use.
+html install-html info install-info clean-info dvi pdf install-pdf \
+install etags tags installcheck:
+
+# The standard clean rules.
+mostlyclean:
+	rm -f test-demangle
+	rm -f test-pexecute
+	rm -f test-expandargv
+	rm -f core
+clean: mostlyclean
+distclean: clean
+	rm -f Makefile
+maintainer-clean realclean: distclean
+
+Makefile: $(srcdir)/Makefile.in ../config.status
+	CONFIG_FILES=testsuite/Makefile CONFIG_HEADERS= \
+	  cd .. && $(SHELL) ./config.status
diff -Nur gcc-4.9.4.orig/libiberty/xhost-mkfrag gcc-4.9.4/libiberty/xhost-mkfrag
--- gcc-4.9.4.orig/libiberty/xhost-mkfrag 1970-01-01 01:00:00.000000000 +0100
+++ gcc-4.9.4/libiberty/xhost-mkfrag 2016-08-08 20:37:53.870593915 +0200
@@ -0,0 +1,2 @@
+# Warning: this fragment is automatically generated
+enable_shared = no