summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWaldemar Brodkorb <wbx@openadk.org>2021-04-09 14:34:40 (GMT)
committerWaldemar Brodkorb <wbx@openadk.org>2021-04-09 14:34:40 (GMT)
commit34f64dab4829227378994f1a7f8d60d13eb33652 (patch)
tree6f13f38a32aeed8b4e7291c752f01adf449a38e9
parentd62765dac74142468588229405896043b79bf32d (diff)
gcc: fix sparc toolchain, patch from buildroot
-rw-r--r--toolchain/gcc/patches/10.3.0/revert-sparc.patch23628
1 files changed, 23628 insertions, 0 deletions
diff --git a/toolchain/gcc/patches/10.3.0/revert-sparc.patch b/toolchain/gcc/patches/10.3.0/revert-sparc.patch
new file mode 100644
index 0000000..5df11ce
--- /dev/null
+++ b/toolchain/gcc/patches/10.3.0/revert-sparc.patch
@@ -0,0 +1,23628 @@
+diff -Nur gcc-10.3.0.orig/gcc/config/sparc/sparc.c gcc-10.3.0/gcc/config/sparc/sparc.c
+--- gcc-10.3.0.orig/gcc/config/sparc/sparc.c 2021-04-08 13:56:28.201742273 +0200
++++ gcc-10.3.0/gcc/config/sparc/sparc.c 2021-04-09 07:51:37.884501308 +0200
+@@ -4157,6 +4157,13 @@
+ static bool
+ sparc_cannot_force_const_mem (machine_mode mode, rtx x)
+ {
++ /* After IRA has run in PIC mode, it is too late to put anything into the
++ constant pool if the PIC register hasn't already been initialized. */
++ if ((lra_in_progress || reload_in_progress)
++ && flag_pic
++ && !crtl->uses_pic_offset_table)
++ return true;
++
+ switch (GET_CODE (x))
+ {
+ case CONST_INT:
+@@ -4192,11 +4199,9 @@
+ }
+
+ /* Global Offset Table support. */
+-static GTY(()) rtx got_symbol_rtx = NULL_RTX;
+-static GTY(()) rtx got_register_rtx = NULL_RTX;
+ static GTY(()) rtx got_helper_rtx = NULL_RTX;
+-
+-static GTY(()) bool got_helper_needed = false;
++static GTY(()) rtx got_register_rtx = NULL_RTX;
++static GTY(()) rtx got_symbol_rtx = NULL_RTX;
+
+ /* Return the SYMBOL_REF for the Global Offset Table. */
+
+@@ -4209,6 +4214,27 @@
+ return got_symbol_rtx;
+ }
+
++#ifdef HAVE_GAS_HIDDEN
++# define USE_HIDDEN_LINKONCE 1
++#else
++# define USE_HIDDEN_LINKONCE 0
++#endif
++
++static void
++get_pc_thunk_name (char name[32], unsigned int regno)
++{
++ const char *reg_name = reg_names[regno];
++
++ /* Skip the leading '%' as that cannot be used in a
++ symbol name. */
++ reg_name += 1;
++
++ if (USE_HIDDEN_LINKONCE)
++ sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
++ else
++ ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
++}
++
+ /* Wrapper around the load_pcrel_sym{si,di} patterns. */
+
+ static rtx
+@@ -4228,78 +4254,30 @@
+ return insn;
+ }
+
+-/* Output the load_pcrel_sym{si,di} patterns. */
+-
+-const char *
+-output_load_pcrel_sym (rtx *operands)
+-{
+- if (flag_delayed_branch)
+- {
+- output_asm_insn ("sethi\t%%hi(%a1-4), %0", operands);
+- output_asm_insn ("call\t%a2", operands);
+- output_asm_insn (" add\t%0, %%lo(%a1+4), %0", operands);
+- }
+- else
+- {
+- output_asm_insn ("sethi\t%%hi(%a1-8), %0", operands);
+- output_asm_insn ("add\t%0, %%lo(%a1-4), %0", operands);
+- output_asm_insn ("call\t%a2", operands);
+- output_asm_insn (" nop", NULL);
+- }
+-
+- if (operands[2] == got_helper_rtx)
+- got_helper_needed = true;
+-
+- return "";
+-}
+-
+-#ifdef HAVE_GAS_HIDDEN
+-# define USE_HIDDEN_LINKONCE 1
+-#else
+-# define USE_HIDDEN_LINKONCE 0
+-#endif
+-
+ /* Emit code to load the GOT register. */
+
+ void
+ load_got_register (void)
+ {
+- rtx insn;
++ if (!got_register_rtx)
++ got_register_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
+
+ if (TARGET_VXWORKS_RTP)
+- {
+- if (!got_register_rtx)
+- got_register_rtx = pic_offset_table_rtx;
+-
+- insn = gen_vxworks_load_got ();
+- }
++ emit_insn (gen_vxworks_load_got ());
+ else
+ {
+- if (!got_register_rtx)
+- got_register_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
+-
+ /* The GOT symbol is subject to a PC-relative relocation so we need a
+ helper function to add the PC value and thus get the final value. */
+ if (!got_helper_rtx)
+ {
+ char name[32];
+-
+- /* Skip the leading '%' as that cannot be used in a symbol name. */
+- if (USE_HIDDEN_LINKONCE)
+- sprintf (name, "__sparc_get_pc_thunk.%s",
+- reg_names[REGNO (got_register_rtx)] + 1);
+- else
+- ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC",
+- REGNO (got_register_rtx));
+-
++ get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
+ got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
+ }
+
+- insn
+- = gen_load_pcrel_sym (got_register_rtx, sparc_got (), got_helper_rtx);
++ emit_insn (gen_load_pcrel_sym (got_register_rtx, sparc_got (),
++ got_helper_rtx));
+ }
+-
+- emit_insn (insn);
+ }
+
+ /* Ensure that we are not using patterns that are not OK with PIC. */
+@@ -5464,7 +5442,7 @@
+ return true;
+
+ /* GOT register (%l7) if needed. */
+- if (got_register_rtx && regno == REGNO (got_register_rtx))
++ if (regno == GLOBAL_OFFSET_TABLE_REGNUM && got_register_rtx)
+ return true;
+
+ /* If the function accesses prior frames, the frame pointer and the return
+@@ -12507,9 +12485,10 @@
+ sparc_file_end (void)
+ {
+ /* If we need to emit the special GOT helper function, do so now. */
+- if (got_helper_needed)
++ if (got_helper_rtx)
+ {
+ const char *name = XSTR (got_helper_rtx, 0);
++ const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
+ #ifdef DWARF2_UNWIND_INFO
+ bool do_cfi;
+ #endif
+@@ -12546,22 +12525,17 @@
+ #ifdef DWARF2_UNWIND_INFO
+ do_cfi = dwarf2out_do_cfi_asm ();
+ if (do_cfi)
+- output_asm_insn (".cfi_startproc", NULL);
++ fprintf (asm_out_file, "\t.cfi_startproc\n");
+ #endif
+ if (flag_delayed_branch)
+- {
+- output_asm_insn ("jmp\t%%o7+8", NULL);
+- output_asm_insn (" add\t%%o7, %0, %0", &got_register_rtx);
+- }
++ fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
++ reg_name, reg_name);
+ else
+- {
+- output_asm_insn ("add\t%%o7, %0, %0", &got_register_rtx);
+- output_asm_insn ("jmp\t%%o7+8", NULL);
+- output_asm_insn (" nop", NULL);
+- }
++ fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
++ reg_name, reg_name);
+ #ifdef DWARF2_UNWIND_INFO
+ if (do_cfi)
+- output_asm_insn (".cfi_endproc", NULL);
++ fprintf (asm_out_file, "\t.cfi_endproc\n");
+ #endif
+ }
+
+@@ -13056,10 +13030,7 @@
+ edge entry_edge;
+ rtx_insn *seq;
+
+- /* In PIC mode, we need to always initialize the PIC register if optimization
+- is enabled, because we are called from IRA and LRA may later force things
+- to the constant pool for optimization purposes. */
+- if (!flag_pic || (!crtl->uses_pic_offset_table && !optimize))
++ if (!crtl->uses_pic_offset_table)
+ return;
+
+ start_sequence ();
+diff -Nur gcc-10.3.0.orig/gcc/config/sparc/sparc.c.orig gcc-10.3.0/gcc/config/sparc/sparc.c.orig
+--- gcc-10.3.0.orig/gcc/config/sparc/sparc.c.orig 1970-01-01 01:00:00.000000000 +0100
++++ gcc-10.3.0/gcc/config/sparc/sparc.c.orig 2021-04-08 13:56:28.201742273 +0200
+@@ -0,0 +1,13813 @@
++/* Subroutines for insn-output.c for SPARC.
++ Copyright (C) 1987-2020 Free Software Foundation, Inc.
++ Contributed by Michael Tiemann (tiemann@cygnus.com)
++ 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
++ at Cygnus Support.
++
++This file is part of GCC.
++
++GCC is free software; you can redistribute it and/or modify
++it under the terms of the GNU General Public License as published by
++the Free Software Foundation; either version 3, or (at your option)
++any later version.
++
++GCC is distributed in the hope that it will be useful,
++but WITHOUT ANY WARRANTY; without even the implied warranty of
++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License
++along with GCC; see the file COPYING3. If not see
++<http://www.gnu.org/licenses/>. */
++
++#define IN_TARGET_CODE 1
++
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "backend.h"
++#include "target.h"
++#include "rtl.h"
++#include "tree.h"
++#include "memmodel.h"
++#include "gimple.h"
++#include "df.h"
++#include "tm_p.h"
++#include "stringpool.h"
++#include "attribs.h"
++#include "expmed.h"
++#include "optabs.h"
++#include "regs.h"
++#include "emit-rtl.h"
++#include "recog.h"
++#include "diagnostic-core.h"
++#include "alias.h"
++#include "fold-const.h"
++#include "stor-layout.h"
++#include "calls.h"
++#include "varasm.h"
++#include "output.h"
++#include "insn-attr.h"
++#include "explow.h"
++#include "expr.h"
++#include "debug.h"
++#include "cfgrtl.h"
++#include "common/common-target.h"
++#include "gimplify.h"
++#include "langhooks.h"
++#include "reload.h"
++#include "tree-pass.h"
++#include "context.h"
++#include "builtins.h"
++#include "tree-vector-builder.h"
++#include "opts.h"
++
++/* This file should be included last. */
++#include "target-def.h"
++
++/* Processor costs */
++
++struct processor_costs {
++ /* Integer load */
++ const int int_load;
++
++ /* Integer signed load */
++ const int int_sload;
++
++ /* Integer zeroed load */
++ const int int_zload;
++
++ /* Float load */
++ const int float_load;
++
++ /* fmov, fneg, fabs */
++ const int float_move;
++
++ /* fadd, fsub */
++ const int float_plusminus;
++
++ /* fcmp */
++ const int float_cmp;
++
++ /* fmov, fmovr */
++ const int float_cmove;
++
++ /* fmul */
++ const int float_mul;
++
++ /* fdivs */
++ const int float_div_sf;
++
++ /* fdivd */
++ const int float_div_df;
++
++ /* fsqrts */
++ const int float_sqrt_sf;
++
++ /* fsqrtd */
++ const int float_sqrt_df;
++
++ /* umul/smul */
++ const int int_mul;
++
++ /* mulX */
++ const int int_mulX;
++
++ /* integer multiply cost for each bit set past the most
++ significant 3, so the formula for multiply cost becomes:
++
++ if (rs1 < 0)
++ highest_bit = highest_clear_bit(rs1);
++ else
++ highest_bit = highest_set_bit(rs1);
++ if (highest_bit < 3)
++ highest_bit = 3;
++ cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
++
++ A value of zero indicates that the multiply costs is fixed,
++ and not variable. */
++ const int int_mul_bit_factor;
++
++ /* udiv/sdiv */
++ const int int_div;
++
++ /* divX */
++ const int int_divX;
++
++ /* movcc, movr */
++ const int int_cmove;
++
++ /* penalty for shifts, due to scheduling rules etc. */
++ const int shift_penalty;
++
++ /* cost of a (predictable) branch. */
++ const int branch_cost;
++};
++
++static const
++struct processor_costs cypress_costs = {
++ COSTS_N_INSNS (2), /* int load */
++ COSTS_N_INSNS (2), /* int signed load */
++ COSTS_N_INSNS (2), /* int zeroed load */
++ COSTS_N_INSNS (2), /* float load */
++ COSTS_N_INSNS (5), /* fmov, fneg, fabs */
++ COSTS_N_INSNS (5), /* fadd, fsub */
++ COSTS_N_INSNS (1), /* fcmp */
++ COSTS_N_INSNS (1), /* fmov, fmovr */
++ COSTS_N_INSNS (7), /* fmul */
++ COSTS_N_INSNS (37), /* fdivs */
++ COSTS_N_INSNS (37), /* fdivd */
++ COSTS_N_INSNS (63), /* fsqrts */
++ COSTS_N_INSNS (63), /* fsqrtd */
++ COSTS_N_INSNS (1), /* imul */
++ COSTS_N_INSNS (1), /* imulX */
++ 0, /* imul bit factor */
++ COSTS_N_INSNS (1), /* idiv */
++ COSTS_N_INSNS (1), /* idivX */
++ COSTS_N_INSNS (1), /* movcc/movr */
++ 0, /* shift penalty */
++ 3 /* branch cost */
++};
++
++static const
++struct processor_costs supersparc_costs = {
++ COSTS_N_INSNS (1), /* int load */
++ COSTS_N_INSNS (1), /* int signed load */
++ COSTS_N_INSNS (1), /* int zeroed load */
++ COSTS_N_INSNS (0), /* float load */
++ COSTS_N_INSNS (3), /* fmov, fneg, fabs */
++ COSTS_N_INSNS (3), /* fadd, fsub */
++ COSTS_N_INSNS (3), /* fcmp */
++ COSTS_N_INSNS (1), /* fmov, fmovr */
++ COSTS_N_INSNS (3), /* fmul */
++ COSTS_N_INSNS (6), /* fdivs */
++ COSTS_N_INSNS (9), /* fdivd */
++ COSTS_N_INSNS (12), /* fsqrts */
++ COSTS_N_INSNS (12), /* fsqrtd */
++ COSTS_N_INSNS (4), /* imul */
++ COSTS_N_INSNS (4), /* imulX */
++ 0, /* imul bit factor */
++ COSTS_N_INSNS (4), /* idiv */
++ COSTS_N_INSNS (4), /* idivX */
++ COSTS_N_INSNS (1), /* movcc/movr */
++ 1, /* shift penalty */
++ 3 /* branch cost */
++};
++
++static const
++struct processor_costs hypersparc_costs = {
++ COSTS_N_INSNS (1), /* int load */
++ COSTS_N_INSNS (1), /* int signed load */
++ COSTS_N_INSNS (1), /* int zeroed load */
++ COSTS_N_INSNS (1), /* float load */
++ COSTS_N_INSNS (1), /* fmov, fneg, fabs */
++ COSTS_N_INSNS (1), /* fadd, fsub */
++ COSTS_N_INSNS (1), /* fcmp */
++ COSTS_N_INSNS (1), /* fmov, fmovr */
++ COSTS_N_INSNS (1), /* fmul */
++ COSTS_N_INSNS (8), /* fdivs */
++ COSTS_N_INSNS (12), /* fdivd */
++ COSTS_N_INSNS (17), /* fsqrts */
++ COSTS_N_INSNS (17), /* fsqrtd */
++ COSTS_N_INSNS (17), /* imul */
++ COSTS_N_INSNS (17), /* imulX */
++ 0, /* imul bit factor */
++ COSTS_N_INSNS (17), /* idiv */
++ COSTS_N_INSNS (17), /* idivX */
++ COSTS_N_INSNS (1), /* movcc/movr */
++ 0, /* shift penalty */
++ 3 /* branch cost */
++};
++
++static const
++struct processor_costs leon_costs = {
++ COSTS_N_INSNS (1), /* int load */
++ COSTS_N_INSNS (1), /* int signed load */
++ COSTS_N_INSNS (1), /* int zeroed load */
++ COSTS_N_INSNS (1), /* float load */
++ COSTS_N_INSNS (1), /* fmov, fneg, fabs */
++ COSTS_N_INSNS (1), /* fadd, fsub */
++ COSTS_N_INSNS (1), /* fcmp */
++ COSTS_N_INSNS (1), /* fmov, fmovr */
++ COSTS_N_INSNS (1), /* fmul */
++ COSTS_N_INSNS (15), /* fdivs */
++ COSTS_N_INSNS (15), /* fdivd */
++ COSTS_N_INSNS (23), /* fsqrts */
++ COSTS_N_INSNS (23), /* fsqrtd */
++ COSTS_N_INSNS (5), /* imul */
++ COSTS_N_INSNS (5), /* imulX */
++ 0, /* imul bit factor */
++ COSTS_N_INSNS (5), /* idiv */
++ COSTS_N_INSNS (5), /* idivX */
++ COSTS_N_INSNS (1), /* movcc/movr */
++ 0, /* shift penalty */
++ 3 /* branch cost */
++};
++
++static const
++struct processor_costs leon3_costs = {
++ COSTS_N_INSNS (1), /* int load */
++ COSTS_N_INSNS (1), /* int signed load */
++ COSTS_N_INSNS (1), /* int zeroed load */
++ COSTS_N_INSNS (1), /* float load */
++ COSTS_N_INSNS (1), /* fmov, fneg, fabs */
++ COSTS_N_INSNS (1), /* fadd, fsub */
++ COSTS_N_INSNS (1), /* fcmp */
++ COSTS_N_INSNS (1), /* fmov, fmovr */
++ COSTS_N_INSNS (1), /* fmul */
++ COSTS_N_INSNS (14), /* fdivs */
++ COSTS_N_INSNS (15), /* fdivd */
++ COSTS_N_INSNS (22), /* fsqrts */
++ COSTS_N_INSNS (23), /* fsqrtd */
++ COSTS_N_INSNS (5), /* imul */
++ COSTS_N_INSNS (5), /* imulX */
++ 0, /* imul bit factor */
++ COSTS_N_INSNS (35), /* idiv */
++ COSTS_N_INSNS (35), /* idivX */
++ COSTS_N_INSNS (1), /* movcc/movr */
++ 0, /* shift penalty */
++ 3 /* branch cost */
++};
++
++static const
++struct processor_costs sparclet_costs = {
++ COSTS_N_INSNS (3), /* int load */
++ COSTS_N_INSNS (3), /* int signed load */
++ COSTS_N_INSNS (1), /* int zeroed load */
++ COSTS_N_INSNS (1), /* float load */
++ COSTS_N_INSNS (1), /* fmov, fneg, fabs */
++ COSTS_N_INSNS (1), /* fadd, fsub */
++ COSTS_N_INSNS (1), /* fcmp */
++ COSTS_N_INSNS (1), /* fmov, fmovr */
++ COSTS_N_INSNS (1), /* fmul */
++ COSTS_N_INSNS (1), /* fdivs */
++ COSTS_N_INSNS (1), /* fdivd */
++ COSTS_N_INSNS (1), /* fsqrts */
++ COSTS_N_INSNS (1), /* fsqrtd */
++ COSTS_N_INSNS (5), /* imul */
++ COSTS_N_INSNS (5), /* imulX */
++ 0, /* imul bit factor */
++ COSTS_N_INSNS (5), /* idiv */
++ COSTS_N_INSNS (5), /* idivX */
++ COSTS_N_INSNS (1), /* movcc/movr */
++ 0, /* shift penalty */
++ 3 /* branch cost */
++};
++
++static const
++struct processor_costs ultrasparc_costs = {
++ COSTS_N_INSNS (2), /* int load */
++ COSTS_N_INSNS (3), /* int signed load */
++ COSTS_N_INSNS (2), /* int zeroed load */
++ COSTS_N_INSNS (2), /* float load */
++ COSTS_N_INSNS (1), /* fmov, fneg, fabs */
++ COSTS_N_INSNS (4), /* fadd, fsub */
++ COSTS_N_INSNS (1), /* fcmp */
++ COSTS_N_INSNS (2), /* fmov, fmovr */
++ COSTS_N_INSNS (4), /* fmul */
++ COSTS_N_INSNS (13), /* fdivs */
++ COSTS_N_INSNS (23), /* fdivd */
++ COSTS_N_INSNS (13), /* fsqrts */
++ COSTS_N_INSNS (23), /* fsqrtd */
++ COSTS_N_INSNS (4), /* imul */
++ COSTS_N_INSNS (4), /* imulX */
++ 2, /* imul bit factor */
++ COSTS_N_INSNS (37), /* idiv */
++ COSTS_N_INSNS (68), /* idivX */
++ COSTS_N_INSNS (2), /* movcc/movr */
++ 2, /* shift penalty */
++ 2 /* branch cost */
++};
++
++static const
++struct processor_costs ultrasparc3_costs = {
++ COSTS_N_INSNS (2), /* int load */
++ COSTS_N_INSNS (3), /* int signed load */
++ COSTS_N_INSNS (3), /* int zeroed load */
++ COSTS_N_INSNS (2), /* float load */
++ COSTS_N_INSNS (3), /* fmov, fneg, fabs */
++ COSTS_N_INSNS (4), /* fadd, fsub */
++ COSTS_N_INSNS (5), /* fcmp */
++ COSTS_N_INSNS (3), /* fmov, fmovr */
++ COSTS_N_INSNS (4), /* fmul */
++ COSTS_N_INSNS (17), /* fdivs */
++ COSTS_N_INSNS (20), /* fdivd */
++ COSTS_N_INSNS (20), /* fsqrts */
++ COSTS_N_INSNS (29), /* fsqrtd */
++ COSTS_N_INSNS (6), /* imul */
++ COSTS_N_INSNS (6), /* imulX */
++ 0, /* imul bit factor */
++ COSTS_N_INSNS (40), /* idiv */
++ COSTS_N_INSNS (71), /* idivX */
++ COSTS_N_INSNS (2), /* movcc/movr */
++ 0, /* shift penalty */
++ 2 /* branch cost */
++};
++
++static const
++struct processor_costs niagara_costs = {
++ COSTS_N_INSNS (3), /* int load */
++ COSTS_N_INSNS (3), /* int signed load */
++ COSTS_N_INSNS (3), /* int zeroed load */
++ COSTS_N_INSNS (9), /* float load */
++ COSTS_N_INSNS (8), /* fmov, fneg, fabs */
++ COSTS_N_INSNS (8), /* fadd, fsub */
++ COSTS_N_INSNS (26), /* fcmp */
++ COSTS_N_INSNS (8), /* fmov, fmovr */
++ COSTS_N_INSNS (29), /* fmul */
++ COSTS_N_INSNS (54), /* fdivs */
++ COSTS_N_INSNS (83), /* fdivd */
++ COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
++ COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
++ COSTS_N_INSNS (11), /* imul */
++ COSTS_N_INSNS (11), /* imulX */
++ 0, /* imul bit factor */
++ COSTS_N_INSNS (72), /* idiv */
++ COSTS_N_INSNS (72), /* idivX */
++ COSTS_N_INSNS (1), /* movcc/movr */
++ 0, /* shift penalty */
++ 4 /* branch cost */
++};
++
++static const
++struct processor_costs niagara2_costs = {
++ COSTS_N_INSNS (3), /* int load */
++ COSTS_N_INSNS (3), /* int signed load */
++ COSTS_N_INSNS (3), /* int zeroed load */
++ COSTS_N_INSNS (3), /* float load */
++ COSTS_N_INSNS (6), /* fmov, fneg, fabs */
++ COSTS_N_INSNS (6), /* fadd, fsub */
++ COSTS_N_INSNS (6), /* fcmp */
++ COSTS_N_INSNS (6), /* fmov, fmovr */
++ COSTS_N_INSNS (6), /* fmul */
++ COSTS_N_INSNS (19), /* fdivs */
++ COSTS_N_INSNS (33), /* fdivd */
++ COSTS_N_INSNS (19), /* fsqrts */
++ COSTS_N_INSNS (33), /* fsqrtd */
++ COSTS_N_INSNS (5), /* imul */
++ COSTS_N_INSNS (5), /* imulX */
++ 0, /* imul bit factor */
++ COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
++ COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
++ COSTS_N_INSNS (1), /* movcc/movr */
++ 0, /* shift penalty */
++ 5 /* branch cost */
++};
++
++static const
++struct processor_costs niagara3_costs = {
++ COSTS_N_INSNS (3), /* int load */
++ COSTS_N_INSNS (3), /* int signed load */
++ COSTS_N_INSNS (3), /* int zeroed load */
++ COSTS_N_INSNS (3), /* float load */
++ COSTS_N_INSNS (9), /* fmov, fneg, fabs */
++ COSTS_N_INSNS (9), /* fadd, fsub */
++ COSTS_N_INSNS (9), /* fcmp */
++ COSTS_N_INSNS (9), /* fmov, fmovr */
++ COSTS_N_INSNS (9), /* fmul */
++ COSTS_N_INSNS (23), /* fdivs */
++ COSTS_N_INSNS (37), /* fdivd */
++ COSTS_N_INSNS (23), /* fsqrts */
++ COSTS_N_INSNS (37), /* fsqrtd */
++ COSTS_N_INSNS (9), /* imul */
++ COSTS_N_INSNS (9), /* imulX */
++ 0, /* imul bit factor */
++ COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
++ COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
++ COSTS_N_INSNS (1), /* movcc/movr */
++ 0, /* shift penalty */
++ 5 /* branch cost */
++};
++
++static const
++struct processor_costs niagara4_costs = {
++ COSTS_N_INSNS (5), /* int load */
++ COSTS_N_INSNS (5), /* int signed load */
++ COSTS_N_INSNS (5), /* int zeroed load */
++ COSTS_N_INSNS (5), /* float load */
++ COSTS_N_INSNS (11), /* fmov, fneg, fabs */
++ COSTS_N_INSNS (11), /* fadd, fsub */
++ COSTS_N_INSNS (11), /* fcmp */
++ COSTS_N_INSNS (11), /* fmov, fmovr */
++ COSTS_N_INSNS (11), /* fmul */
++ COSTS_N_INSNS (24), /* fdivs */
++ COSTS_N_INSNS (37), /* fdivd */
++ COSTS_N_INSNS (24), /* fsqrts */
++ COSTS_N_INSNS (37), /* fsqrtd */
++ COSTS_N_INSNS (12), /* imul */
++ COSTS_N_INSNS (12), /* imulX */
++ 0, /* imul bit factor */
++ COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
++ COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
++ COSTS_N_INSNS (1), /* movcc/movr */
++ 0, /* shift penalty */
++ 2 /* branch cost */
++};
++
++static const
++struct processor_costs niagara7_costs = {
++ COSTS_N_INSNS (5), /* int load */
++ COSTS_N_INSNS (5), /* int signed load */
++ COSTS_N_INSNS (5), /* int zeroed load */
++ COSTS_N_INSNS (5), /* float load */
++ COSTS_N_INSNS (11), /* fmov, fneg, fabs */
++ COSTS_N_INSNS (11), /* fadd, fsub */
++ COSTS_N_INSNS (11), /* fcmp */
++ COSTS_N_INSNS (11), /* fmov, fmovr */
++ COSTS_N_INSNS (11), /* fmul */
++ COSTS_N_INSNS (24), /* fdivs */
++ COSTS_N_INSNS (37), /* fdivd */
++ COSTS_N_INSNS (24), /* fsqrts */
++ COSTS_N_INSNS (37), /* fsqrtd */
++ COSTS_N_INSNS (12), /* imul */
++ COSTS_N_INSNS (12), /* imulX */
++ 0, /* imul bit factor */
++ COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
++ COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
++ COSTS_N_INSNS (1), /* movcc/movr */
++ 0, /* shift penalty */
++ 1 /* branch cost */
++};
++
++static const
++struct processor_costs m8_costs = {
++ COSTS_N_INSNS (3), /* int load */
++ COSTS_N_INSNS (3), /* int signed load */
++ COSTS_N_INSNS (3), /* int zeroed load */
++ COSTS_N_INSNS (3), /* float load */
++ COSTS_N_INSNS (9), /* fmov, fneg, fabs */
++ COSTS_N_INSNS (9), /* fadd, fsub */
++ COSTS_N_INSNS (9), /* fcmp */
++ COSTS_N_INSNS (9), /* fmov, fmovr */
++ COSTS_N_INSNS (9), /* fmul */
++ COSTS_N_INSNS (26), /* fdivs */
++ COSTS_N_INSNS (30), /* fdivd */
++ COSTS_N_INSNS (33), /* fsqrts */
++ COSTS_N_INSNS (41), /* fsqrtd */
++ COSTS_N_INSNS (12), /* imul */
++ COSTS_N_INSNS (10), /* imulX */
++ 0, /* imul bit factor */
++ COSTS_N_INSNS (57), /* udiv/sdiv */
++ COSTS_N_INSNS (30), /* udivx/sdivx */
++ COSTS_N_INSNS (1), /* movcc/movr */
++ 0, /* shift penalty */
++ 1 /* branch cost */
++};
++
++static const struct processor_costs *sparc_costs = &cypress_costs;
++
++#ifdef HAVE_AS_RELAX_OPTION
++/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
++ "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
++ With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
++ somebody does not branch between the sethi and jmp. */
++#define LEAF_SIBCALL_SLOT_RESERVED_P 1
++#else
++#define LEAF_SIBCALL_SLOT_RESERVED_P \
++ ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
++#endif
++
++/* Vector to say how input registers are mapped to output registers.
++ HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
++ eliminate it. You must use -fomit-frame-pointer to get that. */
++char leaf_reg_remap[] =
++{ 0, 1, 2, 3, 4, 5, 6, 7,
++ -1, -1, -1, -1, -1, -1, 14, -1,
++ -1, -1, -1, -1, -1, -1, -1, -1,
++ 8, 9, 10, 11, 12, 13, -1, 15,
++
++ 32, 33, 34, 35, 36, 37, 38, 39,
++ 40, 41, 42, 43, 44, 45, 46, 47,
++ 48, 49, 50, 51, 52, 53, 54, 55,
++ 56, 57, 58, 59, 60, 61, 62, 63,
++ 64, 65, 66, 67, 68, 69, 70, 71,
++ 72, 73, 74, 75, 76, 77, 78, 79,
++ 80, 81, 82, 83, 84, 85, 86, 87,
++ 88, 89, 90, 91, 92, 93, 94, 95,
++ 96, 97, 98, 99, 100, 101, 102};
++
++/* Vector, indexed by hard register number, which contains 1
++ for a register that is allowable in a candidate for leaf
++ function treatment. */
++char sparc_leaf_regs[] =
++{ 1, 1, 1, 1, 1, 1, 1, 1,
++ 0, 0, 0, 0, 0, 0, 1, 0,
++ 0, 0, 0, 0, 0, 0, 0, 0,
++ 1, 1, 1, 1, 1, 1, 0, 1,
++ 1, 1, 1, 1, 1, 1, 1, 1,
++ 1, 1, 1, 1, 1, 1, 1, 1,
++ 1, 1, 1, 1, 1, 1, 1, 1,
++ 1, 1, 1, 1, 1, 1, 1, 1,
++ 1, 1, 1, 1, 1, 1, 1, 1,
++ 1, 1, 1, 1, 1, 1, 1, 1,
++ 1, 1, 1, 1, 1, 1, 1, 1,
++ 1, 1, 1, 1, 1, 1, 1, 1,
++ 1, 1, 1, 1, 1, 1, 1};
++
++struct GTY(()) machine_function
++{
++ /* Size of the frame of the function. */
++ HOST_WIDE_INT frame_size;
++
++ /* Size of the frame of the function minus the register window save area
++ and the outgoing argument area. */
++ HOST_WIDE_INT apparent_frame_size;
++
++ /* Register we pretend the frame pointer is allocated to. Normally, this
++ is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
++ record "offset" separately as it may be too big for (reg + disp). */
++ rtx frame_base_reg;
++ HOST_WIDE_INT frame_base_offset;
++
++ /* Number of global or FP registers to be saved (as 4-byte quantities). */
++ int n_global_fp_regs;
++
++ /* True if the current function is leaf and uses only leaf regs,
++ so that the SPARC leaf function optimization can be applied.
++ Private version of crtl->uses_only_leaf_regs, see
++ sparc_expand_prologue for the rationale. */
++ int leaf_function_p;
++
++ /* True if the prologue saves local or in registers. */
++ bool save_local_in_regs_p;
++
++ /* True if the data calculated by sparc_expand_prologue are valid. */
++ bool prologue_data_valid_p;
++};
++
++#define sparc_frame_size cfun->machine->frame_size
++#define sparc_apparent_frame_size cfun->machine->apparent_frame_size
++#define sparc_frame_base_reg cfun->machine->frame_base_reg
++#define sparc_frame_base_offset cfun->machine->frame_base_offset
++#define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
++#define sparc_leaf_function_p cfun->machine->leaf_function_p
++#define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
++#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
++
++/* 1 if the next opcode is to be specially indented. */
++int sparc_indent_opcode = 0;
++
++static void sparc_option_override (void);
++static void sparc_init_modes (void);
++static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
++ const_tree, bool, bool, int *, int *);
++
++static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
++static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
++
++static void sparc_emit_set_const32 (rtx, rtx);
++static void sparc_emit_set_const64 (rtx, rtx);
++static void sparc_output_addr_vec (rtx);
++static void sparc_output_addr_diff_vec (rtx);
++static void sparc_output_deferred_case_vectors (void);
++static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
++static bool sparc_legitimate_constant_p (machine_mode, rtx);
++static rtx sparc_builtin_saveregs (void);
++static int epilogue_renumber (rtx *, int);
++static bool sparc_assemble_integer (rtx, unsigned int, int);
++static int set_extends (rtx_insn *);
++static void sparc_asm_function_prologue (FILE *);
++static void sparc_asm_function_epilogue (FILE *);
++#ifdef TARGET_SOLARIS
++static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
++ tree) ATTRIBUTE_UNUSED;
++#endif
++static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
++static int sparc_issue_rate (void);
++static void sparc_sched_init (FILE *, int, int);
++static int sparc_use_sched_lookahead (void);
++
++static void emit_soft_tfmode_libcall (const char *, int, rtx *);
++static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
++static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
++static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
++static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
++
++static bool sparc_function_ok_for_sibcall (tree, tree);
++static void sparc_init_libfuncs (void);
++static void sparc_init_builtins (void);
++static void sparc_fpu_init_builtins (void);
++static void sparc_vis_init_builtins (void);
++static tree sparc_builtin_decl (unsigned, bool);
++static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
++static tree sparc_fold_builtin (tree, int, tree *, bool);
++static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
++ HOST_WIDE_INT, tree);
++static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
++ HOST_WIDE_INT, const_tree);
++static struct machine_function * sparc_init_machine_status (void);
++static bool sparc_cannot_force_const_mem (machine_mode, rtx);
++static rtx sparc_tls_get_addr (void);
++static rtx sparc_tls_got (void);
++static int sparc_register_move_cost (machine_mode,
++ reg_class_t, reg_class_t);
++static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
++static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
++ int *, const_tree, int);
++static bool sparc_strict_argument_naming (cumulative_args_t);
++static void sparc_va_start (tree, rtx);
++static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
++static bool sparc_vector_mode_supported_p (machine_mode);
++static bool sparc_tls_referenced_p (rtx);
++static rtx sparc_legitimize_tls_address (rtx);
++static rtx sparc_legitimize_pic_address (rtx, rtx);
++static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
++static rtx sparc_delegitimize_address (rtx);
++static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
++static bool sparc_pass_by_reference (cumulative_args_t,
++ const function_arg_info &);
++static void sparc_function_arg_advance (cumulative_args_t,
++ const function_arg_info &);
++static rtx sparc_function_arg (cumulative_args_t, const function_arg_info &);
++static rtx sparc_function_incoming_arg (cumulative_args_t,
++ const function_arg_info &);
++static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
++static unsigned int sparc_function_arg_boundary (machine_mode,
++ const_tree);
++static int sparc_arg_partial_bytes (cumulative_args_t,
++ const function_arg_info &);
++static bool sparc_return_in_memory (const_tree, const_tree);
++static rtx sparc_struct_value_rtx (tree, int);
++static rtx sparc_function_value (const_tree, const_tree, bool);
++static rtx sparc_libcall_value (machine_mode, const_rtx);
++static bool sparc_function_value_regno_p (const unsigned int);
++static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void);
++static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
++static void sparc_file_end (void);
++static bool sparc_frame_pointer_required (void);
++static bool sparc_can_eliminate (const int, const int);
++static void sparc_conditional_register_usage (void);
++static bool sparc_use_pseudo_pic_reg (void);
++static void sparc_init_pic_reg (void);
++#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
++static const char *sparc_mangle_type (const_tree);
++#endif
++static void sparc_trampoline_init (rtx, tree, rtx);
++static machine_mode sparc_preferred_simd_mode (scalar_mode);
++static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
++static bool sparc_lra_p (void);
++static bool sparc_print_operand_punct_valid_p (unsigned char);
++static void sparc_print_operand (FILE *, rtx, int);
++static void sparc_print_operand_address (FILE *, machine_mode, rtx);
++static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
++ machine_mode,
++ secondary_reload_info *);
++static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
++ reg_class_t);
++static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
++static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
++static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
++static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
++static unsigned int sparc_min_arithmetic_precision (void);
++static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
++static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
++static bool sparc_modes_tieable_p (machine_mode, machine_mode);
++static bool sparc_can_change_mode_class (machine_mode, machine_mode,
++ reg_class_t);
++static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
++static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
++ const vec_perm_indices &);
++static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *);
++
++#ifdef SUBTARGET_ATTRIBUTE_TABLE
++/* Table of valid machine attributes. */
++static const struct attribute_spec sparc_attribute_table[] =
++{
++ /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
++ do_diagnostic, handler, exclude } */
++ SUBTARGET_ATTRIBUTE_TABLE,
++ { NULL, 0, 0, false, false, false, false, NULL, NULL }
++};
++#endif
++
++char sparc_hard_reg_printed[8];
++
++/* Initialize the GCC target structure. */
++
++/* The default is to use .half rather than .short for aligned HI objects. */
++#undef TARGET_ASM_ALIGNED_HI_OP
++#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
++
++#undef TARGET_ASM_UNALIGNED_HI_OP
++#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
++#undef TARGET_ASM_UNALIGNED_SI_OP
++#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
++#undef TARGET_ASM_UNALIGNED_DI_OP
++#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
++
++/* The target hook has to handle DI-mode values. */
++#undef TARGET_ASM_INTEGER
++#define TARGET_ASM_INTEGER sparc_assemble_integer
++
++#undef TARGET_ASM_FUNCTION_PROLOGUE
++#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
++#undef TARGET_ASM_FUNCTION_EPILOGUE
++#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
++
++#undef TARGET_SCHED_ADJUST_COST
++#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
++#undef TARGET_SCHED_ISSUE_RATE
++#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
++#undef TARGET_SCHED_INIT
++#define TARGET_SCHED_INIT sparc_sched_init
++#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
++#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
++
++#undef TARGET_FUNCTION_OK_FOR_SIBCALL
++#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
++
++#undef TARGET_INIT_LIBFUNCS
++#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
++
++#undef TARGET_LEGITIMIZE_ADDRESS
++#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
++#undef TARGET_DELEGITIMIZE_ADDRESS
++#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
++#undef TARGET_MODE_DEPENDENT_ADDRESS_P
++#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
++
++#undef TARGET_INIT_BUILTINS
++#define TARGET_INIT_BUILTINS sparc_init_builtins
++#undef TARGET_BUILTIN_DECL
++#define TARGET_BUILTIN_DECL sparc_builtin_decl
++#undef TARGET_EXPAND_BUILTIN
++#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
++#undef TARGET_FOLD_BUILTIN
++#define TARGET_FOLD_BUILTIN sparc_fold_builtin
++
++#if TARGET_TLS
++#undef TARGET_HAVE_TLS
++#define TARGET_HAVE_TLS true
++#endif
++
++#undef TARGET_CANNOT_FORCE_CONST_MEM
++#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
++
++#undef TARGET_ASM_OUTPUT_MI_THUNK
++#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
++#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
++#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
++
++#undef TARGET_RTX_COSTS
++#define TARGET_RTX_COSTS sparc_rtx_costs
++#undef TARGET_ADDRESS_COST
++#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
++#undef TARGET_REGISTER_MOVE_COST
++#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
++
++#undef TARGET_PROMOTE_FUNCTION_MODE
++#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
++#undef TARGET_STRICT_ARGUMENT_NAMING
++#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
++
++#undef TARGET_MUST_PASS_IN_STACK
++#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
++#undef TARGET_PASS_BY_REFERENCE
++#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
++#undef TARGET_ARG_PARTIAL_BYTES
++#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
++#undef TARGET_FUNCTION_ARG_ADVANCE
++#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
++#undef TARGET_FUNCTION_ARG
++#define TARGET_FUNCTION_ARG sparc_function_arg
++#undef TARGET_FUNCTION_INCOMING_ARG
++#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
++#undef TARGET_FUNCTION_ARG_PADDING
++#define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
++#undef TARGET_FUNCTION_ARG_BOUNDARY
++#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
++
++#undef TARGET_RETURN_IN_MEMORY
++#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
++#undef TARGET_STRUCT_VALUE_RTX
++#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
++#undef TARGET_FUNCTION_VALUE
++#define TARGET_FUNCTION_VALUE sparc_function_value
++#undef TARGET_LIBCALL_VALUE
++#define TARGET_LIBCALL_VALUE sparc_libcall_value
++#undef TARGET_FUNCTION_VALUE_REGNO_P
++#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
++
++#undef TARGET_EXPAND_BUILTIN_SAVEREGS
++#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
++
++#undef TARGET_ASAN_SHADOW_OFFSET
++#define TARGET_ASAN_SHADOW_OFFSET sparc_asan_shadow_offset
++
++#undef TARGET_EXPAND_BUILTIN_VA_START
++#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
++#undef TARGET_GIMPLIFY_VA_ARG_EXPR
++#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
++
++#undef TARGET_VECTOR_MODE_SUPPORTED_P
++#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
++
++#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
++#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
++
++#ifdef SUBTARGET_INSERT_ATTRIBUTES
++#undef TARGET_INSERT_ATTRIBUTES
++#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
++#endif
++
++#ifdef SUBTARGET_ATTRIBUTE_TABLE
++#undef TARGET_ATTRIBUTE_TABLE
++#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
++#endif
++
++#undef TARGET_OPTION_OVERRIDE
++#define TARGET_OPTION_OVERRIDE sparc_option_override
++
++#ifdef TARGET_THREAD_SSP_OFFSET
++#undef TARGET_STACK_PROTECT_GUARD
++#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
++#endif
++
++#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
++#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
++#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
++#endif
++
++#undef TARGET_ASM_FILE_END
++#define TARGET_ASM_FILE_END sparc_file_end
++
++#undef TARGET_FRAME_POINTER_REQUIRED
++#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
++
++#undef TARGET_CAN_ELIMINATE
++#define TARGET_CAN_ELIMINATE sparc_can_eliminate
++
++#undef TARGET_PREFERRED_RELOAD_CLASS
++#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
++
++#undef TARGET_SECONDARY_RELOAD
++#define TARGET_SECONDARY_RELOAD sparc_secondary_reload
++#undef TARGET_SECONDARY_MEMORY_NEEDED
++#define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
++#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
++#define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode
++
++#undef TARGET_CONDITIONAL_REGISTER_USAGE
++#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
++
++#undef TARGET_INIT_PIC_REG
++#define TARGET_INIT_PIC_REG sparc_init_pic_reg
++
++#undef TARGET_USE_PSEUDO_PIC_REG
++#define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg
++
++#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
++#undef TARGET_MANGLE_TYPE
++#define TARGET_MANGLE_TYPE sparc_mangle_type
++#endif
++
++#undef TARGET_LRA_P
++#define TARGET_LRA_P sparc_lra_p
++
++#undef TARGET_LEGITIMATE_ADDRESS_P
++#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
++
++#undef TARGET_LEGITIMATE_CONSTANT_P
++#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
++
++#undef TARGET_TRAMPOLINE_INIT
++#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
++
++#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
++#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
++#undef TARGET_PRINT_OPERAND
++#define TARGET_PRINT_OPERAND sparc_print_operand
++#undef TARGET_PRINT_OPERAND_ADDRESS
++#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
++
++/* The value stored by LDSTUB. */
++#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
++#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
++
++#undef TARGET_CSTORE_MODE
++#define TARGET_CSTORE_MODE sparc_cstore_mode
++
++#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
++#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
++
++#undef TARGET_FIXED_CONDITION_CODE_REGS
++#define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
++
++#undef TARGET_MIN_ARITHMETIC_PRECISION
++#define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
++
++#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
++#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
++
++#undef TARGET_HARD_REGNO_NREGS
++#define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
++#undef TARGET_HARD_REGNO_MODE_OK
++#define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok
++
++#undef TARGET_MODES_TIEABLE_P
++#define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p
++
++#undef TARGET_CAN_CHANGE_MODE_CLASS
++#define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class
++
++#undef TARGET_CONSTANT_ALIGNMENT
++#define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment
++
++#undef TARGET_VECTORIZE_VEC_PERM_CONST
++#define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const
++
++#undef TARGET_CAN_FOLLOW_JUMP
++#define TARGET_CAN_FOLLOW_JUMP sparc_can_follow_jump
++
++struct gcc_target targetm = TARGET_INITIALIZER;
++
++/* Return the memory reference contained in X if any, zero otherwise. */
++
++static rtx
++mem_ref (rtx x)
++{
++ if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
++ x = XEXP (x, 0);
++
++ if (MEM_P (x))
++ return x;
++
++ return NULL_RTX;
++}
++
++/* True if any of INSN's source register(s) is REG. */
++
++static bool
++insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
++{
++ extract_insn (insn);
++ return ((REG_P (recog_data.operand[1])
++ && REGNO (recog_data.operand[1]) == reg)
++ || (recog_data.n_operands == 3
++ && REG_P (recog_data.operand[2])
++ && REGNO (recog_data.operand[2]) == reg));
++}
++
++/* True if INSN is a floating-point division or square-root. */
++
++static bool
++div_sqrt_insn_p (rtx_insn *insn)
++{
++ if (GET_CODE (PATTERN (insn)) != SET)
++ return false;
++
++ switch (get_attr_type (insn))
++ {
++ case TYPE_FPDIVS:
++ case TYPE_FPSQRTS:
++ case TYPE_FPDIVD:
++ case TYPE_FPSQRTD:
++ return true;
++ default:
++ return false;
++ }
++}
++
++/* True if INSN is a floating-point instruction. */
++
++static bool
++fpop_insn_p (rtx_insn *insn)
++{
++ if (GET_CODE (PATTERN (insn)) != SET)
++ return false;
++
++ switch (get_attr_type (insn))
++ {
++ case TYPE_FPMOVE:
++ case TYPE_FPCMOVE:
++ case TYPE_FP:
++ case TYPE_FPCMP:
++ case TYPE_FPMUL:
++ case TYPE_FPDIVS:
++ case TYPE_FPSQRTS:
++ case TYPE_FPDIVD:
++ case TYPE_FPSQRTD:
++ return true;
++ default:
++ return false;
++ }
++}
++
++/* True if INSN is an atomic instruction. */
++
++static bool
++atomic_insn_for_leon3_p (rtx_insn *insn)
++{
++ switch (INSN_CODE (insn))
++ {
++ case CODE_FOR_swapsi:
++ case CODE_FOR_ldstub:
++ case CODE_FOR_atomic_compare_and_swap_leon3_1:
++ return true;
++ default:
++ return false;
++ }
++}
++
++/* We use a machine specific pass to enable workarounds for errata.
++
++ We need to have the (essentially) final form of the insn stream in order
++ to properly detect the various hazards. Therefore, this machine specific
++ pass runs as late as possible. */
++
++/* True if INSN is a md pattern or asm statement. */
++#define USEFUL_INSN_P(INSN) \
++ (NONDEBUG_INSN_P (INSN) \
++ && GET_CODE (PATTERN (INSN)) != USE \
++ && GET_CODE (PATTERN (INSN)) != CLOBBER)
++
++static unsigned int
++sparc_do_work_around_errata (void)
++{
++ rtx_insn *insn, *next;
++
++ /* Force all instructions to be split into their final form. */
++ split_all_insns_noflow ();
++
++ /* Now look for specific patterns in the insn stream. */
++ for (insn = get_insns (); insn; insn = next)
++ {
++ bool insert_nop = false;
++ rtx set;
++ rtx_insn *jump;
++ rtx_sequence *seq;
++
++ /* Look into the instruction in a delay slot. */
++ if (NONJUMP_INSN_P (insn)
++ && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
++ {
++ jump = seq->insn (0);
++ insn = seq->insn (1);
++ }
++ else if (JUMP_P (insn))
++ jump = insn;
++ else
++ jump = NULL;
++
++ /* Place a NOP at the branch target of an integer branch if it is a
++ floating-point operation or a floating-point branch. */
++ if (sparc_fix_gr712rc
++ && jump
++ && jump_to_label_p (jump)
++ && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
++ {
++ rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
++ if (target
++ && (fpop_insn_p (target)
++ || (JUMP_P (target)
++ && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
++ emit_insn_before (gen_nop (), target);
++ }
++
++ /* Insert a NOP between load instruction and atomic instruction. Insert
++ a NOP at branch target if there is a load in delay slot and an atomic
++ instruction at branch target. */
++ if (sparc_fix_ut700
++ && NONJUMP_INSN_P (insn)
++ && (set = single_set (insn)) != NULL_RTX
++ && mem_ref (SET_SRC (set))
++ && REG_P (SET_DEST (set)))
++ {
++ if (jump && jump_to_label_p (jump))
++ {
++ rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
++ if (target && atomic_insn_for_leon3_p (target))
++ emit_insn_before (gen_nop (), target);
++ }
++
++ next = next_active_insn (insn);
++ if (!next)
++ break;
++
++ if (atomic_insn_for_leon3_p (next))
++ insert_nop = true;
++ }
++
++ /* Look for a sequence that starts with a fdiv or fsqrt instruction and
++ ends with another fdiv or fsqrt instruction with no dependencies on
++ the former, along with an appropriate pattern in between. */
++ if (sparc_fix_lost_divsqrt
++ && NONJUMP_INSN_P (insn)
++ && div_sqrt_insn_p (insn))
++ {
++ int i;
++ int fp_found = 0;
++ rtx_insn *after;
++
++ const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));
++
++ next = next_active_insn (insn);
++ if (!next)
++ break;
++
++ for (after = next, i = 0; i < 4; i++)
++ {
++ /* Count floating-point operations. */
++ if (i != 3 && fpop_insn_p (after))
++ {
++ /* If the insn uses the destination register of
++ the div/sqrt, then it cannot be problematic. */
++ if (insn_uses_reg_p (after, dest_reg))
++ break;
++ fp_found++;
++ }
++
++ /* Count floating-point loads. */
++ if (i != 3
++ && (set = single_set (after)) != NULL_RTX
++ && REG_P (SET_DEST (set))
++ && REGNO (SET_DEST (set)) > 31)
++ {
++ /* If the insn uses the destination register of
++ the div/sqrt, then it cannot be problematic. */
++ if (REGNO (SET_DEST (set)) == dest_reg)
++ break;
++ fp_found++;
++ }
++
++ /* Check if this is a problematic sequence. */
++ if (i > 1
++ && fp_found >= 2
++ && div_sqrt_insn_p (after))
++ {
++ /* If this is the short version of the problematic
++ sequence we add two NOPs in a row to also prevent
++ the long version. */
++ if (i == 2)
++ emit_insn_before (gen_nop (), next);
++ insert_nop = true;
++ break;
++ }
++
++ /* No need to scan past a second div/sqrt. */
++ if (div_sqrt_insn_p (after))
++ break;
++
++ /* Insert NOP before branch. */
++ if (i < 3
++ && (!NONJUMP_INSN_P (after)
++ || GET_CODE (PATTERN (after)) == SEQUENCE))
++ {
++ insert_nop = true;
++ break;
++ }
++
++ after = next_active_insn (after);
++ if (!after)
++ break;
++ }
++ }
++
++ /* Look for either of these two sequences:
++
++ Sequence A:
++ 1. store of word size or less (e.g. st / stb / sth / stf)
++ 2. any single instruction that is not a load or store
++ 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)
++
++ Sequence B:
++ 1. store of double word size (e.g. std / stdf)
++ 2. any store instruction (e.g. st / stb / sth / stf / std / stdf) */
++ if (sparc_fix_b2bst
++ && NONJUMP_INSN_P (insn)
++ && (set = single_set (insn)) != NULL_RTX
++ && MEM_P (SET_DEST (set)))
++ {
++ /* Sequence B begins with a double-word store. */
++ bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
++ rtx_insn *after;
++ int i;
++
++ next = next_active_insn (insn);
++ if (!next)
++ break;
++
++ for (after = next, i = 0; i < 2; i++)
++ {
++ /* Skip empty assembly statements. */
++ if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
++ || (USEFUL_INSN_P (after)
++ && (asm_noperands (PATTERN (after))>=0)
++ && !strcmp (decode_asm_operands (PATTERN (after),
++ NULL, NULL, NULL,
++ NULL, NULL), "")))
++ after = next_active_insn (after);
++ if (!after)
++ break;
++
++ /* If the insn is a branch, then it cannot be problematic. */
++ if (!NONJUMP_INSN_P (after)
++ || GET_CODE (PATTERN (after)) == SEQUENCE)
++ break;
++
++ /* Sequence B is only two instructions long. */
++ if (seq_b)
++ {
++ /* Add NOP if followed by a store. */
++ if ((set = single_set (after)) != NULL_RTX
++ && MEM_P (SET_DEST (set)))
++ insert_nop = true;
++
++ /* Otherwise it is ok. */
++ break;
++ }
++
++ /* If the second instruction is a load or a store,
++ then the sequence cannot be problematic. */
++ if (i == 0)
++ {
++ if ((set = single_set (after)) != NULL_RTX
++ && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
++ break;
++
++ after = next_active_insn (after);
++ if (!after)
++ break;
++ }
++
++ /* Add NOP if third instruction is a store. */
++ if (i == 1
++ && (set = single_set (after)) != NULL_RTX
++ && MEM_P (SET_DEST (set)))
++ insert_nop = true;
++ }
++ }
++
++ /* Look for a single-word load into an odd-numbered FP register. */
++ else if (sparc_fix_at697f
++ && NONJUMP_INSN_P (insn)
++ && (set = single_set (insn)) != NULL_RTX
++ && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
++ && mem_ref (SET_SRC (set))
++ && REG_P (SET_DEST (set))
++ && REGNO (SET_DEST (set)) > 31
++ && REGNO (SET_DEST (set)) % 2 != 0)
++ {
++ /* The wrong dependency is on the enclosing double register. */
++ const unsigned int x = REGNO (SET_DEST (set)) - 1;
++ unsigned int src1, src2, dest;
++ int code;
++
++ next = next_active_insn (insn);
++ if (!next)
++ break;
++ /* If the insn is a branch, then it cannot be problematic. */
++ if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
++ continue;
++
++ extract_insn (next);
++ code = INSN_CODE (next);
++
++ switch (code)
++ {
++ case CODE_FOR_adddf3:
++ case CODE_FOR_subdf3:
++ case CODE_FOR_muldf3:
++ case CODE_FOR_divdf3:
++ dest = REGNO (recog_data.operand[0]);
++ src1 = REGNO (recog_data.operand[1]);
++ src2 = REGNO (recog_data.operand[2]);
++ if (src1 != src2)
++ {
++ /* Case [1-4]:
++ ld [address], %fx+1
++ FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
++ if ((src1 == x || src2 == x)
++ && (dest == src1 || dest == src2))
++ insert_nop = true;
++ }
++ else
++ {
++ /* Case 5:
++ ld [address], %fx+1
++ FPOPd %fx, %fx, %fx */
++ if (src1 == x
++ && dest == src1
++ && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
++ insert_nop = true;
++ }
++ break;
++
++ case CODE_FOR_sqrtdf2:
++ dest = REGNO (recog_data.operand[0]);
++ src1 = REGNO (recog_data.operand[1]);
++ /* Case 6:
++ ld [address], %fx+1
++ fsqrtd %fx, %fx */
++ if (src1 == x && dest == src1)
++ insert_nop = true;
++ break;
++
++ default:
++ break;
++ }
++ }
++
++ /* Look for a single-word load into an integer register. */
++ else if (sparc_fix_ut699
++ && NONJUMP_INSN_P (insn)
++ && (set = single_set (insn)) != NULL_RTX
++ && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
++ && (mem_ref (SET_SRC (set)) != NULL_RTX
++ || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
++ && REG_P (SET_DEST (set))
++ && REGNO (SET_DEST (set)) < 32)
++ {
++ /* There is no problem if the second memory access has a data
++ dependency on the first single-cycle load. */
++ rtx x = SET_DEST (set);
++
++ next = next_active_insn (insn);
++ if (!next)
++ break;
++ /* If the insn is a branch, then it cannot be problematic. */
++ if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
++ continue;
++
++ /* Look for a second memory access to/from an integer register. */
++ if ((set = single_set (next)) != NULL_RTX)
++ {
++ rtx src = SET_SRC (set);
++ rtx dest = SET_DEST (set);
++ rtx mem;
++
++ /* LDD is affected. */
++ if ((mem = mem_ref (src)) != NULL_RTX
++ && REG_P (dest)
++ && REGNO (dest) < 32
++ && !reg_mentioned_p (x, XEXP (mem, 0)))
++ insert_nop = true;
++
++ /* STD is *not* affected. */
++ else if (MEM_P (dest)
++ && GET_MODE_SIZE (GET_MODE (dest)) <= 4
++ && (src == CONST0_RTX (GET_MODE (dest))
++ || (REG_P (src)
++ && REGNO (src) < 32
++ && REGNO (src) != REGNO (x)))
++ && !reg_mentioned_p (x, XEXP (dest, 0)))
++ insert_nop = true;
++
++ /* GOT accesses uses LD. */
++ else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
++ && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
++ insert_nop = true;
++ }
++ }
++
++ /* Look for a single-word load/operation into an FP register. */
++ else if (sparc_fix_ut699
++ && NONJUMP_INSN_P (insn)
++ && (set = single_set (insn)) != NULL_RTX
++ && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
++ && REG_P (SET_DEST (set))
++ && REGNO (SET_DEST (set)) > 31)
++ {
++ /* Number of instructions in the problematic window. */
++ const int n_insns = 4;
++ /* The problematic combination is with the sibling FP register. */
++ const unsigned int x = REGNO (SET_DEST (set));
++ const unsigned int y = x ^ 1;
++ rtx_insn *after;
++ int i;
++
++ next = next_active_insn (insn);
++ if (!next)
++ break;
++ /* If the insn is a branch, then it cannot be problematic. */
++ if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
++ continue;
++
++ /* Look for a second load/operation into the sibling FP register. */
++ if (!((set = single_set (next)) != NULL_RTX
++ && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
++ && REG_P (SET_DEST (set))
++ && REGNO (SET_DEST (set)) == y))
++ continue;
++
++ /* Look for a (possible) store from the FP register in the next N
++ instructions, but bail out if it is again modified or if there
++ is a store from the sibling FP register before this store. */
++ for (after = next, i = 0; i < n_insns; i++)
++ {
++ bool branch_p;
++
++ after = next_active_insn (after);
++ if (!after)
++ break;
++
++ /* This is a branch with an empty delay slot. */
++ if (!NONJUMP_INSN_P (after))
++ {
++ if (++i == n_insns)
++ break;
++ branch_p = true;
++ after = NULL;
++ }
++ /* This is a branch with a filled delay slot. */
++ else if (rtx_sequence *seq =
++ dyn_cast <rtx_sequence *> (PATTERN (after)))
++ {
++ if (++i == n_insns)
++ break;
++ branch_p = true;
++ after = seq->insn (1);
++ }
++ /* This is a regular instruction. */
++ else
++ branch_p = false;
++
++ if (after && (set = single_set (after)) != NULL_RTX)
++ {
++ const rtx src = SET_SRC (set);
++ const rtx dest = SET_DEST (set);
++ const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
++
++ /* If the FP register is again modified before the store,
++ then the store isn't affected. */
++ if (REG_P (dest)
++ && (REGNO (dest) == x
++ || (REGNO (dest) == y && size == 8)))
++ break;
++
++ if (MEM_P (dest) && REG_P (src))
++ {
++ /* If there is a store from the sibling FP register
++ before the store, then the store is not affected. */
++ if (REGNO (src) == y || (REGNO (src) == x && size == 8))
++ break;
++
++ /* Otherwise, the store is affected. */
++ if (REGNO (src) == x && size == 4)
++ {
++ insert_nop = true;
++ break;
++ }
++ }
++ }
++
++ /* If we have a branch in the first M instructions, then we
++ cannot see the (M+2)th instruction so we play safe. */
++ if (branch_p && i <= (n_insns - 2))
++ {
++ insert_nop = true;
++ break;
++ }
++ }
++ }
++
++ else
++ next = NEXT_INSN (insn);
++
++ if (insert_nop)
++ emit_insn_before (gen_nop (), next);
++ }
++
++ return 0;
++}
++
++namespace {
++
++const pass_data pass_data_work_around_errata =
++{
++ RTL_PASS, /* type */
++ "errata", /* name */
++ OPTGROUP_NONE, /* optinfo_flags */
++ TV_MACH_DEP, /* tv_id */
++ 0, /* properties_required */
++ 0, /* properties_provided */
++ 0, /* properties_destroyed */
++ 0, /* todo_flags_start */
++ 0, /* todo_flags_finish */
++};
++
++class pass_work_around_errata : public rtl_opt_pass
++{
++public:
++ pass_work_around_errata(gcc::context *ctxt)
++ : rtl_opt_pass(pass_data_work_around_errata, ctxt)
++ {}
++
++ /* opt_pass methods: */
++ virtual bool gate (function *)
++ {
++ return sparc_fix_at697f
++ || sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc
++ || sparc_fix_b2bst || sparc_fix_lost_divsqrt;
++ }
++
++ virtual unsigned int execute (function *)
++ {
++ return sparc_do_work_around_errata ();
++ }
++
++}; // class pass_work_around_errata
++
++} // anon namespace
++
++rtl_opt_pass *
++make_pass_work_around_errata (gcc::context *ctxt)
++{
++ return new pass_work_around_errata (ctxt);
++}
++
++/* Helpers for TARGET_DEBUG_OPTIONS. */
++static void
++dump_target_flag_bits (const int flags)
++{
++ if (flags & MASK_64BIT)
++ fprintf (stderr, "64BIT ");
++ if (flags & MASK_APP_REGS)
++ fprintf (stderr, "APP_REGS ");
++ if (flags & MASK_FASTER_STRUCTS)
++ fprintf (stderr, "FASTER_STRUCTS ");
++ if (flags & MASK_FLAT)
++ fprintf (stderr, "FLAT ");
++ if (flags & MASK_FMAF)
++ fprintf (stderr, "FMAF ");
++ if (flags & MASK_FSMULD)
++ fprintf (stderr, "FSMULD ");
++ if (flags & MASK_FPU)
++ fprintf (stderr, "FPU ");
++ if (flags & MASK_HARD_QUAD)
++ fprintf (stderr, "HARD_QUAD ");
++ if (flags & MASK_POPC)
++ fprintf (stderr, "POPC ");
++ if (flags & MASK_PTR64)
++ fprintf (stderr, "PTR64 ");
++ if (flags & MASK_STACK_BIAS)
++ fprintf (stderr, "STACK_BIAS ");
++ if (flags & MASK_UNALIGNED_DOUBLES)
++ fprintf (stderr, "UNALIGNED_DOUBLES ");
++ if (flags & MASK_V8PLUS)
++ fprintf (stderr, "V8PLUS ");
++ if (flags & MASK_VIS)
++ fprintf (stderr, "VIS ");
++ if (flags & MASK_VIS2)
++ fprintf (stderr, "VIS2 ");
++ if (flags & MASK_VIS3)
++ fprintf (stderr, "VIS3 ");
++ if (flags & MASK_VIS4)
++ fprintf (stderr, "VIS4 ");
++ if (flags & MASK_VIS4B)
++ fprintf (stderr, "VIS4B ");
++ if (flags & MASK_CBCOND)
++ fprintf (stderr, "CBCOND ");
++ if (flags & MASK_DEPRECATED_V8_INSNS)
++ fprintf (stderr, "DEPRECATED_V8_INSNS ");
++ if (flags & MASK_SPARCLET)
++ fprintf (stderr, "SPARCLET ");
++ if (flags & MASK_SPARCLITE)
++ fprintf (stderr, "SPARCLITE ");
++ if (flags & MASK_V8)
++ fprintf (stderr, "V8 ");
++ if (flags & MASK_V9)
++ fprintf (stderr, "V9 ");
++}
++
++static void
++dump_target_flags (const char *prefix, const int flags)
++{
++ fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
++ dump_target_flag_bits (flags);
++ fprintf(stderr, "]\n");
++}
++
++/* Validate and override various options, and do some machine dependent
++ initialization. */
++
++static void
++sparc_option_override (void)
++{
++ /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
++ static struct cpu_default {
++ const int cpu;
++ const enum sparc_processor_type processor;
++ } const cpu_default[] = {
++ /* There must be one entry here for each TARGET_CPU value. */
++ { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
++ { TARGET_CPU_v8, PROCESSOR_V8 },
++ { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
++ { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
++ { TARGET_CPU_leon, PROCESSOR_LEON },
++ { TARGET_CPU_leon3, PROCESSOR_LEON3 },
++ { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
++ { TARGET_CPU_sparclite, PROCESSOR_F930 },
++ { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
++ { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
++ { TARGET_CPU_v9, PROCESSOR_V9 },
++ { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
++ { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
++ { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
++ { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
++ { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
++ { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
++ { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
++ { TARGET_CPU_m8, PROCESSOR_M8 },
++ { -1, PROCESSOR_V7 }
++ };
++ const struct cpu_default *def;
++ /* Table of values for -m{cpu,tune}=. This must match the order of
++ the enum processor_type in sparc-opts.h. */
++ static struct cpu_table {
++ const char *const name;
++ const int disable;
++ const int enable;
++ } const cpu_table[] = {
++ { "v7", MASK_ISA, 0 },
++ { "cypress", MASK_ISA, 0 },
++ { "v8", MASK_ISA, MASK_V8 },
++ /* TI TMS390Z55 supersparc */
++ { "supersparc", MASK_ISA, MASK_V8 },
++ { "hypersparc", MASK_ISA, MASK_V8 },
++ { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
++ { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
++ { "leon3v7", MASK_ISA, MASK_LEON3 },
++ { "sparclite", MASK_ISA, MASK_SPARCLITE },
++ /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
++ { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
++ /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
++ { "f934", MASK_ISA, MASK_SPARCLITE },
++ { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
++ { "sparclet", MASK_ISA, MASK_SPARCLET },
++ /* TEMIC sparclet */
++ { "tsc701", MASK_ISA, MASK_SPARCLET },
++ { "v9", MASK_ISA, MASK_V9 },
++ /* UltraSPARC I, II, IIi */
++ { "ultrasparc", MASK_ISA,
++ /* Although insns using %y are deprecated, it is a clear win. */
++ MASK_V9|MASK_DEPRECATED_V8_INSNS },
++ /* UltraSPARC III */
++ /* ??? Check if %y issue still holds true. */
++ { "ultrasparc3", MASK_ISA,
++ MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
++ /* UltraSPARC T1 */
++ { "niagara", MASK_ISA,
++ MASK_V9|MASK_DEPRECATED_V8_INSNS },
++ /* UltraSPARC T2 */
++ { "niagara2", MASK_ISA,
++ MASK_V9|MASK_POPC|MASK_VIS2 },
++ /* UltraSPARC T3 */
++ { "niagara3", MASK_ISA,
++ MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
++ /* UltraSPARC T4 */
++ { "niagara4", MASK_ISA,
++ MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
++ /* UltraSPARC M7 */
++ { "niagara7", MASK_ISA,
++ MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
++ /* UltraSPARC M8 */
++ { "m8", MASK_ISA,
++ MASK_V9|MASK_POPC|MASK_VIS4B|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
++ };
++ const struct cpu_table *cpu;
++ unsigned int i;
++
++ if (sparc_debug_string != NULL)
++ {
++ const char *q;
++ char *p;
++
++ p = ASTRDUP (sparc_debug_string);
++ while ((q = strtok (p, ",")) != NULL)
++ {
++ bool invert;
++ int mask;
++
++ p = NULL;
++ if (*q == '!')
++ {
++ invert = true;
++ q++;
++ }
++ else
++ invert = false;
++
++ if (! strcmp (q, "all"))
++ mask = MASK_DEBUG_ALL;
++ else if (! strcmp (q, "options"))
++ mask = MASK_DEBUG_OPTIONS;
++ else
++ error ("unknown %<-mdebug-%s%> switch", q);
++
++ if (invert)
++ sparc_debug &= ~mask;
++ else
++ sparc_debug |= mask;
++ }
++ }
++
++ /* Enable the FsMULd instruction by default if not explicitly specified by
++ the user. It may be later disabled by the CPU (explicitly or not). */
++ if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
++ target_flags |= MASK_FSMULD;
++
++ if (TARGET_DEBUG_OPTIONS)
++ {
++ dump_target_flags("Initial target_flags", target_flags);
++ dump_target_flags("target_flags_explicit", target_flags_explicit);
++ }
++
++#ifdef SUBTARGET_OVERRIDE_OPTIONS
++ SUBTARGET_OVERRIDE_OPTIONS;
++#endif
++
++#ifndef SPARC_BI_ARCH
++ /* Check for unsupported architecture size. */
++ if (!TARGET_64BIT != DEFAULT_ARCH32_P)
++ error ("%s is not supported by this configuration",
++ DEFAULT_ARCH32_P ? "-m64" : "-m32");
++#endif
++
++ /* We force all 64bit archs to use 128 bit long double */
++ if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
++ {
++ error ("%<-mlong-double-64%> not allowed with %<-m64%>");
++ target_flags |= MASK_LONG_DOUBLE_128;
++ }
++
++ /* Check that -fcall-saved-REG wasn't specified for out registers. */
++ for (i = 8; i < 16; i++)
++ if (!call_used_regs [i])
++ {
++ error ("%<-fcall-saved-REG%> is not supported for out registers");
++ call_used_regs [i] = 1;
++ }
++
++ /* Set the default CPU if no -mcpu option was specified. */
++ if (!global_options_set.x_sparc_cpu_and_features)
++ {
++ for (def = &cpu_default[0]; def->cpu != -1; ++def)
++ if (def->cpu == TARGET_CPU_DEFAULT)
++ break;
++ gcc_assert (def->cpu != -1);
++ sparc_cpu_and_features = def->processor;
++ }
++
++ /* Set the default CPU if no -mtune option was specified. */
++ if (!global_options_set.x_sparc_cpu)
++ sparc_cpu = sparc_cpu_and_features;
++
++ cpu = &cpu_table[(int) sparc_cpu_and_features];
++
++ if (TARGET_DEBUG_OPTIONS)
++ {
++ fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
++ dump_target_flags ("cpu->disable", cpu->disable);
++ dump_target_flags ("cpu->enable", cpu->enable);
++ }
++
++ target_flags &= ~cpu->disable;
++ target_flags |= (cpu->enable
++#ifndef HAVE_AS_FMAF_HPC_VIS3
++ & ~(MASK_FMAF | MASK_VIS3)
++#endif
++#ifndef HAVE_AS_SPARC4
++ & ~MASK_CBCOND
++#endif
++#ifndef HAVE_AS_SPARC5_VIS4
++ & ~(MASK_VIS4 | MASK_SUBXC)
++#endif
++#ifndef HAVE_AS_SPARC6
++ & ~(MASK_VIS4B)
++#endif
++#ifndef HAVE_AS_LEON
++ & ~(MASK_LEON | MASK_LEON3)
++#endif
++ & ~(target_flags_explicit & MASK_FEATURES)
++ );
++
++ /* FsMULd is a V8 instruction. */
++ if (!TARGET_V8 && !TARGET_V9)
++ target_flags &= ~MASK_FSMULD;
++
++ /* -mvis2 implies -mvis. */
++ if (TARGET_VIS2)
++ target_flags |= MASK_VIS;
++
++ /* -mvis3 implies -mvis2 and -mvis. */
++ if (TARGET_VIS3)
++ target_flags |= MASK_VIS2 | MASK_VIS;
++
++ /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
++ if (TARGET_VIS4)
++ target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
++
++ /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
++ if (TARGET_VIS4B)
++ target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
++
++ /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
++ FPU is disabled. */
++ if (!TARGET_FPU)
++ target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
++ | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
++
++ /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
++ are available; -m64 also implies v9. */
++ if (TARGET_VIS || TARGET_ARCH64)
++ {
++ target_flags |= MASK_V9;
++ target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
++ }
++
++ /* -mvis also implies -mv8plus on 32-bit. */
++ if (TARGET_VIS && !TARGET_ARCH64)
++ target_flags |= MASK_V8PLUS;
++
++ /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
++ if (TARGET_V9 && TARGET_ARCH32)
++ target_flags |= MASK_DEPRECATED_V8_INSNS;
++
++ /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
++ if (!TARGET_V9 || TARGET_ARCH64)
++ target_flags &= ~MASK_V8PLUS;
++
++ /* Don't use stack biasing in 32-bit mode. */
++ if (TARGET_ARCH32)
++ target_flags &= ~MASK_STACK_BIAS;
++
++ /* Use LRA instead of reload, unless otherwise instructed. */
++ if (!(target_flags_explicit & MASK_LRA))
++ target_flags |= MASK_LRA;
++
++ /* Enable applicable errata workarounds for LEON3FT. */
++ if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
++ {
++ sparc_fix_b2bst = 1;
++ sparc_fix_lost_divsqrt = 1;
++ }
++
++ /* Disable FsMULd for the UT699 since it doesn't work correctly. */
++ if (sparc_fix_ut699)
++ target_flags &= ~MASK_FSMULD;
++
++#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
++ if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
++ target_flags |= MASK_LONG_DOUBLE_128;
++#endif
++
++ if (TARGET_DEBUG_OPTIONS)
++ dump_target_flags ("Final target_flags", target_flags);
++
++ /* Set the code model if no -mcmodel option was specified. */
++ if (global_options_set.x_sparc_code_model)
++ {
++ if (TARGET_ARCH32)
++ error ("%<-mcmodel=%> is not supported in 32-bit mode");
++ }
++ else
++ {
++ if (TARGET_ARCH32)
++ sparc_code_model = CM_32;
++ else
++ sparc_code_model = SPARC_DEFAULT_CMODEL;
++ }
++
++ /* Set the memory model if no -mmemory-model option was specified. */
++ if (!global_options_set.x_sparc_memory_model)
++ {
++ /* Choose the memory model for the operating system. */
++ enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
++ if (os_default != SMM_DEFAULT)
++ sparc_memory_model = os_default;
++ /* Choose the most relaxed model for the processor. */
++ else if (TARGET_V9)
++ sparc_memory_model = SMM_RMO;
++ else if (TARGET_LEON3)
++ sparc_memory_model = SMM_TSO;
++ else if (TARGET_LEON)
++ sparc_memory_model = SMM_SC;
++ else if (TARGET_V8)
++ sparc_memory_model = SMM_PSO;
++ else
++ sparc_memory_model = SMM_SC;
++ }
++
++ /* Supply a default value for align_functions. */
++ if (flag_align_functions && !str_align_functions)
++ {
++ if (sparc_cpu == PROCESSOR_ULTRASPARC
++ || sparc_cpu == PROCESSOR_ULTRASPARC3
++ || sparc_cpu == PROCESSOR_NIAGARA
++ || sparc_cpu == PROCESSOR_NIAGARA2
++ || sparc_cpu == PROCESSOR_NIAGARA3
++ || sparc_cpu == PROCESSOR_NIAGARA4)
++ str_align_functions = "32";
++ else if (sparc_cpu == PROCESSOR_NIAGARA7
++ || sparc_cpu == PROCESSOR_M8)
++ str_align_functions = "64";
++ }
++
++ /* Validate PCC_STRUCT_RETURN. */
++ if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
++ flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
++
++ /* Only use .uaxword when compiling for a 64-bit target. */
++ if (!TARGET_ARCH64)
++ targetm.asm_out.unaligned_op.di = NULL;
++
++ /* Set the processor costs. */
++ switch (sparc_cpu)
++ {
++ case PROCESSOR_V7:
++ case PROCESSOR_CYPRESS:
++ sparc_costs = &cypress_costs;
++ break;
++ case PROCESSOR_V8:
++ case PROCESSOR_SPARCLITE:
++ case PROCESSOR_SUPERSPARC:
++ sparc_costs = &supersparc_costs;
++ break;
++ case PROCESSOR_F930:
++ case PROCESSOR_F934:
++ case PROCESSOR_HYPERSPARC:
++ case PROCESSOR_SPARCLITE86X:
++ sparc_costs = &hypersparc_costs;
++ break;
++ case PROCESSOR_LEON:
++ sparc_costs = &leon_costs;
++ break;
++ case PROCESSOR_LEON3:
++ case PROCESSOR_LEON3V7:
++ sparc_costs = &leon3_costs;
++ break;
++ case PROCESSOR_SPARCLET:
++ case PROCESSOR_TSC701:
++ sparc_costs = &sparclet_costs;
++ break;
++ case PROCESSOR_V9:
++ case PROCESSOR_ULTRASPARC:
++ sparc_costs = &ultrasparc_costs;
++ break;
++ case PROCESSOR_ULTRASPARC3:
++ sparc_costs = &ultrasparc3_costs;
++ break;
++ case PROCESSOR_NIAGARA:
++ sparc_costs = &niagara_costs;
++ break;
++ case PROCESSOR_NIAGARA2:
++ sparc_costs = &niagara2_costs;
++ break;
++ case PROCESSOR_NIAGARA3:
++ sparc_costs = &niagara3_costs;
++ break;
++ case PROCESSOR_NIAGARA4:
++ sparc_costs = &niagara4_costs;
++ break;
++ case PROCESSOR_NIAGARA7:
++ sparc_costs = &niagara7_costs;
++ break;
++ case PROCESSOR_M8:
++ sparc_costs = &m8_costs;
++ break;
++ case PROCESSOR_NATIVE:
++ gcc_unreachable ();
++ };
++
++ /* param_simultaneous_prefetches is the number of prefetches that
++ can run at the same time. More important, it is the threshold
++ defining when additional prefetches will be dropped by the
++ hardware.
++
++ The UltraSPARC-III features a documented prefetch queue with a
++ size of 8. Additional prefetches issued in the cpu are
++ dropped.
++
++ Niagara processors are different. In these processors prefetches
++ are handled much like regular loads. The L1 miss buffer is 32
++ entries, but prefetches start getting affected when 30 entries
++ become occupied. That occupation could be a mix of regular loads
++ and prefetches though. And that buffer is shared by all threads.
++ Once the threshold is reached, if the core is running a single
++ thread the prefetch will retry. If more than one thread is
++ running, the prefetch will be dropped.
++
++ All this makes it very difficult to determine how many
++ simultaneous prefetches can be issued simultaneously, even in a
++ single-threaded program. Experimental results show that setting
++ this parameter to 32 works well when the number of threads is not
++ high. */
++ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
++ param_simultaneous_prefetches,
++ ((sparc_cpu == PROCESSOR_ULTRASPARC
++ || sparc_cpu == PROCESSOR_NIAGARA
++ || sparc_cpu == PROCESSOR_NIAGARA2
++ || sparc_cpu == PROCESSOR_NIAGARA3
++ || sparc_cpu == PROCESSOR_NIAGARA4)
++ ? 2
++ : (sparc_cpu == PROCESSOR_ULTRASPARC3
++ ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
++ || sparc_cpu == PROCESSOR_M8)
++ ? 32 : 3))));
++
++ /* param_l1_cache_line_size is the size of the L1 cache line, in
++ bytes.
++
++ The Oracle SPARC Architecture (previously the UltraSPARC
++ Architecture) specification states that when a PREFETCH[A]
++ instruction is executed an implementation-specific amount of data
++ is prefetched, and that it is at least 64 bytes long (aligned to
++ at least 64 bytes).
++
++ However, this is not correct. The M7 (and implementations prior
++ to that) does not guarantee a 64B prefetch into a cache if the
++ line size is smaller. A single cache line is all that is ever
++ prefetched. So for the M7, where the L1D$ has 32B lines and the
++ L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
++ L2 and L3, but only 32B are brought into the L1D$. (Assuming it
++ is a read_n prefetch, which is the only type which allocates to
++ the L1.) */
++ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
++ param_l1_cache_line_size,
++ (sparc_cpu == PROCESSOR_M8 ? 64 : 32));
++
++ /* param_l1_cache_size is the size of the L1D$ (most SPARC chips use
++ Hardvard level-1 caches) in kilobytes. Both UltraSPARC and
++ Niagara processors feature a L1D$ of 16KB. */
++ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
++ param_l1_cache_size,
++ ((sparc_cpu == PROCESSOR_ULTRASPARC
++ || sparc_cpu == PROCESSOR_ULTRASPARC3
++ || sparc_cpu == PROCESSOR_NIAGARA
++ || sparc_cpu == PROCESSOR_NIAGARA2
++ || sparc_cpu == PROCESSOR_NIAGARA3
++ || sparc_cpu == PROCESSOR_NIAGARA4
++ || sparc_cpu == PROCESSOR_NIAGARA7
++ || sparc_cpu == PROCESSOR_M8)
++ ? 16 : 64));
++
++ /* param_l2_cache_size is the size fo the L2 in kilobytes. Note
++ that 512 is the default in params.def. */
++ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
++ param_l2_cache_size,
++ ((sparc_cpu == PROCESSOR_NIAGARA4
++ || sparc_cpu == PROCESSOR_M8)
++ ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
++ ? 256 : 512)));
++
++
++ /* Disable save slot sharing for call-clobbered registers by default.
++ The IRA sharing algorithm works on single registers only and this
++ pessimizes for double floating-point registers. */
++ if (!global_options_set.x_flag_ira_share_save_slots)
++ flag_ira_share_save_slots = 0;
++
++ /* Only enable REE by default in 64-bit mode where it helps to eliminate
++ redundant 32-to-64-bit extensions. */
++ if (!global_options_set.x_flag_ree && TARGET_ARCH32)
++ flag_ree = 0;
++
++ /* Do various machine dependent initializations. */
++ sparc_init_modes ();
++
++ /* Set up function hooks. */
++ init_machine_status = sparc_init_machine_status;
++}
++
++/* Miscellaneous utilities. */
++
++/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
++ or branch on register contents instructions. */
++
++int
++v9_regcmp_p (enum rtx_code code)
++{
++ return (code == EQ || code == NE || code == GE || code == LT
++ || code == LE || code == GT);
++}
++
++/* Nonzero if OP is a floating point constant which can
++ be loaded into an integer register using a single
++ sethi instruction. */
++
++int
++fp_sethi_p (rtx op)
++{
++ if (GET_CODE (op) == CONST_DOUBLE)
++ {
++ long i;
++
++ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
++ return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
++ }
++
++ return 0;
++}
++
++/* Nonzero if OP is a floating point constant which can
++ be loaded into an integer register using a single
++ mov instruction. */
++
++int
++fp_mov_p (rtx op)
++{
++ if (GET_CODE (op) == CONST_DOUBLE)
++ {
++ long i;
++
++ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
++ return SPARC_SIMM13_P (i);
++ }
++
++ return 0;
++}
++
++/* Nonzero if OP is a floating point constant which can
++ be loaded into an integer register using a high/losum
++ instruction sequence. */
++
++int
++fp_high_losum_p (rtx op)
++{
++ /* The constraints calling this should only be in
++ SFmode move insns, so any constant which cannot
++ be moved using a single insn will do. */
++ if (GET_CODE (op) == CONST_DOUBLE)
++ {
++ long i;
++
++ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
++ return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
++ }
++
++ return 0;
++}
++
++/* Return true if the address of LABEL can be loaded by means of the
++ mov{si,di}_pic_label_ref patterns in PIC mode. */
++
++static bool
++can_use_mov_pic_label_ref (rtx label)
++{
++ /* VxWorks does not impose a fixed gap between segments; the run-time
++ gap can be different from the object-file gap. We therefore can't
++ assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
++ are absolutely sure that X is in the same segment as the GOT.
++ Unfortunately, the flexibility of linker scripts means that we
++ can't be sure of that in general, so assume that GOT-relative
++ accesses are never valid on VxWorks. */
++ if (TARGET_VXWORKS_RTP)
++ return false;
++
++ /* Similarly, if the label is non-local, it might end up being placed
++ in a different section than the current one; now mov_pic_label_ref
++ requires the label and the code to be in the same section. */
++ if (LABEL_REF_NONLOCAL_P (label))
++ return false;
++
++ /* Finally, if we are reordering basic blocks and partition into hot
++ and cold sections, this might happen for any label. */
++ if (flag_reorder_blocks_and_partition)
++ return false;
++
++ return true;
++}
++
++/* Expand a move instruction. Return true if all work is done. */
++
++bool
++sparc_expand_move (machine_mode mode, rtx *operands)
++{
++ /* Handle sets of MEM first. */
++ if (GET_CODE (operands[0]) == MEM)
++ {
++ /* 0 is a register (or a pair of registers) on SPARC. */
++ if (register_or_zero_operand (operands[1], mode))
++ return false;
++
++ if (!reload_in_progress)
++ {
++ operands[0] = validize_mem (operands[0]);
++ operands[1] = force_reg (mode, operands[1]);
++ }
++ }
++
++ /* Fix up TLS cases. */
++ if (TARGET_HAVE_TLS
++ && CONSTANT_P (operands[1])
++ && sparc_tls_referenced_p (operands [1]))
++ {
++ operands[1] = sparc_legitimize_tls_address (operands[1]);
++ return false;
++ }
++
++ /* Fix up PIC cases. */
++ if (flag_pic && CONSTANT_P (operands[1]))
++ {
++ if (pic_address_needs_scratch (operands[1]))
++ operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
++
++ /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
++ if ((GET_CODE (operands[1]) == LABEL_REF
++ && can_use_mov_pic_label_ref (operands[1]))
++ || (GET_CODE (operands[1]) == CONST
++ && GET_CODE (XEXP (operands[1], 0)) == PLUS
++ && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
++ && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
++ && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
++ {
++ if (mode == SImode)
++ {
++ emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
++ return true;
++ }
++
++ if (mode == DImode)
++ {
++ emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
++ return true;
++ }
++ }
++
++ if (symbolic_operand (operands[1], mode))
++ {
++ operands[1]
++ = sparc_legitimize_pic_address (operands[1],
++ reload_in_progress
++ ? operands[0] : NULL_RTX);
++ return false;
++ }
++ }
++
++ /* If we are trying to toss an integer constant into FP registers,
++ or loading a FP or vector constant, force it into memory. */
++ if (CONSTANT_P (operands[1])
++ && REG_P (operands[0])
++ && (SPARC_FP_REG_P (REGNO (operands[0]))
++ || SCALAR_FLOAT_MODE_P (mode)
++ || VECTOR_MODE_P (mode)))
++ {
++ /* emit_group_store will send such bogosity to us when it is
++ not storing directly into memory. So fix this up to avoid
++ crashes in output_constant_pool. */
++ if (operands [1] == const0_rtx)
++ operands[1] = CONST0_RTX (mode);
++
++ /* We can clear or set to all-ones FP registers if TARGET_VIS, and
++ always other regs. */
++ if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
++ && (const_zero_operand (operands[1], mode)
++ || const_all_ones_operand (operands[1], mode)))
++ return false;
++
++ if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
++ /* We are able to build any SF constant in integer registers
++ with at most 2 instructions. */
++ && (mode == SFmode
++ /* And any DF constant in integer registers if needed. */
++ || (mode == DFmode && !can_create_pseudo_p ())))
++ return false;
++
++ operands[1] = force_const_mem (mode, operands[1]);
++ if (!reload_in_progress)
++ operands[1] = validize_mem (operands[1]);
++ return false;
++ }
++
++ /* Accept non-constants and valid constants unmodified. */
++ if (!CONSTANT_P (operands[1])
++ || GET_CODE (operands[1]) == HIGH
++ || input_operand (operands[1], mode))
++ return false;
++
++ switch (mode)
++ {
++ case E_QImode:
++ /* All QImode constants require only one insn, so proceed. */
++ break;
++
++ case E_HImode:
++ case E_SImode:
++ sparc_emit_set_const32 (operands[0], operands[1]);
++ return true;
++
++ case E_DImode:
++ /* input_operand should have filtered out 32-bit mode. */
++ sparc_emit_set_const64 (operands[0], operands[1]);
++ return true;
++
++ case E_TImode:
++ {
++ rtx high, low;
++ /* TImode isn't available in 32-bit mode. */
++ split_double (operands[1], &high, &low);
++ emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
++ high));
++ emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
++ low));
++ }
++ return true;
++
++ default:
++ gcc_unreachable ();
++ }
++
++ return false;
++}
++
++/* Load OP1, a 32-bit constant, into OP0, a register.
++ We know it can't be done in one insn when we get
++ here, the move expander guarantees this. */
++
++static void
++sparc_emit_set_const32 (rtx op0, rtx op1)
++{
++ machine_mode mode = GET_MODE (op0);
++ rtx temp = op0;
++
++ if (can_create_pseudo_p ())
++ temp = gen_reg_rtx (mode);
++
++ if (GET_CODE (op1) == CONST_INT)
++ {
++ gcc_assert (!small_int_operand (op1, mode)
++ && !const_high_operand (op1, mode));
++
++ /* Emit them as real moves instead of a HIGH/LO_SUM,
++ this way CSE can see everything and reuse intermediate
++ values if it wants. */
++ emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
++ & ~(HOST_WIDE_INT) 0x3ff)));
++
++ emit_insn (gen_rtx_SET (op0,
++ gen_rtx_IOR (mode, temp,
++ GEN_INT (INTVAL (op1) & 0x3ff))));
++ }
++ else
++ {
++ /* A symbol, emit in the traditional way. */
++ emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
++ emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
++ }
++}
++
++/* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
++ If TEMP is nonzero, we are forbidden to use any other scratch
++ registers. Otherwise, we are allowed to generate them as needed.
++
++ Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
++ or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
++
++void
++sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
++{
++ rtx cst, temp1, temp2, temp3, temp4, temp5;
++ rtx ti_temp = 0;
++
++ /* Deal with too large offsets. */
++ if (GET_CODE (op1) == CONST
++ && GET_CODE (XEXP (op1, 0)) == PLUS
++ && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
++ && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
++ {
++ gcc_assert (!temp);
++ temp1 = gen_reg_rtx (DImode);
++ temp2 = gen_reg_rtx (DImode);
++ sparc_emit_set_const64 (temp2, cst);
++ sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
++ NULL_RTX);
++ emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
++ return;
++ }
++
++ if (temp && GET_MODE (temp) == TImode)
++ {
++ ti_temp = temp;
++ temp = gen_rtx_REG (DImode, REGNO (temp));
++ }
++
++ /* SPARC-V9 code model support. */
++ switch (sparc_code_model)
++ {
++ case CM_MEDLOW:
++ /* The range spanned by all instructions in the object is less
++ than 2^31 bytes (2GB) and the distance from any instruction
++ to the location of the label _GLOBAL_OFFSET_TABLE_ is less
++ than 2^31 bytes (2GB).
++
++ The executable must be in the low 4TB of the virtual address
++ space.
++
++ sethi %hi(symbol), %temp1
++ or %temp1, %lo(symbol), %reg */
++ if (temp)
++ temp1 = temp; /* op0 is allowed. */
++ else
++ temp1 = gen_reg_rtx (DImode);
++
++ emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
++ emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
++ break;
++
++ case CM_MEDMID:
++ /* The range spanned by all instructions in the object is less
++ than 2^31 bytes (2GB) and the distance from any instruction
++ to the location of the label _GLOBAL_OFFSET_TABLE_ is less
++ than 2^31 bytes (2GB).
++
++ The executable must be in the low 16TB of the virtual address
++ space.
++
++ sethi %h44(symbol), %temp1
++ or %temp1, %m44(symbol), %temp2
++ sllx %temp2, 12, %temp3
++ or %temp3, %l44(symbol), %reg */
++ if (temp)
++ {
++ temp1 = op0;
++ temp2 = op0;
++ temp3 = temp; /* op0 is allowed. */
++ }
++ else
++ {
++ temp1 = gen_reg_rtx (DImode);
++ temp2 = gen_reg_rtx (DImode);
++ temp3 = gen_reg_rtx (DImode);
++ }
++
++ emit_insn (gen_seth44 (temp1, op1));
++ emit_insn (gen_setm44 (temp2, temp1, op1));
++ emit_insn (gen_rtx_SET (temp3,
++ gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
++ emit_insn (gen_setl44 (op0, temp3, op1));
++ break;
++
++ case CM_MEDANY:
++ /* The range spanned by all instructions in the object is less
++ than 2^31 bytes (2GB) and the distance from any instruction
++ to the location of the label _GLOBAL_OFFSET_TABLE_ is less
++ than 2^31 bytes (2GB).
++
++ The executable can be placed anywhere in the virtual address
++ space.
++
++ sethi %hh(symbol), %temp1
++ sethi %lm(symbol), %temp2
++ or %temp1, %hm(symbol), %temp3
++ sllx %temp3, 32, %temp4
++ or %temp4, %temp2, %temp5
++ or %temp5, %lo(symbol), %reg */
++ if (temp)
++ {
++ /* It is possible that one of the registers we got for operands[2]
++ might coincide with that of operands[0] (which is why we made
++ it TImode). Pick the other one to use as our scratch. */
++ if (rtx_equal_p (temp, op0))
++ {
++ gcc_assert (ti_temp);
++ temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
++ }
++ temp1 = op0;
++ temp2 = temp; /* op0 is _not_ allowed, see above. */
++ temp3 = op0;
++ temp4 = op0;
++ temp5 = op0;
++ }
++ else
++ {
++ temp1 = gen_reg_rtx (DImode);
++ temp2 = gen_reg_rtx (DImode);
++ temp3 = gen_reg_rtx (DImode);
++ temp4 = gen_reg_rtx (DImode);
++ temp5 = gen_reg_rtx (DImode);
++ }
++
++ emit_insn (gen_sethh (temp1, op1));
++ emit_insn (gen_setlm (temp2, op1));
++ emit_insn (gen_sethm (temp3, temp1, op1));
++ emit_insn (gen_rtx_SET (temp4,
++ gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
++ emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
++ emit_insn (gen_setlo (op0, temp5, op1));
++ break;
++
++ case CM_EMBMEDANY:
++ /* Old old old backwards compatibility kruft here.
++ Essentially it is MEDLOW with a fixed 64-bit
++ virtual base added to all data segment addresses.
++ Text-segment stuff is computed like MEDANY, we can't
++ reuse the code above because the relocation knobs
++ look different.
++
++ Data segment: sethi %hi(symbol), %temp1
++ add %temp1, EMBMEDANY_BASE_REG, %temp2
++ or %temp2, %lo(symbol), %reg */
++ if (data_segment_operand (op1, GET_MODE (op1)))
++ {
++ if (temp)
++ {
++ temp1 = temp; /* op0 is allowed. */
++ temp2 = op0;
++ }
++ else
++ {
++ temp1 = gen_reg_rtx (DImode);
++ temp2 = gen_reg_rtx (DImode);
++ }
++
++ emit_insn (gen_embmedany_sethi (temp1, op1));
++ emit_insn (gen_embmedany_brsum (temp2, temp1));
++ emit_insn (gen_embmedany_losum (op0, temp2, op1));
++ }
++
++ /* Text segment: sethi %uhi(symbol), %temp1
++ sethi %hi(symbol), %temp2
++ or %temp1, %ulo(symbol), %temp3
++ sllx %temp3, 32, %temp4
++ or %temp4, %temp2, %temp5
++ or %temp5, %lo(symbol), %reg */
++ else
++ {
++ if (temp)
++ {
++ /* It is possible that one of the registers we got for operands[2]
++ might coincide with that of operands[0] (which is why we made
++ it TImode). Pick the other one to use as our scratch. */
++ if (rtx_equal_p (temp, op0))
++ {
++ gcc_assert (ti_temp);
++ temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
++ }
++ temp1 = op0;
++ temp2 = temp; /* op0 is _not_ allowed, see above. */
++ temp3 = op0;
++ temp4 = op0;
++ temp5 = op0;
++ }
++ else
++ {
++ temp1 = gen_reg_rtx (DImode);
++ temp2 = gen_reg_rtx (DImode);
++ temp3 = gen_reg_rtx (DImode);
++ temp4 = gen_reg_rtx (DImode);
++ temp5 = gen_reg_rtx (DImode);
++ }
++
++ emit_insn (gen_embmedany_textuhi (temp1, op1));
++ emit_insn (gen_embmedany_texthi (temp2, op1));
++ emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
++ emit_insn (gen_rtx_SET (temp4,
++ gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
++ emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
++ emit_insn (gen_embmedany_textlo (op0, temp5, op1));
++ }
++ break;
++
++ default:
++ gcc_unreachable ();
++ }
++}
++
++/* These avoid problems when cross compiling. If we do not
++ go through all this hair then the optimizer will see
++ invalid REG_EQUAL notes or in some cases none at all. */
++static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
++static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
++static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
++static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
++
++/* The optimizer is not to assume anything about exactly
++ which bits are set for a HIGH, they are unspecified.
++ Unfortunately this leads to many missed optimizations
++ during CSE. We mask out the non-HIGH bits, and matches
++ a plain movdi, to alleviate this problem. */
++static rtx
++gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
++{
++ return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
++}
++
++static rtx
++gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
++{
++ return gen_rtx_SET (dest, GEN_INT (val));
++}
++
++static rtx
++gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
++{
++ return gen_rtx_IOR (DImode, src, GEN_INT (val));
++}
++
++static rtx
++gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
++{
++ return gen_rtx_XOR (DImode, src, GEN_INT (val));
++}
++
++/* Worker routines for 64-bit constant formation on arch64.
++ One of the key things to be doing in these emissions is
++ to create as many temp REGs as possible. This makes it
++ possible for half-built constants to be used later when
++ such values are similar to something required later on.
++ Without doing this, the optimizer cannot see such
++ opportunities. */
++
++static void sparc_emit_set_const64_quick1 (rtx, rtx,
++ unsigned HOST_WIDE_INT, int);
++
++static void
++sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
++ unsigned HOST_WIDE_INT low_bits, int is_neg)
++{
++ unsigned HOST_WIDE_INT high_bits;
++
++ if (is_neg)
++ high_bits = (~low_bits) & 0xffffffff;
++ else
++ high_bits = low_bits;
++
++ emit_insn (gen_safe_HIGH64 (temp, high_bits));
++ if (!is_neg)
++ {
++ emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
++ }
++ else
++ {
++ /* If we are XOR'ing with -1, then we should emit a one's complement
++ instead. This way the combiner will notice logical operations
++ such as ANDN later on and substitute. */
++ if ((low_bits & 0x3ff) == 0x3ff)
++ {
++ emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
++ }
++ else
++ {
++ emit_insn (gen_rtx_SET (op0,
++ gen_safe_XOR64 (temp,
++ (-(HOST_WIDE_INT)0x400
++ | (low_bits & 0x3ff)))));
++ }
++ }
++}
++
++static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
++ unsigned HOST_WIDE_INT, int);
++
++static void
++sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
++ unsigned HOST_WIDE_INT high_bits,
++ unsigned HOST_WIDE_INT low_immediate,
++ int shift_count)
++{
++ rtx temp2 = op0;
++
++ if ((high_bits & 0xfffffc00) != 0)
++ {
++ emit_insn (gen_safe_HIGH64 (temp, high_bits));
++ if ((high_bits & ~0xfffffc00) != 0)
++ emit_insn (gen_rtx_SET (op0,
++ gen_safe_OR64 (temp, (high_bits & 0x3ff))));
++ else
++ temp2 = temp;
++ }
++ else
++ {
++ emit_insn (gen_safe_SET64 (temp, high_bits));
++ temp2 = temp;
++ }
++
++ /* Now shift it up into place. */
++ emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
++ GEN_INT (shift_count))));
++
++ /* If there is a low immediate part piece, finish up by
++ putting that in as well. */
++ if (low_immediate != 0)
++ emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
++}
++
++static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
++ unsigned HOST_WIDE_INT);
++
++/* Full 64-bit constant decomposition. Even though this is the
++ 'worst' case, we still optimize a few things away. */
++static void
++sparc_emit_set_const64_longway (rtx op0, rtx temp,
++ unsigned HOST_WIDE_INT high_bits,
++ unsigned HOST_WIDE_INT low_bits)
++{
++ rtx sub_temp = op0;
++
++ if (can_create_pseudo_p ())
++ sub_temp = gen_reg_rtx (DImode);
++
++ if ((high_bits & 0xfffffc00) != 0)
++ {
++ emit_insn (gen_safe_HIGH64 (temp, high_bits));
++ if ((high_bits & ~0xfffffc00) != 0)
++ emit_insn (gen_rtx_SET (sub_temp,
++ gen_safe_OR64 (temp, (high_bits & 0x3ff))));
++ else
++ sub_temp = temp;
++ }
++ else
++ {
++ emit_insn (gen_safe_SET64 (temp, high_bits));
++ sub_temp = temp;
++ }
++
++ if (can_create_pseudo_p ())
++ {
++ rtx temp2 = gen_reg_rtx (DImode);
++ rtx temp3 = gen_reg_rtx (DImode);
++ rtx temp4 = gen_reg_rtx (DImode);
++
++ emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
++ GEN_INT (32))));
++
++ emit_insn (gen_safe_HIGH64 (temp2, low_bits));
++ if ((low_bits & ~0xfffffc00) != 0)
++ {
++ emit_insn (gen_rtx_SET (temp3,
++ gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
++ emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
++ }
++ else
++ {
++ emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
++ }
++ }
++ else
++ {
++ rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
++ rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
++ rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
++ int to_shift = 12;
++
++ /* We are in the middle of reload, so this is really
++ painful. However we do still make an attempt to
++ avoid emitting truly stupid code. */
++ if (low1 != const0_rtx)
++ {
++ emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
++ GEN_INT (to_shift))));
++ emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
++ sub_temp = op0;
++ to_shift = 12;
++ }
++ else
++ {
++ to_shift += 12;
++ }
++ if (low2 != const0_rtx)
++ {
++ emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
++ GEN_INT (to_shift))));
++ emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
++ sub_temp = op0;
++ to_shift = 8;
++ }
++ else
++ {
++ to_shift += 8;
++ }
++ emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
++ GEN_INT (to_shift))));
++ if (low3 != const0_rtx)
++ emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
++ /* phew... */
++ }
++}
++
++/* Analyze a 64-bit constant for certain properties. */
++static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
++ unsigned HOST_WIDE_INT,
++ int *, int *, int *);
++
++static void
++analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
++ unsigned HOST_WIDE_INT low_bits,
++ int *hbsp, int *lbsp, int *abbasp)
++{
++ int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
++ int i;
++
++ lowest_bit_set = highest_bit_set = -1;
++ i = 0;
++ do
++ {
++ if ((lowest_bit_set == -1)
++ && ((low_bits >> i) & 1))
++ lowest_bit_set = i;
++ if ((highest_bit_set == -1)
++ && ((high_bits >> (32 - i - 1)) & 1))
++ highest_bit_set = (64 - i - 1);
++ }
++ while (++i < 32
++ && ((highest_bit_set == -1)
++ || (lowest_bit_set == -1)));
++ if (i == 32)
++ {
++ i = 0;
++ do
++ {
++ if ((lowest_bit_set == -1)
++ && ((high_bits >> i) & 1))
++ lowest_bit_set = i + 32;
++ if ((highest_bit_set == -1)
++ && ((low_bits >> (32 - i - 1)) & 1))
++ highest_bit_set = 32 - i - 1;
++ }
++ while (++i < 32
++ && ((highest_bit_set == -1)
++ || (lowest_bit_set == -1)));
++ }
++ /* If there are no bits set this should have gone out
++ as one instruction! */
++ gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
++ all_bits_between_are_set = 1;
++ for (i = lowest_bit_set; i <= highest_bit_set; i++)
++ {
++ if (i < 32)
++ {
++ if ((low_bits & (1 << i)) != 0)
++ continue;
++ }
++ else
++ {
++ if ((high_bits & (1 << (i - 32))) != 0)
++ continue;
++ }
++ all_bits_between_are_set = 0;
++ break;
++ }
++ *hbsp = highest_bit_set;
++ *lbsp = lowest_bit_set;
++ *abbasp = all_bits_between_are_set;
++}
++
++static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
++
++static int
++const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
++ unsigned HOST_WIDE_INT low_bits)
++{
++ int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
++
++ if (high_bits == 0
++ || high_bits == 0xffffffff)
++ return 1;
++
++ analyze_64bit_constant (high_bits, low_bits,
++ &highest_bit_set, &lowest_bit_set,
++ &all_bits_between_are_set);
++
++ if ((highest_bit_set == 63
++ || lowest_bit_set == 0)
++ && all_bits_between_are_set != 0)
++ return 1;
++
++ if ((highest_bit_set - lowest_bit_set) < 21)
++ return 1;
++
++ return 0;
++}
++
++static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
++ unsigned HOST_WIDE_INT,
++ int, int);
++
++static unsigned HOST_WIDE_INT
++create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
++ unsigned HOST_WIDE_INT low_bits,
++ int lowest_bit_set, int shift)
++{
++ HOST_WIDE_INT hi, lo;
++
++ if (lowest_bit_set < 32)
++ {
++ lo = (low_bits >> lowest_bit_set) << shift;
++ hi = ((high_bits << (32 - lowest_bit_set)) << shift);
++ }
++ else
++ {
++ lo = 0;
++ hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
++ }
++ gcc_assert (! (hi & lo));
++ return (hi | lo);
++}
++
++/* Here we are sure to be arch64 and this is an integer constant
++ being loaded into a register. Emit the most efficient
++ insn sequence possible. Detection of all the 1-insn cases
++ has been done already. */
++static void
++sparc_emit_set_const64 (rtx op0, rtx op1)
++{
++ unsigned HOST_WIDE_INT high_bits, low_bits;
++ int lowest_bit_set, highest_bit_set;
++ int all_bits_between_are_set;
++ rtx temp = 0;
++
++ /* Sanity check that we know what we are working with. */
++ gcc_assert (TARGET_ARCH64
++ && (GET_CODE (op0) == SUBREG
++ || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
++
++ if (! can_create_pseudo_p ())
++ temp = op0;
++
++ if (GET_CODE (op1) != CONST_INT)
++ {
++ sparc_emit_set_symbolic_const64 (op0, op1, temp);
++ return;
++ }
++
++ if (! temp)
++ temp = gen_reg_rtx (DImode);
++
++ high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
++ low_bits = (INTVAL (op1) & 0xffffffff);
++
++ /* low_bits bits 0 --> 31
++ high_bits bits 32 --> 63 */
++
++ analyze_64bit_constant (high_bits, low_bits,
++ &highest_bit_set, &lowest_bit_set,
++ &all_bits_between_are_set);
++
++ /* First try for a 2-insn sequence. */
++
++ /* These situations are preferred because the optimizer can
++ * do more things with them:
++ * 1) mov -1, %reg
++ * sllx %reg, shift, %reg
++ * 2) mov -1, %reg
++ * srlx %reg, shift, %reg
++ * 3) mov some_small_const, %reg
++ * sllx %reg, shift, %reg
++ */
++ if (((highest_bit_set == 63
++ || lowest_bit_set == 0)
++ && all_bits_between_are_set != 0)
++ || ((highest_bit_set - lowest_bit_set) < 12))
++ {
++ HOST_WIDE_INT the_const = -1;
++ int shift = lowest_bit_set;
++
++ if ((highest_bit_set != 63
++ && lowest_bit_set != 0)
++ || all_bits_between_are_set == 0)
++ {
++ the_const =
++ create_simple_focus_bits (high_bits, low_bits,
++ lowest_bit_set, 0);
++ }
++ else if (lowest_bit_set == 0)
++ shift = -(63 - highest_bit_set);
++
++ gcc_assert (SPARC_SIMM13_P (the_const));
++ gcc_assert (shift != 0);
++
++ emit_insn (gen_safe_SET64 (temp, the_const));
++ if (shift > 0)
++ emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
++ GEN_INT (shift))));
++ else if (shift < 0)
++ emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
++ GEN_INT (-shift))));
++ return;
++ }
++
++ /* Now a range of 22 or less bits set somewhere.
++ * 1) sethi %hi(focus_bits), %reg
++ * sllx %reg, shift, %reg
++ * 2) sethi %hi(focus_bits), %reg
++ * srlx %reg, shift, %reg
++ */
++ if ((highest_bit_set - lowest_bit_set) < 21)
++ {
++ unsigned HOST_WIDE_INT focus_bits =
++ create_simple_focus_bits (high_bits, low_bits,
++ lowest_bit_set, 10);
++
++ gcc_assert (SPARC_SETHI_P (focus_bits));
++ gcc_assert (lowest_bit_set != 10);
++
++ emit_insn (gen_safe_HIGH64 (temp, focus_bits));
++
++ /* If lowest_bit_set == 10 then a sethi alone could have done it. */
++ if (lowest_bit_set < 10)
++ emit_insn (gen_rtx_SET (op0,
++ gen_rtx_LSHIFTRT (DImode, temp,
++ GEN_INT (10 - lowest_bit_set))));
++ else if (lowest_bit_set > 10)
++ emit_insn (gen_rtx_SET (op0,
++ gen_rtx_ASHIFT (DImode, temp,
++ GEN_INT (lowest_bit_set - 10))));
++ return;
++ }
++
++ /* 1) sethi %hi(low_bits), %reg
++ * or %reg, %lo(low_bits), %reg
++ * 2) sethi %hi(~low_bits), %reg
++ * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
++ */
++ if (high_bits == 0
++ || high_bits == 0xffffffff)
++ {
++ sparc_emit_set_const64_quick1 (op0, temp, low_bits,
++ (high_bits == 0xffffffff));
++ return;
++ }
++
++ /* Now, try 3-insn sequences. */
++
++ /* 1) sethi %hi(high_bits), %reg
++ * or %reg, %lo(high_bits), %reg
++ * sllx %reg, 32, %reg
++ */
++ if (low_bits == 0)
++ {
++ sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
++ return;
++ }
++
++ /* We may be able to do something quick
++ when the constant is negated, so try that. */
++ if (const64_is_2insns ((~high_bits) & 0xffffffff,
++ (~low_bits) & 0xfffffc00))
++ {
++ /* NOTE: The trailing bits get XOR'd so we need the
++ non-negated bits, not the negated ones. */
++ unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
++
++ if ((((~high_bits) & 0xffffffff) == 0
++ && ((~low_bits) & 0x80000000) == 0)
++ || (((~high_bits) & 0xffffffff) == 0xffffffff
++ && ((~low_bits) & 0x80000000) != 0))
++ {
++ unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
++
++ if ((SPARC_SETHI_P (fast_int)
++ && (~high_bits & 0xffffffff) == 0)
++ || SPARC_SIMM13_P (fast_int))
++ emit_insn (gen_safe_SET64 (temp, fast_int));
++ else
++ sparc_emit_set_const64 (temp, GEN_INT (fast_int));
++ }
++ else
++ {
++ rtx negated_const;
++ negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
++ (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
++ sparc_emit_set_const64 (temp, negated_const);
++ }
++
++ /* If we are XOR'ing with -1, then we should emit a one's complement
++ instead. This way the combiner will notice logical operations
++ such as ANDN later on and substitute. */
++ if (trailing_bits == 0x3ff)
++ {
++ emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
++ }
++ else
++ {
++ emit_insn (gen_rtx_SET (op0,
++ gen_safe_XOR64 (temp,
++ (-0x400 | trailing_bits))));
++ }
++ return;
++ }
++
++ /* 1) sethi %hi(xxx), %reg
++ * or %reg, %lo(xxx), %reg
++ * sllx %reg, yyy, %reg
++ *
++ * ??? This is just a generalized version of the low_bits==0
++ * thing above, FIXME...
++ */
++ if ((highest_bit_set - lowest_bit_set) < 32)
++ {
++ unsigned HOST_WIDE_INT focus_bits =
++ create_simple_focus_bits (high_bits, low_bits,
++ lowest_bit_set, 0);
++
++ /* We can't get here in this state. */
++ gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
++
++ /* So what we know is that the set bits straddle the
++ middle of the 64-bit word. */
++ sparc_emit_set_const64_quick2 (op0, temp,
++ focus_bits, 0,
++ lowest_bit_set);
++ return;
++ }
++
++ /* 1) sethi %hi(high_bits), %reg
++ * or %reg, %lo(high_bits), %reg
++ * sllx %reg, 32, %reg
++ * or %reg, low_bits, %reg
++ */
++ if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
++ {
++ sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
++ return;
++ }
++
++ /* The easiest way when all else fails, is full decomposition. */
++ sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
++}
++
++/* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
++
++static bool
++sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
++{
++ *p1 = SPARC_ICC_REG;
++ *p2 = SPARC_FCC_REG;
++ return true;
++}
++
++/* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
++
++static unsigned int
++sparc_min_arithmetic_precision (void)
++{
++ return 32;
++}
++
++/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
++ return the mode to be used for the comparison. For floating-point,
++ CCFP[E]mode is used. CCNZmode should be used when the first operand
++ is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
++ processing is needed. */
++
++machine_mode
++select_cc_mode (enum rtx_code op, rtx x, rtx y)
++{
++ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
++ {
++ switch (op)
++ {
++ case EQ:
++ case NE:
++ case UNORDERED:
++ case ORDERED:
++ case UNLT:
++ case UNLE:
++ case UNGT:
++ case UNGE:
++ case UNEQ:
++ return CCFPmode;
++
++ case LT:
++ case LE:
++ case GT:
++ case GE:
++ case LTGT:
++ return CCFPEmode;
++
++ default:
++ gcc_unreachable ();
++ }
++ }
++ else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
++ || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
++ && y == const0_rtx)
++ {
++ if (TARGET_ARCH64 && GET_MODE (x) == DImode)
++ return CCXNZmode;
++ else
++ return CCNZmode;
++ }
++ else
++ {
++ /* This is for the cmp<mode>_sne pattern. */
++ if (GET_CODE (x) == NOT && y == constm1_rtx)
++ {
++ if (TARGET_ARCH64 && GET_MODE (x) == DImode)
++ return CCXCmode;
++ else
++ return CCCmode;
++ }
++
++ /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
++ if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
++ {
++ if (GET_CODE (y) == UNSPEC
++ && (XINT (y, 1) == UNSPEC_ADDV
++ || XINT (y, 1) == UNSPEC_SUBV
++ || XINT (y, 1) == UNSPEC_NEGV))
++ return CCVmode;
++ else
++ return CCCmode;
++ }
++
++ if (TARGET_ARCH64 && GET_MODE (x) == DImode)
++ return CCXmode;
++ else
++ return CCmode;
++ }
++}
++
++/* Emit the compare insn and return the CC reg for a CODE comparison
++ with operands X and Y. */
++
++static rtx
++gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
++{
++ machine_mode mode;
++ rtx cc_reg;
++
++ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
++ return x;
++
++ mode = SELECT_CC_MODE (code, x, y);
++
++ /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
++ fcc regs (cse can't tell they're really call clobbered regs and will
++ remove a duplicate comparison even if there is an intervening function
++ call - it will then try to reload the cc reg via an int reg which is why
++ we need the movcc patterns). It is possible to provide the movcc
++ patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
++ registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
++ to tell cse that CCFPE mode registers (even pseudos) are call
++ clobbered. */
++
++ /* ??? This is an experiment. Rather than making changes to cse which may
++ or may not be easy/clean, we do our own cse. This is possible because
++ we will generate hard registers. Cse knows they're call clobbered (it
++ doesn't know the same thing about pseudos). If we guess wrong, no big
++ deal, but if we win, great! */
++
++ if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
++#if 1 /* experiment */
++ {
++ int reg;
++ /* We cycle through the registers to ensure they're all exercised. */
++ static int next_fcc_reg = 0;
++ /* Previous x,y for each fcc reg. */
++ static rtx prev_args[4][2];
++
++ /* Scan prev_args for x,y. */
++ for (reg = 0; reg < 4; reg++)
++ if (prev_args[reg][0] == x && prev_args[reg][1] == y)
++ break;
++ if (reg == 4)
++ {
++ reg = next_fcc_reg;
++ prev_args[reg][0] = x;
++ prev_args[reg][1] = y;
++ next_fcc_reg = (next_fcc_reg + 1) & 3;
++ }
++ cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
++ }
++#else
++ cc_reg = gen_reg_rtx (mode);
++#endif /* ! experiment */
++ else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
++ cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
++ else
++ cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
++
++ /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD. If we do, this
++ will only result in an unrecognizable insn so no point in asserting. */
++ emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
++
++ return cc_reg;
++}
++
++
++/* Emit the compare insn and return the CC reg for the comparison in CMP. */
++
++rtx
++gen_compare_reg (rtx cmp)
++{
++ return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
++}
++
++/* This function is used for v9 only.
++ DEST is the target of the Scc insn.
++ CODE is the code for an Scc's comparison.
++ X and Y are the values we compare.
++
++ This function is needed to turn
++
++ (set (reg:SI 110)
++ (gt (reg:CCX 100 %icc)
++ (const_int 0)))
++ into
++ (set (reg:SI 110)
++ (gt:DI (reg:CCX 100 %icc)
++ (const_int 0)))
++
++ IE: The instruction recognizer needs to see the mode of the comparison to
++ find the right instruction. We could use "gt:DI" right in the
++ define_expand, but leaving it out allows us to handle DI, SI, etc. */
++
++static int
++gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
++{
++ if (! TARGET_ARCH64
++ && (GET_MODE (x) == DImode
++ || GET_MODE (dest) == DImode))
++ return 0;
++
++ /* Try to use the movrCC insns. */
++ if (TARGET_ARCH64
++ && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
++ && y == const0_rtx
++ && v9_regcmp_p (compare_code))
++ {
++ rtx op0 = x;
++ rtx temp;
++
++ /* Special case for op0 != 0. This can be done with one instruction if
++ dest == x. */
++
++ if (compare_code == NE
++ && GET_MODE (dest) == DImode
++ && rtx_equal_p (op0, dest))
++ {
++ emit_insn (gen_rtx_SET (dest,
++ gen_rtx_IF_THEN_ELSE (DImode,
++ gen_rtx_fmt_ee (compare_code, DImode,
++ op0, const0_rtx),
++ const1_rtx,
++ dest)));
++ return 1;
++ }
++
++ if (reg_overlap_mentioned_p (dest, op0))
++ {
++ /* Handle the case where dest == x.
++ We "early clobber" the result. */
++ op0 = gen_reg_rtx (GET_MODE (x));
++ emit_move_insn (op0, x);
++ }
++
++ emit_insn (gen_rtx_SET (dest, const0_rtx));
++ if (GET_MODE (op0) != DImode)
++ {
++ temp = gen_reg_rtx (DImode);
++ convert_move (temp, op0, 0);
++ }
++ else
++ temp = op0;
++ emit_insn (gen_rtx_SET (dest,
++ gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
++ gen_rtx_fmt_ee (compare_code, DImode,
++ temp, const0_rtx),
++ const1_rtx,
++ dest)));
++ return 1;
++ }
++ else
++ {
++ x = gen_compare_reg_1 (compare_code, x, y);
++ y = const0_rtx;
++
++ emit_insn (gen_rtx_SET (dest, const0_rtx));
++ emit_insn (gen_rtx_SET (dest,
++ gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
++ gen_rtx_fmt_ee (compare_code,
++ GET_MODE (x), x, y),
++ const1_rtx, dest)));
++ return 1;
++ }
++}
++
++
++/* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
++ without jumps using the addx/subx instructions. */
++
++bool
++emit_scc_insn (rtx operands[])
++{
++ rtx tem, x, y;
++ enum rtx_code code;
++ machine_mode mode;
++
++ /* The quad-word fp compare library routines all return nonzero to indicate
++ true, which is different from the equivalent libgcc routines, so we must
++ handle them specially here. */
++ if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
++ {
++ operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
++ GET_CODE (operands[1]));
++ operands[2] = XEXP (operands[1], 0);
++ operands[3] = XEXP (operands[1], 1);
++ }
++
++ code = GET_CODE (operands[1]);
++ x = operands[2];
++ y = operands[3];
++ mode = GET_MODE (x);
++
++ /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
++ more applications). The exception to this is "reg != 0" which can
++ be done in one instruction on v9 (so we do it). */
++ if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
++ {
++ if (y != const0_rtx)
++ x = force_reg (mode, gen_rtx_XOR (mode, x, y));
++
++ rtx pat = gen_rtx_SET (operands[0],
++ gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
++ x, const0_rtx));
++
++ /* If we can use addx/subx or addxc, add a clobber for CC. */
++ if (mode == SImode || (code == NE && TARGET_VIS3))
++ {
++ rtx clobber
++ = gen_rtx_CLOBBER (VOIDmode,
++ gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
++ SPARC_ICC_REG));
++ pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
++ }
++
++ emit_insn (pat);
++ return true;
++ }
++
++ /* We can do LTU in DImode using the addxc instruction with VIS3. */
++ if (TARGET_ARCH64
++ && mode == DImode
++ && !((code == LTU || code == GTU) && TARGET_VIS3)
++ && gen_v9_scc (operands[0], code, x, y))
++ return true;
++
++ /* We can do LTU and GEU using the addx/subx instructions too. And
++ for GTU/LEU, if both operands are registers swap them and fall
++ back to the easy case. */
++ if (code == GTU || code == LEU)
++ {
++ if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
++ && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
++ {
++ tem = x;
++ x = y;
++ y = tem;
++ code = swap_condition (code);
++ }
++ }
++
++ if (code == LTU || code == GEU)
++ {
++ emit_insn (gen_rtx_SET (operands[0],
++ gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
++ gen_compare_reg_1 (code, x, y),
++ const0_rtx)));
++ return true;
++ }
++
++ /* All the posibilities to use addx/subx based sequences has been
++ exhausted, try for a 3 instruction sequence using v9 conditional
++ moves. */
++ if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
++ return true;
++
++ /* Nope, do branches. */
++ return false;
++}
++
++/* Emit a conditional jump insn for the v9 architecture using comparison code
++ CODE and jump target LABEL.
++ This function exists to take advantage of the v9 brxx insns. */
++
++static void
++emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
++{
++ emit_jump_insn (gen_rtx_SET (pc_rtx,
++ gen_rtx_IF_THEN_ELSE (VOIDmode,
++ gen_rtx_fmt_ee (code, GET_MODE (op0),
++ op0, const0_rtx),
++ gen_rtx_LABEL_REF (VOIDmode, label),
++ pc_rtx)));
++}
++
++/* Emit a conditional jump insn for the UA2011 architecture using
++ comparison code CODE and jump target LABEL. This function exists
++ to take advantage of the UA2011 Compare and Branch insns. */
++
++static void
++emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
++{
++ rtx if_then_else;
++
++ if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
++ gen_rtx_fmt_ee(code, GET_MODE(op0),
++ op0, op1),
++ gen_rtx_LABEL_REF (VOIDmode, label),
++ pc_rtx);
++
++ emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
++}
++
++void
++emit_conditional_branch_insn (rtx operands[])
++{
++ /* The quad-word fp compare library routines all return nonzero to indicate
++ true, which is different from the equivalent libgcc routines, so we must
++ handle them specially here. */
++ if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
++ {
++ operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
++ GET_CODE (operands[0]));
++ operands[1] = XEXP (operands[0], 0);
++ operands[2] = XEXP (operands[0], 1);
++ }
++
++ /* If we can tell early on that the comparison is against a constant
++ that won't fit in the 5-bit signed immediate field of a cbcond,
++ use one of the other v9 conditional branch sequences. */
++ if (TARGET_CBCOND
++ && GET_CODE (operands[1]) == REG
++ && (GET_MODE (operands[1]) == SImode
++ || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
++ && (GET_CODE (operands[2]) != CONST_INT
++ || SPARC_SIMM5_P (INTVAL (operands[2]))))
++ {
++ emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
++ return;
++ }
++
++ if (TARGET_ARCH64 && operands[2] == const0_rtx
++ && GET_CODE (operands[1]) == REG
++ && GET_MODE (operands[1]) == DImode)
++ {
++ emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
++ return;
++ }
++
++ operands[1] = gen_compare_reg (operands[0]);
++ operands[2] = const0_rtx;
++ operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
++ operands[1], operands[2]);
++ emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
++ operands[3]));
++}
++
++
++/* Generate a DFmode part of a hard TFmode register.
++ REG is the TFmode hard register, LOW is 1 for the
++ low 64bit of the register and 0 otherwise.
++ */
++rtx
++gen_df_reg (rtx reg, int low)
++{
++ int regno = REGNO (reg);
++
++ if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
++ regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
++ return gen_rtx_REG (DFmode, regno);
++}
++
++/* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
++ Unlike normal calls, TFmode operands are passed by reference. It is
++ assumed that no more than 3 operands are required. */
++
++static void
++emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
++{
++ rtx ret_slot = NULL, arg[3], func_sym;
++ int i;
++
++ /* We only expect to be called for conversions, unary, and binary ops. */
++ gcc_assert (nargs == 2 || nargs == 3);
++
++ for (i = 0; i < nargs; ++i)
++ {
++ rtx this_arg = operands[i];
++ rtx this_slot;
++
++ /* TFmode arguments and return values are passed by reference. */
++ if (GET_MODE (this_arg) == TFmode)
++ {
++ int force_stack_temp;
++
++ force_stack_temp = 0;
++ if (TARGET_BUGGY_QP_LIB && i == 0)
++ force_stack_temp = 1;
++
++ if (GET_CODE (this_arg) == MEM
++ && ! force_stack_temp)
++ {
++ tree expr = MEM_EXPR (this_arg);
++ if (expr)
++ mark_addressable (expr);
++ this_arg = XEXP (this_arg, 0);
++ }
++ else if (CONSTANT_P (this_arg)
++ && ! force_stack_temp)
++ {
++ this_slot = force_const_mem (TFmode, this_arg);
++ this_arg = XEXP (this_slot, 0);
++ }
++ else
++ {
++ this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
++
++ /* Operand 0 is the return value. We'll copy it out later. */
++ if (i > 0)
++ emit_move_insn (this_slot, this_arg);
++ else
++ ret_slot = this_slot;
++
++ this_arg = XEXP (this_slot, 0);
++ }
++ }
++
++ arg[i] = this_arg;
++ }
++
++ func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
++
++ if (GET_MODE (operands[0]) == TFmode)
++ {
++ if (nargs == 2)
++ emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
++ arg[0], GET_MODE (arg[0]),
++ arg[1], GET_MODE (arg[1]));
++ else
++ emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
++ arg[0], GET_MODE (arg[0]),
++ arg[1], GET_MODE (arg[1]),
++ arg[2], GET_MODE (arg[2]));
++
++ if (ret_slot)
++ emit_move_insn (operands[0], ret_slot);
++ }
++ else
++ {
++ rtx ret;
++
++ gcc_assert (nargs == 2);
++
++ ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
++ GET_MODE (operands[0]),
++ arg[1], GET_MODE (arg[1]));
++
++ if (ret != operands[0])
++ emit_move_insn (operands[0], ret);
++ }
++}
++
++/* Expand soft-float TFmode calls to sparc abi routines. */
++
++static void
++emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
++{
++ const char *func;
++
++ switch (code)
++ {
++ case PLUS:
++ func = "_Qp_add";
++ break;
++ case MINUS:
++ func = "_Qp_sub";
++ break;
++ case MULT:
++ func = "_Qp_mul";
++ break;
++ case DIV:
++ func = "_Qp_div";
++ break;
++ default:
++ gcc_unreachable ();
++ }
++
++ emit_soft_tfmode_libcall (func, 3, operands);
++}
++
++static void
++emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
++{
++ const char *func;
++
++ gcc_assert (code == SQRT);
++ func = "_Qp_sqrt";
++
++ emit_soft_tfmode_libcall (func, 2, operands);
++}
++
++static void
++emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
++{
++ const char *func;
++
++ switch (code)
++ {
++ case FLOAT_EXTEND:
++ switch (GET_MODE (operands[1]))
++ {
++ case E_SFmode:
++ func = "_Qp_stoq";
++ break;
++ case E_DFmode:
++ func = "_Qp_dtoq";
++ break;
++ default:
++ gcc_unreachable ();
++ }
++ break;
++
++ case FLOAT_TRUNCATE:
++ switch (GET_MODE (operands[0]))
++ {
++ case E_SFmode:
++ func = "_Qp_qtos";
++ break;
++ case E_DFmode:
++ func = "_Qp_qtod";
++ break;
++ default:
++ gcc_unreachable ();
++ }
++ break;
++
++ case FLOAT:
++ switch (GET_MODE (operands[1]))
++ {
++ case E_SImode:
++ func = "_Qp_itoq";
++ if (TARGET_ARCH64)
++ operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
++ break;
++ case E_DImode:
++ func = "_Qp_xtoq";
++ break;
++ default:
++ gcc_unreachable ();
++ }
++ break;
++
++ case UNSIGNED_FLOAT:
++ switch (GET_MODE (operands[1]))
++ {
++ case E_SImode:
++ func = "_Qp_uitoq";
++ if (TARGET_ARCH64)
++ operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
++ break;
++ case E_DImode:
++ func = "_Qp_uxtoq";
++ break;
++ default:
++ gcc_unreachable ();
++ }
++ break;
++
++ case FIX:
++ switch (GET_MODE (operands[0]))
++ {
++ case E_SImode:
++ func = "_Qp_qtoi";
++ break;
++ case E_DImode:
++ func = "_Qp_qtox";
++ break;
++ default:
++ gcc_unreachable ();
++ }
++ break;
++
++ case UNSIGNED_FIX:
++ switch (GET_MODE (operands[0]))
++ {
++ case E_SImode:
++ func = "_Qp_qtoui";
++ break;
++ case E_DImode:
++ func = "_Qp_qtoux";
++ break;
++ default:
++ gcc_unreachable ();
++ }
++ break;
++
++ default:
++ gcc_unreachable ();
++ }
++
++ emit_soft_tfmode_libcall (func, 2, operands);
++}
++
++/* Expand a hard-float tfmode operation. All arguments must be in
++ registers. */
++
++static void
++emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
++{
++ rtx op, dest;
++
++ if (GET_RTX_CLASS (code) == RTX_UNARY)
++ {
++ operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
++ op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
++ }
++ else
++ {
++ operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
++ operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
++ op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
++ operands[1], operands[2]);
++ }
++
++ if (register_operand (operands[0], VOIDmode))
++ dest = operands[0];
++ else
++ dest = gen_reg_rtx (GET_MODE (operands[0]));
++
++ emit_insn (gen_rtx_SET (dest, op));
++
++ if (dest != operands[0])
++ emit_move_insn (operands[0], dest);
++}
++
++void
++emit_tfmode_binop (enum rtx_code code, rtx *operands)
++{
++ if (TARGET_HARD_QUAD)
++ emit_hard_tfmode_operation (code, operands);
++ else
++ emit_soft_tfmode_binop (code, operands);
++}
++
++void
++emit_tfmode_unop (enum rtx_code code, rtx *operands)
++{
++ if (TARGET_HARD_QUAD)
++ emit_hard_tfmode_operation (code, operands);
++ else
++ emit_soft_tfmode_unop (code, operands);
++}
++
++void
++emit_tfmode_cvt (enum rtx_code code, rtx *operands)
++{
++ if (TARGET_HARD_QUAD)
++ emit_hard_tfmode_operation (code, operands);
++ else
++ emit_soft_tfmode_cvt (code, operands);
++}
++
++/* Return nonzero if a branch/jump/call instruction will be emitting
++ nop into its delay slot. */
++
++int
++empty_delay_slot (rtx_insn *insn)
++{
++ rtx seq;
++
++ /* If no previous instruction (should not happen), return true. */
++ if (PREV_INSN (insn) == NULL)
++ return 1;
++
++ seq = NEXT_INSN (PREV_INSN (insn));
++ if (GET_CODE (PATTERN (seq)) == SEQUENCE)
++ return 0;
++
++ return 1;
++}
++
++/* Return nonzero if we should emit a nop after a cbcond instruction.
++ The cbcond instruction does not have a delay slot, however there is
++ a severe performance penalty if a control transfer appears right
++ after a cbcond. Therefore we emit a nop when we detect this
++ situation. */
++
++int
++emit_cbcond_nop (rtx_insn *insn)
++{
++ rtx next = next_active_insn (insn);
++
++ if (!next)
++ return 1;
++
++ if (NONJUMP_INSN_P (next)
++ && GET_CODE (PATTERN (next)) == SEQUENCE)
++ next = XVECEXP (PATTERN (next), 0, 0);
++ else if (CALL_P (next)
++ && GET_CODE (PATTERN (next)) == PARALLEL)
++ {
++ rtx delay = XVECEXP (PATTERN (next), 0, 1);
++
++ if (GET_CODE (delay) == RETURN)
++ {
++ /* It's a sibling call. Do not emit the nop if we're going
++ to emit something other than the jump itself as the first
++ instruction of the sibcall sequence. */
++ if (sparc_leaf_function_p || TARGET_FLAT)
++ return 0;
++ }
++ }
++
++ if (NONJUMP_INSN_P (next))
++ return 0;
++
++ return 1;
++}
++
++/* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
++ instruction. RETURN_P is true if the v9 variant 'return' is to be
++ considered in the test too.
++
++ TRIAL must be a SET whose destination is a REG appropriate for the
++ 'restore' instruction or, if RETURN_P is true, for the 'return'
++ instruction. */
++
++static int
++eligible_for_restore_insn (rtx trial, bool return_p)
++{
++ rtx pat = PATTERN (trial);
++ rtx src = SET_SRC (pat);
++ bool src_is_freg = false;
++ rtx src_reg;
++
++ /* Since we now can do moves between float and integer registers when
++ VIS3 is enabled, we have to catch this case. We can allow such
++ moves when doing a 'return' however. */
++ src_reg = src;
++ if (GET_CODE (src_reg) == SUBREG)
++ src_reg = SUBREG_REG (src_reg);
++ if (GET_CODE (src_reg) == REG
++ && SPARC_FP_REG_P (REGNO (src_reg)))
++ src_is_freg = true;
++
++ /* The 'restore src,%g0,dest' pattern for word mode and below. */
++ if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
++ && arith_operand (src, GET_MODE (src))
++ && ! src_is_freg)
++ {
++ if (TARGET_ARCH64)
++ return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
++ else
++ return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
++ }
++
++ /* The 'restore src,%g0,dest' pattern for double-word mode. */
++ else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
++ && arith_double_operand (src, GET_MODE (src))
++ && ! src_is_freg)
++ return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
++
++ /* The 'restore src,%g0,dest' pattern for float if no FPU. */
++ else if (! TARGET_FPU && register_operand (src, SFmode))
++ return 1;
++
++ /* The 'restore src,%g0,dest' pattern for double if no FPU. */
++ else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
++ return 1;
++
++ /* If we have the 'return' instruction, anything that does not use
++ local or output registers and can go into a delay slot wins. */
++ else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
++ return 1;
++
++ /* The 'restore src1,src2,dest' pattern for SImode. */
++ else if (GET_CODE (src) == PLUS
++ && register_operand (XEXP (src, 0), SImode)
++ && arith_operand (XEXP (src, 1), SImode))
++ return 1;
++
++ /* The 'restore src1,src2,dest' pattern for DImode. */
++ else if (GET_CODE (src) == PLUS
++ && register_operand (XEXP (src, 0), DImode)
++ && arith_double_operand (XEXP (src, 1), DImode))
++ return 1;
++
++ /* The 'restore src1,%lo(src2),dest' pattern. */
++ else if (GET_CODE (src) == LO_SUM
++ && ! TARGET_CM_MEDMID
++ && ((register_operand (XEXP (src, 0), SImode)
++ && immediate_operand (XEXP (src, 1), SImode))
++ || (TARGET_ARCH64
++ && register_operand (XEXP (src, 0), DImode)
++ && immediate_operand (XEXP (src, 1), DImode))))
++ return 1;
++
++ /* The 'restore src,src,dest' pattern. */
++ else if (GET_CODE (src) == ASHIFT
++ && (register_operand (XEXP (src, 0), SImode)
++ || register_operand (XEXP (src, 0), DImode))
++ && XEXP (src, 1) == const1_rtx)
++ return 1;
++
++ return 0;
++}
++
++/* Return nonzero if TRIAL can go into the function return's delay slot. */
++
++int
++eligible_for_return_delay (rtx_insn *trial)
++{
++ int regno;
++ rtx pat;
++
++ /* If the function uses __builtin_eh_return, the eh_return machinery
++ occupies the delay slot. */
++ if (crtl->calls_eh_return)
++ return 0;
++
++ if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
++ return 0;
++
++ /* In the case of a leaf or flat function, anything can go into the slot. */
++ if (sparc_leaf_function_p || TARGET_FLAT)
++ return 1;
++
++ if (!NONJUMP_INSN_P (trial))
++ return 0;
++
++ pat = PATTERN (trial);
++ if (GET_CODE (pat) == PARALLEL)
++ {
++ int i;
++
++ if (! TARGET_V9)
++ return 0;
++ for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
++ {
++ rtx expr = XVECEXP (pat, 0, i);
++ if (GET_CODE (expr) != SET)
++ return 0;
++ if (GET_CODE (SET_DEST (expr)) != REG)
++ return 0;
++ regno = REGNO (SET_DEST (expr));
++ if (regno >= 8 && regno < 24)
++ return 0;
++ }
++ return !epilogue_renumber (&pat, 1);
++ }
++
++ if (GET_CODE (pat) != SET)
++ return 0;
++
++ if (GET_CODE (SET_DEST (pat)) != REG)
++ return 0;
++
++ regno = REGNO (SET_DEST (pat));
++
++ /* Otherwise, only operations which can be done in tandem with
++ a `restore' or `return' insn can go into the delay slot. */
++ if (regno >= 8 && regno < 24)
++ return 0;
++
++ /* If this instruction sets up floating point register and we have a return
++ instruction, it can probably go in. But restore will not work
++ with FP_REGS. */
++ if (! SPARC_INT_REG_P (regno))
++ return TARGET_V9 && !epilogue_renumber (&pat, 1);
++
++ return eligible_for_restore_insn (trial, true);
++}
++
++/* Return nonzero if TRIAL can go into the sibling call's delay slot. */
++
++int
++eligible_for_sibcall_delay (rtx_insn *trial)
++{
++ rtx pat;
++
++ if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
++ return 0;
++
++ if (!NONJUMP_INSN_P (trial))
++ return 0;
++
++ pat = PATTERN (trial);
++
++ if (sparc_leaf_function_p || TARGET_FLAT)
++ {
++ /* If the tail call is done using the call instruction,
++ we have to restore %o7 in the delay slot. */
++ if (LEAF_SIBCALL_SLOT_RESERVED_P)
++ return 0;
++
++ /* %g1 is used to build the function address */
++ if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
++ return 0;
++
++ return 1;
++ }
++
++ if (GET_CODE (pat) != SET)
++ return 0;
++
++ /* Otherwise, only operations which can be done in tandem with
++ a `restore' insn can go into the delay slot. */
++ if (GET_CODE (SET_DEST (pat)) != REG
++ || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
++ || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
++ return 0;
++
++ /* If it mentions %o7, it can't go in, because sibcall will clobber it
++ in most cases. */
++ if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
++ return 0;
++
++ return eligible_for_restore_insn (trial, false);
++}
++
++/* Determine if it's legal to put X into the constant pool. This
++ is not possible if X contains the address of a symbol that is
++ not constant (TLS) or not known at final link time (PIC). */
++
++static bool
++sparc_cannot_force_const_mem (machine_mode mode, rtx x)
++{
++ switch (GET_CODE (x))
++ {
++ case CONST_INT:
++ case CONST_WIDE_INT:
++ case CONST_DOUBLE:
++ case CONST_VECTOR:
++ /* Accept all non-symbolic constants. */
++ return false;
++
++ case LABEL_REF:
++ /* Labels are OK iff we are non-PIC. */
++ return flag_pic != 0;
++
++ case SYMBOL_REF:
++ /* 'Naked' TLS symbol references are never OK,
++ non-TLS symbols are OK iff we are non-PIC. */
++ if (SYMBOL_REF_TLS_MODEL (x))
++ return true;
++ else
++ return flag_pic != 0;
++
++ case CONST:
++ return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
++ case PLUS:
++ case MINUS:
++ return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
++ || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
++ case UNSPEC:
++ return true;
++ default:
++ gcc_unreachable ();
++ }
++}
++
++/* Global Offset Table support. */
++static GTY(()) rtx got_symbol_rtx = NULL_RTX;
++static GTY(()) rtx got_register_rtx = NULL_RTX;
++static GTY(()) rtx got_helper_rtx = NULL_RTX;
++
++static GTY(()) bool got_helper_needed = false;
++
++/* Return the SYMBOL_REF for the Global Offset Table. */
++
++static rtx
++sparc_got (void)
++{
++ if (!got_symbol_rtx)
++ got_symbol_rtx = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
++
++ return got_symbol_rtx;
++}
++
++/* Wrapper around the load_pcrel_sym{si,di} patterns. */
++
++static rtx
++gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2)
++{
++ int orig_flag_pic = flag_pic;
++ rtx insn;
++
++ /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
++ flag_pic = 0;
++ if (TARGET_ARCH64)
++ insn = gen_load_pcrel_symdi (op0, op1, op2, GEN_INT (REGNO (op0)));
++ else
++ insn = gen_load_pcrel_symsi (op0, op1, op2, GEN_INT (REGNO (op0)));
++ flag_pic = orig_flag_pic;
++
++ return insn;
++}
++
++/* Output the load_pcrel_sym{si,di} patterns. */
++
++const char *
++output_load_pcrel_sym (rtx *operands)
++{
++ if (flag_delayed_branch)
++ {
++ output_asm_insn ("sethi\t%%hi(%a1-4), %0", operands);
++ output_asm_insn ("call\t%a2", operands);
++ output_asm_insn (" add\t%0, %%lo(%a1+4), %0", operands);
++ }
++ else
++ {
++ output_asm_insn ("sethi\t%%hi(%a1-8), %0", operands);
++ output_asm_insn ("add\t%0, %%lo(%a1-4), %0", operands);
++ output_asm_insn ("call\t%a2", operands);
++ output_asm_insn (" nop", NULL);
++ }
++
++ if (operands[2] == got_helper_rtx)
++ got_helper_needed = true;
++
++ return "";
++}
++
++#ifdef HAVE_GAS_HIDDEN
++# define USE_HIDDEN_LINKONCE 1
++#else
++# define USE_HIDDEN_LINKONCE 0
++#endif
++
++/* Emit code to load the GOT register. */
++
++void
++load_got_register (void)
++{
++ rtx insn;
++
++ if (TARGET_VXWORKS_RTP)
++ {
++ if (!got_register_rtx)
++ got_register_rtx = pic_offset_table_rtx;
++
++ insn = gen_vxworks_load_got ();
++ }
++ else
++ {
++ if (!got_register_rtx)
++ got_register_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
++
++ /* The GOT symbol is subject to a PC-relative relocation so we need a
++ helper function to add the PC value and thus get the final value. */
++ if (!got_helper_rtx)
++ {
++ char name[32];
++
++ /* Skip the leading '%' as that cannot be used in a symbol name. */
++ if (USE_HIDDEN_LINKONCE)
++ sprintf (name, "__sparc_get_pc_thunk.%s",
++ reg_names[REGNO (got_register_rtx)] + 1);
++ else
++ ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC",
++ REGNO (got_register_rtx));
++
++ got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
++ }
++
++ insn
++ = gen_load_pcrel_sym (got_register_rtx, sparc_got (), got_helper_rtx);
++ }
++
++ emit_insn (insn);
++}
++
++/* Ensure that we are not using patterns that are not OK with PIC. */
++
++int
++check_pic (int i)
++{
++ rtx op;
++
++ switch (flag_pic)
++ {
++ case 1:
++ op = recog_data.operand[i];
++ gcc_assert (GET_CODE (op) != SYMBOL_REF
++ && (GET_CODE (op) != CONST
++ || (GET_CODE (XEXP (op, 0)) == MINUS
++ && XEXP (XEXP (op, 0), 0) == sparc_got ()
++ && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
++ /* fallthrough */
++ case 2:
++ default:
++ return 1;
++ }
++}
++
++/* Return true if X is an address which needs a temporary register when
++ reloaded while generating PIC code. */
++
++int
++pic_address_needs_scratch (rtx x)
++{
++ /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
++ if (GET_CODE (x) == CONST
++ && GET_CODE (XEXP (x, 0)) == PLUS
++ && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
++ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
++ && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
++ return 1;
++
++ return 0;
++}
++
++/* Determine if a given RTX is a valid constant. We already know this
++ satisfies CONSTANT_P. */
++
++static bool
++sparc_legitimate_constant_p (machine_mode mode, rtx x)
++{
++ switch (GET_CODE (x))
++ {
++ case CONST:
++ case SYMBOL_REF:
++ if (sparc_tls_referenced_p (x))
++ return false;
++ break;
++
++ case CONST_DOUBLE:
++ /* Floating point constants are generally not ok.
++ The only exception is 0.0 and all-ones in VIS. */
++ if (TARGET_VIS
++ && SCALAR_FLOAT_MODE_P (mode)
++ && (const_zero_operand (x, mode)
++ || const_all_ones_operand (x, mode)))
++ return true;
++
++ return false;
++
++ case CONST_VECTOR:
++ /* Vector constants are generally not ok.
++ The only exception is 0 or -1 in VIS. */
++ if (TARGET_VIS
++ && (const_zero_operand (x, mode)
++ || const_all_ones_operand (x, mode)))
++ return true;
++
++ return false;
++
++ default:
++ break;
++ }
++
++ return true;
++}
++
++/* Determine if a given RTX is a valid constant address. */
++
++bool
++constant_address_p (rtx x)
++{
++ switch (GET_CODE (x))
++ {
++ case LABEL_REF:
++ case CONST_INT:
++ case HIGH:
++ return true;
++
++ case CONST:
++ if (flag_pic && pic_address_needs_scratch (x))
++ return false;
++ return sparc_legitimate_constant_p (Pmode, x);
++
++ case SYMBOL_REF:
++ return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
++
++ default:
++ return false;
++ }
++}
++
++/* Nonzero if the constant value X is a legitimate general operand
++ when generating PIC code. It is given that flag_pic is on and
++ that X satisfies CONSTANT_P. */
++
++bool
++legitimate_pic_operand_p (rtx x)
++{
++ if (pic_address_needs_scratch (x))
++ return false;
++ if (sparc_tls_referenced_p (x))
++ return false;
++ return true;
++}
++
++/* Return true if X is a representation of the PIC register. */
++
++static bool
++sparc_pic_register_p (rtx x)
++{
++ if (!REG_P (x) || !pic_offset_table_rtx)
++ return false;
++
++ if (x == pic_offset_table_rtx)
++ return true;
++
++ if (!HARD_REGISTER_P (pic_offset_table_rtx)
++ && (HARD_REGISTER_P (x) || lra_in_progress || reload_in_progress)
++ && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
++ return true;
++
++ return false;
++}
++
++#define RTX_OK_FOR_OFFSET_P(X, MODE) \
++ (CONST_INT_P (X) \
++ && INTVAL (X) >= -0x1000 \
++ && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
++
++#define RTX_OK_FOR_OLO10_P(X, MODE) \
++ (CONST_INT_P (X) \
++ && INTVAL (X) >= -0x1000 \
++ && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
++
++/* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
++
++ On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
++ ordinarily. This changes a bit when generating PIC. */
++
++static bool
++sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
++{
++ rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
++
++ if (REG_P (addr) || GET_CODE (addr) == SUBREG)
++ rs1 = addr;
++ else if (GET_CODE (addr) == PLUS)
++ {
++ rs1 = XEXP (addr, 0);
++ rs2 = XEXP (addr, 1);
++
++ /* Canonicalize. REG comes first, if there are no regs,
++ LO_SUM comes first. */
++ if (!REG_P (rs1)
++ && GET_CODE (rs1) != SUBREG
++ && (REG_P (rs2)
++ || GET_CODE (rs2) == SUBREG
++ || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
++ {
++ rs1 = XEXP (addr, 1);
++ rs2 = XEXP (addr, 0);
++ }
++
++ if ((flag_pic == 1
++ && sparc_pic_register_p (rs1)
++ && !REG_P (rs2)
++ && GET_CODE (rs2) != SUBREG
++ && GET_CODE (rs2) != LO_SUM
++ && GET_CODE (rs2) != MEM
++ && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
++ && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
++ && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
++ || ((REG_P (rs1)
++ || GET_CODE (rs1) == SUBREG)
++ && RTX_OK_FOR_OFFSET_P (rs2, mode)))
++ {
++ imm1 = rs2;
++ rs2 = NULL;
++ }
++ else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
++ && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
++ {
++ /* We prohibit REG + REG for TFmode when there are no quad move insns
++ and we consequently need to split. We do this because REG+REG
++ is not an offsettable address. If we get the situation in reload
++ where source and destination of a movtf pattern are both MEMs with
++ REG+REG address, then only one of them gets converted to an
++ offsettable address. */
++ if (mode == TFmode
++ && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
++ return 0;
++
++ /* Likewise for TImode, but in all cases. */
++ if (mode == TImode)
++ return 0;
++
++ /* We prohibit REG + REG on ARCH32 if not optimizing for
++ DFmode/DImode because then mem_min_alignment is likely to be zero
++ after reload and the forced split would lack a matching splitter
++ pattern. */
++ if (TARGET_ARCH32 && !optimize
++ && (mode == DFmode || mode == DImode))
++ return 0;
++ }
++ else if (USE_AS_OFFSETABLE_LO10
++ && GET_CODE (rs1) == LO_SUM
++ && TARGET_ARCH64
++ && ! TARGET_CM_MEDMID
++ && RTX_OK_FOR_OLO10_P (rs2, mode))
++ {
++ rs2 = NULL;
++ imm1 = XEXP (rs1, 1);
++ rs1 = XEXP (rs1, 0);
++ if (!CONSTANT_P (imm1)
++ || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
++ return 0;
++ }
++ }
++ else if (GET_CODE (addr) == LO_SUM)
++ {
++ rs1 = XEXP (addr, 0);
++ imm1 = XEXP (addr, 1);
++
++ if (!CONSTANT_P (imm1)
++ || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
++ return 0;
++
++ /* We can't allow TFmode in 32-bit mode, because an offset greater
++ than the alignment (8) may cause the LO_SUM to overflow. */
++ if (mode == TFmode && TARGET_ARCH32)
++ return 0;
++
++ /* During reload, accept the HIGH+LO_SUM construct generated by
++ sparc_legitimize_reload_address. */
++ if (reload_in_progress
++ && GET_CODE (rs1) == HIGH
++ && XEXP (rs1, 0) == imm1)
++ return 1;
++ }
++ else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
++ return 1;
++ else
++ return 0;
++
++ if (GET_CODE (rs1) == SUBREG)
++ rs1 = SUBREG_REG (rs1);
++ if (!REG_P (rs1))
++ return 0;
++
++ if (rs2)
++ {
++ if (GET_CODE (rs2) == SUBREG)
++ rs2 = SUBREG_REG (rs2);
++ if (!REG_P (rs2))
++ return 0;
++ }
++
++ if (strict)
++ {
++ if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
++ || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
++ return 0;
++ }
++ else
++ {
++ if ((! SPARC_INT_REG_P (REGNO (rs1))
++ && REGNO (rs1) != FRAME_POINTER_REGNUM
++ && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
++ || (rs2
++ && (! SPARC_INT_REG_P (REGNO (rs2))
++ && REGNO (rs2) != FRAME_POINTER_REGNUM
++ && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
++ return 0;
++ }
++ return 1;
++}
++
++/* Return the SYMBOL_REF for the tls_get_addr function. */
++
++static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
++
++static rtx
++sparc_tls_get_addr (void)
++{
++ if (!sparc_tls_symbol)
++ sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
++
++ return sparc_tls_symbol;
++}
++
++/* Return the Global Offset Table to be used in TLS mode. */
++
++static rtx
++sparc_tls_got (void)
++{
++ /* In PIC mode, this is just the PIC offset table. */
++ if (flag_pic)
++ {
++ crtl->uses_pic_offset_table = 1;
++ return pic_offset_table_rtx;
++ }
++
++ /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
++ the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
++ if (TARGET_SUN_TLS && TARGET_ARCH32)
++ {
++ load_got_register ();
++ return got_register_rtx;
++ }
++
++ /* In all other cases, we load a new pseudo with the GOT symbol. */
++ return copy_to_reg (sparc_got ());
++}
++
++/* Return true if X contains a thread-local symbol. */
++
++static bool
++sparc_tls_referenced_p (rtx x)
++{
++ if (!TARGET_HAVE_TLS)
++ return false;
++
++ if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
++ x = XEXP (XEXP (x, 0), 0);
++
++ if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
++ return true;
++
++ /* That's all we handle in sparc_legitimize_tls_address for now. */
++ return false;
++}
++
++/* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
++ this (thread-local) address. */
++
++static rtx
++sparc_legitimize_tls_address (rtx addr)
++{
++ rtx temp1, temp2, temp3, ret, o0, got;
++ rtx_insn *insn;
++
++ gcc_assert (can_create_pseudo_p ());
++
++ if (GET_CODE (addr) == SYMBOL_REF)
++ /* Although the various sethi/or sequences generate SImode values, many of
++ them can be transformed by the linker when relaxing and, if relaxing to
++ local-exec, will become a sethi/xor pair, which is signed and therefore
++ a full DImode value in 64-bit mode. Thus we must use Pmode, lest these
++ values be spilled onto the stack in 64-bit mode. */
++ switch (SYMBOL_REF_TLS_MODEL (addr))
++ {
++ case TLS_MODEL_GLOBAL_DYNAMIC:
++ start_sequence ();
++ temp1 = gen_reg_rtx (Pmode);
++ temp2 = gen_reg_rtx (Pmode);
++ ret = gen_reg_rtx (Pmode);
++ o0 = gen_rtx_REG (Pmode, 8);
++ got = sparc_tls_got ();
++ if (TARGET_ARCH32)
++ {
++ emit_insn (gen_tgd_hi22si (temp1, addr));
++ emit_insn (gen_tgd_lo10si (temp2, temp1, addr));
++ emit_insn (gen_tgd_addsi (o0, got, temp2, addr));
++ insn = emit_call_insn (gen_tgd_callsi (o0, sparc_tls_get_addr (),
++ addr, const1_rtx));
++ }
++ else
++ {
++ emit_insn (gen_tgd_hi22di (temp1, addr));
++ emit_insn (gen_tgd_lo10di (temp2, temp1, addr));
++ emit_insn (gen_tgd_adddi (o0, got, temp2, addr));
++ insn = emit_call_insn (gen_tgd_calldi (o0, sparc_tls_get_addr (),
++ addr, const1_rtx));
++ }
++ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
++ RTL_CONST_CALL_P (insn) = 1;
++ insn = get_insns ();
++ end_sequence ();
++ emit_libcall_block (insn, ret, o0, addr);
++ break;
++
++ case TLS_MODEL_LOCAL_DYNAMIC:
++ start_sequence ();
++ temp1 = gen_reg_rtx (Pmode);
++ temp2 = gen_reg_rtx (Pmode);
++ temp3 = gen_reg_rtx (Pmode);
++ ret = gen_reg_rtx (Pmode);
++ o0 = gen_rtx_REG (Pmode, 8);
++ got = sparc_tls_got ();
++ if (TARGET_ARCH32)
++ {
++ emit_insn (gen_tldm_hi22si (temp1));
++ emit_insn (gen_tldm_lo10si (temp2, temp1));
++ emit_insn (gen_tldm_addsi (o0, got, temp2));
++ insn = emit_call_insn (gen_tldm_callsi (o0, sparc_tls_get_addr (),
++ const1_rtx));
++ }
++ else
++ {
++ emit_insn (gen_tldm_hi22di (temp1));
++ emit_insn (gen_tldm_lo10di (temp2, temp1));
++ emit_insn (gen_tldm_adddi (o0, got, temp2));
++ insn = emit_call_insn (gen_tldm_calldi (o0, sparc_tls_get_addr (),
++ const1_rtx));
++ }
++ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
++ RTL_CONST_CALL_P (insn) = 1;
++ insn = get_insns ();
++ end_sequence ();
++ /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
++ share the LD_BASE result with other LD model accesses. */
++ emit_libcall_block (insn, temp3, o0,
++ gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
++ UNSPEC_TLSLD_BASE));
++ temp1 = gen_reg_rtx (Pmode);
++ temp2 = gen_reg_rtx (Pmode);
++ if (TARGET_ARCH32)
++ {
++ emit_insn (gen_tldo_hix22si (temp1, addr));
++ emit_insn (gen_tldo_lox10si (temp2, temp1, addr));
++ emit_insn (gen_tldo_addsi (ret, temp3, temp2, addr));
++ }
++ else
++ {
++ emit_insn (gen_tldo_hix22di (temp1, addr));
++ emit_insn (gen_tldo_lox10di (temp2, temp1, addr));
++ emit_insn (gen_tldo_adddi (ret, temp3, temp2, addr));
++ }
++ break;
++
++ case TLS_MODEL_INITIAL_EXEC:
++ temp1 = gen_reg_rtx (Pmode);
++ temp2 = gen_reg_rtx (Pmode);
++ temp3 = gen_reg_rtx (Pmode);
++ got = sparc_tls_got ();
++ if (TARGET_ARCH32)
++ {
++ emit_insn (gen_tie_hi22si (temp1, addr));
++ emit_insn (gen_tie_lo10si (temp2, temp1, addr));
++ emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
++ }
++ else
++ {
++ emit_insn (gen_tie_hi22di (temp1, addr));
++ emit_insn (gen_tie_lo10di (temp2, temp1, addr));
++ emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
++ }
++ if (TARGET_SUN_TLS)
++ {
++ ret = gen_reg_rtx (Pmode);
++ if (TARGET_ARCH32)
++ emit_insn (gen_tie_addsi (ret, gen_rtx_REG (Pmode, 7),
++ temp3, addr));
++ else
++ emit_insn (gen_tie_adddi (ret, gen_rtx_REG (Pmode, 7),
++ temp3, addr));
++ }
++ else
++ ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
++ break;
++
++ case TLS_MODEL_LOCAL_EXEC:
++ temp1 = gen_reg_rtx (Pmode);
++ temp2 = gen_reg_rtx (Pmode);
++ if (TARGET_ARCH32)
++ {
++ emit_insn (gen_tle_hix22si (temp1, addr));
++ emit_insn (gen_tle_lox10si (temp2, temp1, addr));
++ }
++ else
++ {
++ emit_insn (gen_tle_hix22di (temp1, addr));
++ emit_insn (gen_tle_lox10di (temp2, temp1, addr));
++ }
++ ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
++ break;
++
++ default:
++ gcc_unreachable ();
++ }
++
++ else if (GET_CODE (addr) == CONST)
++ {
++ rtx base, offset;
++
++ gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
++
++ base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
++ offset = XEXP (XEXP (addr, 0), 1);
++
++ base = force_operand (base, NULL_RTX);
++ if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
++ offset = force_reg (Pmode, offset);
++ ret = gen_rtx_PLUS (Pmode, base, offset);
++ }
++
++ else
++ gcc_unreachable (); /* for now ... */
++
++ return ret;
++}
++
++/* Legitimize PIC addresses. If the address is already position-independent,
++ we return ORIG. Newly generated position-independent addresses go into a
++ reg. This is REG if nonzero, otherwise we allocate register(s) as
++ necessary. */
++
++static rtx
++sparc_legitimize_pic_address (rtx orig, rtx reg)
++{
++ if (GET_CODE (orig) == SYMBOL_REF
++ /* See the comment in sparc_expand_move. */
++ || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
++ {
++ bool gotdata_op = false;
++ rtx pic_ref, address;
++ rtx_insn *insn;
++
++ if (!reg)
++ {
++ gcc_assert (can_create_pseudo_p ());
++ reg = gen_reg_rtx (Pmode);
++ }
++
++ if (flag_pic == 2)
++ {
++ /* If not during reload, allocate another temp reg here for loading
++ in the address, so that these instructions can be optimized
++ properly. */
++ rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;
++
++ /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
++ won't get confused into thinking that these two instructions
++ are loading in the true address of the symbol. If in the
++ future a PIC rtx exists, that should be used instead. */
++ if (TARGET_ARCH64)
++ {
++ emit_insn (gen_movdi_high_pic (temp_reg, orig));
++ emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
++ }
++ else
++ {
++ emit_insn (gen_movsi_high_pic (temp_reg, orig));
++ emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
++ }
++
++ address = temp_reg;
++ gotdata_op = true;
++ }
++ else
++ address = orig;
++
++ crtl->uses_pic_offset_table = 1;
++ if (gotdata_op)
++ {
++ if (TARGET_ARCH64)
++ insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
++ pic_offset_table_rtx,
++ address, orig));
++ else
++ insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
++ pic_offset_table_rtx,
++ address, orig));
++ }
++ else
++ {
++ pic_ref
++ = gen_const_mem (Pmode,
++ gen_rtx_PLUS (Pmode,
++ pic_offset_table_rtx, address));
++ insn = emit_move_insn (reg, pic_ref);
++ }
++
++ /* Put a REG_EQUAL note on this insn, so that it can be optimized
++ by loop. */
++ set_unique_reg_note (insn, REG_EQUAL, orig);
++ return reg;
++ }
++ else if (GET_CODE (orig) == CONST)
++ {
++ rtx base, offset;
++
++ if (GET_CODE (XEXP (orig, 0)) == PLUS
++ && sparc_pic_register_p (XEXP (XEXP (orig, 0), 0)))
++ return orig;
++
++ if (!reg)
++ {
++ gcc_assert (can_create_pseudo_p ());
++ reg = gen_reg_rtx (Pmode);
++ }
++
++ gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
++ base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
++ offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
++ base == reg ? NULL_RTX : reg);
++
++ if (GET_CODE (offset) == CONST_INT)
++ {
++ if (SMALL_INT (offset))
++ return plus_constant (Pmode, base, INTVAL (offset));
++ else if (can_create_pseudo_p ())
++ offset = force_reg (Pmode, offset);
++ else
++ /* If we reach here, then something is seriously wrong. */
++ gcc_unreachable ();
++ }
++ return gen_rtx_PLUS (Pmode, base, offset);
++ }
++ else if (GET_CODE (orig) == LABEL_REF)
++ /* ??? We ought to be checking that the register is live instead, in case
++ it is eliminated. */
++ crtl->uses_pic_offset_table = 1;
++
++ return orig;
++}
++
++/* Try machine-dependent ways of modifying an illegitimate address X
++ to be legitimate. If we find one, return the new, valid address.
++
++ OLDX is the address as it was before break_out_memory_refs was called.
++ In some cases it is useful to look at this to decide what needs to be done.
++
++ MODE is the mode of the operand pointed to by X.
++
++ On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
++
++static rtx
++sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
++ machine_mode mode)
++{
++ rtx orig_x = x;
++
++ if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
++ x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
++ force_operand (XEXP (x, 0), NULL_RTX));
++ if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
++ x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
++ force_operand (XEXP (x, 1), NULL_RTX));
++ if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
++ x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
++ XEXP (x, 1));
++ if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
++ x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
++ force_operand (XEXP (x, 1), NULL_RTX));
++
++ if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
++ return x;
++
++ if (sparc_tls_referenced_p (x))
++ x = sparc_legitimize_tls_address (x);
++ else if (flag_pic)
++ x = sparc_legitimize_pic_address (x, NULL_RTX);
++ else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
++ x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
++ copy_to_mode_reg (Pmode, XEXP (x, 1)));
++ else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
++ x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
++ copy_to_mode_reg (Pmode, XEXP (x, 0)));
++ else if (GET_CODE (x) == SYMBOL_REF
++ || GET_CODE (x) == CONST
++ || GET_CODE (x) == LABEL_REF)
++ x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
++
++ return x;
++}
++
++/* Delegitimize an address that was legitimized by the above function. */
++
++static rtx
++sparc_delegitimize_address (rtx x)
++{
++ x = delegitimize_mem_from_attrs (x);
++
++ if (GET_CODE (x) == LO_SUM)
++ x = XEXP (x, 1);
++
++ if (GET_CODE (x) == UNSPEC)
++ switch (XINT (x, 1))
++ {
++ case UNSPEC_MOVE_PIC:
++ case UNSPEC_TLSLE:
++ x = XVECEXP (x, 0, 0);
++ gcc_assert (GET_CODE (x) == SYMBOL_REF);
++ break;
++ case UNSPEC_MOVE_GOTDATA:
++ x = XVECEXP (x, 0, 2);
++ gcc_assert (GET_CODE (x) == SYMBOL_REF);
++ break;
++ default:
++ break;
++ }
++
++ /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
++ if (GET_CODE (x) == MINUS
++ && (XEXP (x, 0) == got_register_rtx
++ || sparc_pic_register_p (XEXP (x, 0))))
++ {
++ rtx y = XEXP (x, 1);
++
++ if (GET_CODE (y) == LO_SUM)
++ y = XEXP (y, 1);
++
++ if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MOVE_PIC_LABEL)
++ {
++ x = XVECEXP (y, 0, 0);
++ gcc_assert (GET_CODE (x) == LABEL_REF
++ || (GET_CODE (x) == CONST
++ && GET_CODE (XEXP (x, 0)) == PLUS
++ && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
++ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT));
++ }
++ }
++
++ return x;
++}
++
++/* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
++ replace the input X, or the original X if no replacement is called for.
++ The output parameter *WIN is 1 if the calling macro should goto WIN,
++ 0 if it should not.
++
++ For SPARC, we wish to handle addresses by splitting them into
++ HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
++ This cuts the number of extra insns by one.
++
++ Do nothing when generating PIC code and the address is a symbolic
++ operand or requires a scratch register. */
++
++rtx
++sparc_legitimize_reload_address (rtx x, machine_mode mode,
++ int opnum, int type,
++ int ind_levels ATTRIBUTE_UNUSED, int *win)
++{
++ /* Decompose SImode constants into HIGH+LO_SUM. */
++ if (CONSTANT_P (x)
++ && (mode != TFmode || TARGET_ARCH64)
++ && GET_MODE (x) == SImode
++ && GET_CODE (x) != LO_SUM
++ && GET_CODE (x) != HIGH
++ && sparc_code_model <= CM_MEDLOW
++ && !(flag_pic
++ && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
++ {
++ x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
++ push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
++ BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
++ opnum, (enum reload_type)type);
++ *win = 1;
++ return x;
++ }
++
++ /* We have to recognize what we have already generated above. */
++ if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
++ {
++ push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
++ BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
++ opnum, (enum reload_type)type);
++ *win = 1;
++ return x;
++ }
++
++ *win = 0;
++ return x;
++}
++
++/* Return true if ADDR (a legitimate address expression)
++ has an effect that depends on the machine mode it is used for.
++
++ In PIC mode,
++
++ (mem:HI [%l7+a])
++
++ is not equivalent to
++
++ (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
++
++ because [%l7+a+1] is interpreted as the address of (a+1). */
++
++
++static bool
++sparc_mode_dependent_address_p (const_rtx addr,
++ addr_space_t as ATTRIBUTE_UNUSED)
++{
++ if (GET_CODE (addr) == PLUS
++ && sparc_pic_register_p (XEXP (addr, 0))
++ && symbolic_operand (XEXP (addr, 1), VOIDmode))
++ return true;
++
++ return false;
++}
++
++/* Emit a call instruction with the pattern given by PAT. ADDR is the
++ address of the call target. */
++
++void
++sparc_emit_call_insn (rtx pat, rtx addr)
++{
++ rtx_insn *insn;
++
++ insn = emit_call_insn (pat);
++
++ /* The PIC register is live on entry to VxWorks PIC PLT entries. */
++ if (TARGET_VXWORKS_RTP
++ && flag_pic
++ && GET_CODE (addr) == SYMBOL_REF
++ && (SYMBOL_REF_DECL (addr)
++ ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
++ : !SYMBOL_REF_LOCAL_P (addr)))
++ {
++ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
++ crtl->uses_pic_offset_table = 1;
++ }
++}
++
++/* Return 1 if RTX is a MEM which is known to be aligned to at
++ least a DESIRED byte boundary. */
++
++int
++mem_min_alignment (rtx mem, int desired)
++{
++ rtx addr, base, offset;
++
++ /* If it's not a MEM we can't accept it. */
++ if (GET_CODE (mem) != MEM)
++ return 0;
++
++ /* Obviously... */
++ if (!TARGET_UNALIGNED_DOUBLES
++ && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
++ return 1;
++
++ /* ??? The rest of the function predates MEM_ALIGN so
++ there is probably a bit of redundancy. */
++ addr = XEXP (mem, 0);
++ base = offset = NULL_RTX;
++ if (GET_CODE (addr) == PLUS)
++ {
++ if (GET_CODE (XEXP (addr, 0)) == REG)
++ {
++ base = XEXP (addr, 0);
++
++ /* What we are saying here is that if the base
++ REG is aligned properly, the compiler will make
++ sure any REG based index upon it will be so
++ as well. */
++ if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
++ offset = XEXP (addr, 1);
++ else
++ offset = const0_rtx;
++ }
++ }
++ else if (GET_CODE (addr) == REG)
++ {
++ base = addr;
++ offset = const0_rtx;
++ }
++
++ if (base != NULL_RTX)
++ {
++ int regno = REGNO (base);
++
++ if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
++ {
++ /* Check if the compiler has recorded some information
++ about the alignment of the base REG. If reload has
++ completed, we already matched with proper alignments.
++ If not running global_alloc, reload might give us
++ unaligned pointer to local stack though. */
++ if (((cfun != 0
++ && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
++ || (optimize && reload_completed))
++ && (INTVAL (offset) & (desired - 1)) == 0)
++ return 1;
++ }
++ else
++ {
++ if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
++ return 1;
++ }
++ }
++ else if (! TARGET_UNALIGNED_DOUBLES
++ || CONSTANT_P (addr)
++ || GET_CODE (addr) == LO_SUM)
++ {
++ /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
++ is true, in which case we can only assume that an access is aligned if
++ it is to a constant address, or the address involves a LO_SUM. */
++ return 1;
++ }
++
++ /* An obviously unaligned address. */
++ return 0;
++}
++
++
++/* Vectors to keep interesting information about registers where it can easily
++ be got. We used to use the actual mode value as the bit number, but there
++ are more than 32 modes now. Instead we use two tables: one indexed by
++ hard register number, and one indexed by mode. */
++
++/* The purpose of sparc_mode_class is to shrink the range of modes so that
++ they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
++ mapped into one sparc_mode_class mode. */
++
++enum sparc_mode_class {
++ H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
++ SF_MODE, DF_MODE, TF_MODE, OF_MODE,
++ CC_MODE, CCFP_MODE
++};
++
++/* Modes for single-word and smaller quantities. */
++#define S_MODES \
++ ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
++
++/* Modes for double-word and smaller quantities. */
++#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
++
++/* Modes for quad-word and smaller quantities. */
++#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
++
++/* Modes for 8-word and smaller quantities. */
++#define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
++
++/* Modes for single-float quantities. */
++#define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
++
++/* Modes for double-float and smaller quantities. */
++#define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
++
++/* Modes for quad-float and smaller quantities. */
++#define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
++
++/* Modes for quad-float pairs and smaller quantities. */
++#define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
++
++/* Modes for double-float only quantities. */
++#define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
++
++/* Modes for quad-float and double-float only quantities. */
++#define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
++
++/* Modes for quad-float pairs and double-float only quantities. */
++#define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
++
++/* Modes for condition codes. */
++#define CC_MODES (1 << (int) CC_MODE)
++#define CCFP_MODES (1 << (int) CCFP_MODE)
++
++/* Value is 1 if register/mode pair is acceptable on sparc.
++
++ The funny mixture of D and T modes is because integer operations
++ do not specially operate on tetra quantities, so non-quad-aligned
++ registers can hold quadword quantities (except %o4 and %i4 because
++ they cross fixed registers).
++
++ ??? Note that, despite the settings, non-double-aligned parameter
++ registers can hold double-word quantities in 32-bit mode. */
++
++/* This points to either the 32-bit or the 64-bit version. */
++static const int *hard_regno_mode_classes;
++
++static const int hard_32bit_mode_classes[] = {
++ S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
++ T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
++ T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
++ T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
++
++ OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
++ OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
++ OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
++ OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
++
++ /* FP regs f32 to f63. Only the even numbered registers actually exist,
++ and none can hold SFmode/SImode values. */
++ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
++ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
++ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
++ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
++
++ /* %fcc[0123] */
++ CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
++
++ /* %icc, %sfp, %gsr */
++ CC_MODES, 0, D_MODES
++};
++
++static const int hard_64bit_mode_classes[] = {
++ D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
++ O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
++ T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
++ O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
++
++ OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
++ OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
++ OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
++ OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
++
++ /* FP regs f32 to f63. Only the even numbered registers actually exist,
++ and none can hold SFmode/SImode values. */
++ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
++ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
++ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
++ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
++
++ /* %fcc[0123] */
++ CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
++
++ /* %icc, %sfp, %gsr */
++ CC_MODES, 0, D_MODES
++};
++
++static int sparc_mode_class [NUM_MACHINE_MODES];
++
++enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
++
++static void
++sparc_init_modes (void)
++{
++ int i;
++
++ for (i = 0; i < NUM_MACHINE_MODES; i++)
++ {
++ machine_mode m = (machine_mode) i;
++ unsigned int size = GET_MODE_SIZE (m);
++
++ switch (GET_MODE_CLASS (m))
++ {
++ case MODE_INT:
++ case MODE_PARTIAL_INT:
++ case MODE_COMPLEX_INT:
++ if (size < 4)
++ sparc_mode_class[i] = 1 << (int) H_MODE;
++ else if (size == 4)
++ sparc_mode_class[i] = 1 << (int) S_MODE;
++ else if (size == 8)
++ sparc_mode_class[i] = 1 << (int) D_MODE;
++ else if (size == 16)
++ sparc_mode_class[i] = 1 << (int) T_MODE;
++ else if (size == 32)
++ sparc_mode_class[i] = 1 << (int) O_MODE;
++ else
++ sparc_mode_class[i] = 0;
++ break;
++ case MODE_VECTOR_INT:
++ if (size == 4)
++ sparc_mode_class[i] = 1 << (int) SF_MODE;
++ else if (size == 8)
++ sparc_mode_class[i] = 1 << (int) DF_MODE;
++ else
++ sparc_mode_class[i] = 0;
++ break;
++ case MODE_FLOAT:
++ case MODE_COMPLEX_FLOAT:
++ if (size == 4)
++ sparc_mode_class[i] = 1 << (int) SF_MODE;
++ else if (size == 8)
++ sparc_mode_class[i] = 1 << (int) DF_MODE;
++ else if (size == 16)
++ sparc_mode_class[i] = 1 << (int) TF_MODE;
++ else if (size == 32)
++ sparc_mode_class[i] = 1 << (int) OF_MODE;
++ else
++ sparc_mode_class[i] = 0;
++ break;
++ case MODE_CC:
++ if (m == CCFPmode || m == CCFPEmode)
++ sparc_mode_class[i] = 1 << (int) CCFP_MODE;
++ else
++ sparc_mode_class[i] = 1 << (int) CC_MODE;
++ break;
++ default:
++ sparc_mode_class[i] = 0;
++ break;
++ }
++ }
++
++ if (TARGET_ARCH64)
++ hard_regno_mode_classes = hard_64bit_mode_classes;
++ else
++ hard_regno_mode_classes = hard_32bit_mode_classes;
++
++ /* Initialize the array used by REGNO_REG_CLASS. */
++ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
++ {
++ if (i < 16 && TARGET_V8PLUS)
++ sparc_regno_reg_class[i] = I64_REGS;
++ else if (i < 32 || i == FRAME_POINTER_REGNUM)
++ sparc_regno_reg_class[i] = GENERAL_REGS;
++ else if (i < 64)
++ sparc_regno_reg_class[i] = FP_REGS;
++ else if (i < 96)
++ sparc_regno_reg_class[i] = EXTRA_FP_REGS;
++ else if (i < 100)
++ sparc_regno_reg_class[i] = FPCC_REGS;
++ else
++ sparc_regno_reg_class[i] = NO_REGS;
++ }
++}
++
++/* Return whether REGNO, a global or FP register, must be saved/restored. */
++
++static inline bool
++save_global_or_fp_reg_p (unsigned int regno,
++ int leaf_function ATTRIBUTE_UNUSED)
++{
++ return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno);
++}
++
++/* Return whether the return address register (%i7) is needed. */
++
++static inline bool
++return_addr_reg_needed_p (int leaf_function)
++{
++ /* If it is live, for example because of __builtin_return_address (0). */
++ if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
++ return true;
++
++ /* Otherwise, it is needed as save register if %o7 is clobbered. */
++ if (!leaf_function
++ /* Loading the GOT register clobbers %o7. */
++ || crtl->uses_pic_offset_table
++ || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
++ return true;
++
++ return false;
++}
++
++/* Return whether REGNO, a local or in register, must be saved/restored. */
++
++static bool
++save_local_or_in_reg_p (unsigned int regno, int leaf_function)
++{
++ /* General case: call-saved registers live at some point. */
++ if (!call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno))
++ return true;
++
++ /* Frame pointer register (%fp) if needed. */
++ if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
++ return true;
++
++ /* Return address register (%i7) if needed. */
++ if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
++ return true;
++
++ /* GOT register (%l7) if needed. */
++ if (got_register_rtx && regno == REGNO (got_register_rtx))
++ return true;
++
++ /* If the function accesses prior frames, the frame pointer and the return
++ address of the previous frame must be saved on the stack. */
++ if (crtl->accesses_prior_frames
++ && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
++ return true;
++
++ return false;
++}
++
++/* Compute the frame size required by the function. This function is called
++ during the reload pass and also by sparc_expand_prologue. */
++
++static HOST_WIDE_INT
++sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
++{
++ HOST_WIDE_INT frame_size, apparent_frame_size;
++ int args_size, n_global_fp_regs = 0;
++ bool save_local_in_regs_p = false;
++ unsigned int i;
++
++ /* If the function allocates dynamic stack space, the dynamic offset is
++ computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
++ if (leaf_function && !cfun->calls_alloca)
++ args_size = 0;
++ else
++ args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
++
++ /* Calculate space needed for global registers. */
++ if (TARGET_ARCH64)
++ {
++ for (i = 0; i < 8; i++)
++ if (save_global_or_fp_reg_p (i, 0))
++ n_global_fp_regs += 2;
++ }
++ else
++ {
++ for (i = 0; i < 8; i += 2)
++ if (save_global_or_fp_reg_p (i, 0)
++ || save_global_or_fp_reg_p (i + 1, 0))
++ n_global_fp_regs += 2;
++ }
++
++ /* In the flat window model, find out which local and in registers need to
++ be saved. We don't reserve space in the current frame for them as they
++ will be spilled into the register window save area of the caller's frame.
++ However, as soon as we use this register window save area, we must create
++ that of the current frame to make it the live one. */
++ if (TARGET_FLAT)
++ for (i = 16; i < 32; i++)
++ if (save_local_or_in_reg_p (i, leaf_function))
++ {
++ save_local_in_regs_p = true;
++ break;
++ }
++
++ /* Calculate space needed for FP registers. */
++ for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
++ if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
++ n_global_fp_regs += 2;
++
++ if (size == 0
++ && n_global_fp_regs == 0
++ && args_size == 0
++ && !save_local_in_regs_p)
++ frame_size = apparent_frame_size = 0;
++ else
++ {
++ /* Start from the apparent frame size. */
++ apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4;
++
++ /* We need to add the size of the outgoing argument area. */
++ frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
++
++ /* And that of the register window save area. */
++ frame_size += FIRST_PARM_OFFSET (cfun->decl);
++
++ /* Finally, bump to the appropriate alignment. */
++ frame_size = SPARC_STACK_ALIGN (frame_size);
++ }
++
++ /* Set up values for use in prologue and epilogue. */
++ sparc_frame_size = frame_size;
++ sparc_apparent_frame_size = apparent_frame_size;
++ sparc_n_global_fp_regs = n_global_fp_regs;
++ sparc_save_local_in_regs_p = save_local_in_regs_p;
++
++ return frame_size;
++}
++
++/* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
++
++int
++sparc_initial_elimination_offset (int to)
++{
++ int offset;
++
++ if (to == STACK_POINTER_REGNUM)
++ offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
++ else
++ offset = 0;
++
++ offset += SPARC_STACK_BIAS;
++ return offset;
++}
++
++/* Output any necessary .register pseudo-ops. */
++
++void
++sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
++{
++ int i;
++
++ if (TARGET_ARCH32)
++ return;
++
++ /* Check if %g[2367] were used without
++ .register being printed for them already. */
++ for (i = 2; i < 8; i++)
++ {
++ if (df_regs_ever_live_p (i)
++ && ! sparc_hard_reg_printed [i])
++ {
++ sparc_hard_reg_printed [i] = 1;
++ /* %g7 is used as TLS base register, use #ignore
++ for it instead of #scratch. */
++ fprintf (file, "\t.register\t%%g%d, #%s\n", i,
++ i == 7 ? "ignore" : "scratch");
++ }
++ if (i == 3) i = 5;
++ }
++}
++
++#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
++
++#if PROBE_INTERVAL > 4096
++#error Cannot use indexed addressing mode for stack probing
++#endif
++
++/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
++ inclusive. These are offsets from the current stack pointer.
++
++ Note that we don't use the REG+REG addressing mode for the probes because
++ of the stack bias in 64-bit mode. And it doesn't really buy us anything
++ so the advantages of having a single code win here. */
++
++static void
++sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
++{
++ rtx g1 = gen_rtx_REG (Pmode, 1);
++
++ /* See if we have a constant small number of probes to generate. If so,
++ that's the easy case. */
++ if (size <= PROBE_INTERVAL)
++ {
++ emit_move_insn (g1, GEN_INT (first));
++ emit_insn (gen_rtx_SET (g1,
++ gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
++ emit_stack_probe (plus_constant (Pmode, g1, -size));
++ }
++
++ /* The run-time loop is made up of 9 insns in the generic case while the
++ compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
++ else if (size <= 4 * PROBE_INTERVAL)
++ {
++ HOST_WIDE_INT i;
++
++ emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
++ emit_insn (gen_rtx_SET (g1,
++ gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
++ emit_stack_probe (g1);
++
++ /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
++ it exceeds SIZE. If only two probes are needed, this will not
++ generate any code. Then probe at FIRST + SIZE. */
++ for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
++ {
++ emit_insn (gen_rtx_SET (g1,
++ plus_constant (Pmode, g1, -PROBE_INTERVAL)));
++ emit_stack_probe (g1);
++ }
++
++ emit_stack_probe (plus_constant (Pmode, g1,
++ (i - PROBE_INTERVAL) - size));
++ }
++
++ /* Otherwise, do the same as above, but in a loop. Note that we must be
++ extra careful with variables wrapping around because we might be at
++ the very top (or the very bottom) of the address space and we have
++ to be able to handle this case properly; in particular, we use an
++ equality test for the loop condition. */
++ else
++ {
++ HOST_WIDE_INT rounded_size;
++ rtx g4 = gen_rtx_REG (Pmode, 4);
++
++ emit_move_insn (g1, GEN_INT (first));
++
++
++ /* Step 1: round SIZE to the previous multiple of the interval. */
++
++ rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
++ emit_move_insn (g4, GEN_INT (rounded_size));
++
++
++ /* Step 2: compute initial and final value of the loop counter. */
++
++ /* TEST_ADDR = SP + FIRST. */
++ emit_insn (gen_rtx_SET (g1,
++ gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
++
++ /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
++ emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
++
++
++ /* Step 3: the loop
++
++ while (TEST_ADDR != LAST_ADDR)
++ {
++ TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
++ probe at TEST_ADDR
++ }
++
++ probes at FIRST + N * PROBE_INTERVAL for values of N from 1
++ until it is equal to ROUNDED_SIZE. */
++
++ if (TARGET_ARCH64)
++ emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
++ else
++ emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
++
++
++ /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
++ that SIZE is equal to ROUNDED_SIZE. */
++
++ if (size != rounded_size)
++ emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
++ }
++
++ /* Make sure nothing is scheduled before we are done. */
++ emit_insn (gen_blockage ());
++}
++
++/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
++ absolute addresses. */
++
++const char *
++output_probe_stack_range (rtx reg1, rtx reg2)
++{
++ static int labelno = 0;
++ char loop_lab[32];
++ rtx xops[2];
++
++ ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
++
++ /* Loop. */
++ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
++
++ /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
++ xops[0] = reg1;
++ xops[1] = GEN_INT (-PROBE_INTERVAL);
++ output_asm_insn ("add\t%0, %1, %0", xops);
++
++ /* Test if TEST_ADDR == LAST_ADDR. */
++ xops[1] = reg2;
++ output_asm_insn ("cmp\t%0, %1", xops);
++
++ /* Probe at TEST_ADDR and branch. */
++ if (TARGET_ARCH64)
++ fputs ("\tbne,pt\t%xcc,", asm_out_file);
++ else
++ fputs ("\tbne\t", asm_out_file);
++ assemble_name_raw (asm_out_file, loop_lab);
++ fputc ('\n', asm_out_file);
++ xops[1] = GEN_INT (SPARC_STACK_BIAS);
++ output_asm_insn (" st\t%%g0, [%0+%1]", xops);
++
++ return "";
++}
++
++/* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
++ needed. LOW is supposed to be double-word aligned for 32-bit registers.
++ SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
++ is the action to be performed if SAVE_P returns true and ACTION_FALSE
++ the action to be performed if it returns false. Return the new offset. */
++
++typedef bool (*sorr_pred_t) (unsigned int, int);
++typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
++
++static int
++emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
++ int offset, int leaf_function, sorr_pred_t save_p,
++ sorr_act_t action_true, sorr_act_t action_false)
++{
++ unsigned int i;
++ rtx mem;
++ rtx_insn *insn;
++
++ if (TARGET_ARCH64 && high <= 32)
++ {
++ int fp_offset = -1;
++
++ for (i = low; i < high; i++)
++ {
++ if (save_p (i, leaf_function))
++ {
++ mem = gen_frame_mem (DImode, plus_constant (Pmode,
++ base, offset));
++ if (action_true == SORR_SAVE)
++ {
++ insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
++ RTX_FRAME_RELATED_P (insn) = 1;
++ }
++ else /* action_true == SORR_RESTORE */
++ {
++ /* The frame pointer must be restored last since its old
++ value may be used as base address for the frame. This
++ is problematic in 64-bit mode only because of the lack
++ of double-word load instruction. */
++ if (i == HARD_FRAME_POINTER_REGNUM)
++ fp_offset = offset;
++ else
++ emit_move_insn (gen_rtx_REG (DImode, i), mem);
++ }
++ offset += 8;
++ }
++ else if (action_false == SORR_ADVANCE)
++ offset += 8;
++ }
++
++ if (fp_offset >= 0)
++ {
++ mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
++ emit_move_insn (hard_frame_pointer_rtx, mem);
++ }
++ }
++ else
++ {
++ for (i = low; i < high; i += 2)
++ {
++ bool reg0 = save_p (i, leaf_function);
++ bool reg1 = save_p (i + 1, leaf_function);
++ machine_mode mode;
++ int regno;
++
++ if (reg0 && reg1)
++ {
++ mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
++ regno = i;
++ }
++ else if (reg0)
++ {
++ mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
++ regno = i;
++ }
++ else if (reg1)
++ {
++ mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
++ regno = i + 1;
++ offset += 4;
++ }
++ else
++ {
++ if (action_false == SORR_ADVANCE)
++ offset += 8;
++ continue;
++ }
++
++ mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
++ if (action_true == SORR_SAVE)
++ {
++ insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
++ RTX_FRAME_RELATED_P (insn) = 1;
++ if (mode == DImode)
++ {
++ rtx set1, set2;
++ mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
++ offset));
++ set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
++ RTX_FRAME_RELATED_P (set1) = 1;
++ mem
++ = gen_frame_mem (SImode, plus_constant (Pmode, base,
++ offset + 4));
++ set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
++ RTX_FRAME_RELATED_P (set2) = 1;
++ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
++ gen_rtx_PARALLEL (VOIDmode,
++ gen_rtvec (2, set1, set2)));
++ }
++ }
++ else /* action_true == SORR_RESTORE */
++ emit_move_insn (gen_rtx_REG (mode, regno), mem);
++
++ /* Bump and round down to double word
++ in case we already bumped by 4. */
++ offset = ROUND_DOWN (offset + 8, 8);
++ }
++ }
++
++ return offset;
++}
++
++/* Emit code to adjust BASE to OFFSET. Return the new base. */
++
++static rtx
++emit_adjust_base_to_offset (rtx base, int offset)
++{
++ /* ??? This might be optimized a little as %g1 might already have a
++ value close enough that a single add insn will do. */
++ /* ??? Although, all of this is probably only a temporary fix because
++ if %g1 can hold a function result, then sparc_expand_epilogue will
++ lose (the result will be clobbered). */
++ rtx new_base = gen_rtx_REG (Pmode, 1);
++ emit_move_insn (new_base, GEN_INT (offset));
++ emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
++ return new_base;
++}
++
++/* Emit code to save/restore call-saved global and FP registers. */
++
++static void
++emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
++{
++ if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
++ {
++ base = emit_adjust_base_to_offset (base, offset);
++ offset = 0;
++ }
++
++ offset
++ = emit_save_or_restore_regs (0, 8, base, offset, 0,
++ save_global_or_fp_reg_p, action, SORR_NONE);
++ emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
++ save_global_or_fp_reg_p, action, SORR_NONE);
++}
++
++/* Emit code to save/restore call-saved local and in registers. */
++
++static void
++emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
++{
++ if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
++ {
++ base = emit_adjust_base_to_offset (base, offset);
++ offset = 0;
++ }
++
++ emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
++ save_local_or_in_reg_p, action, SORR_ADVANCE);
++}
++
++/* Emit a window_save insn. */
++
++static rtx_insn *
++emit_window_save (rtx increment)
++{
++ rtx_insn *insn = emit_insn (gen_window_save (increment));
++ RTX_FRAME_RELATED_P (insn) = 1;
++
++ /* The incoming return address (%o7) is saved in %i7. */
++ add_reg_note (insn, REG_CFA_REGISTER,
++ gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
++ gen_rtx_REG (Pmode,
++ INCOMING_RETURN_ADDR_REGNUM)));
++
++ /* The window save event. */
++ add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
++
++ /* The CFA is %fp, the hard frame pointer. */
++ add_reg_note (insn, REG_CFA_DEF_CFA,
++ plus_constant (Pmode, hard_frame_pointer_rtx,
++ INCOMING_FRAME_SP_OFFSET));
++
++ return insn;
++}
++
++/* Generate an increment for the stack pointer. */
++
++static rtx
++gen_stack_pointer_inc (rtx increment)
++{
++ return gen_rtx_SET (stack_pointer_rtx,
++ gen_rtx_PLUS (Pmode,
++ stack_pointer_rtx,
++ increment));
++}
++
++/* Expand the function prologue. The prologue is responsible for reserving
++ storage for the frame, saving the call-saved registers and loading the
++ GOT register if needed. */
++
++void
++sparc_expand_prologue (void)
++{
++ HOST_WIDE_INT size;
++ rtx_insn *insn;
++
++ /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
++ on the final value of the flag means deferring the prologue/epilogue
++ expansion until just before the second scheduling pass, which is too
++ late to emit multiple epilogues or return insns.
++
++ Of course we are making the assumption that the value of the flag
++ will not change between now and its final value. Of the three parts
++ of the formula, only the last one can reasonably vary. Let's take a
++ closer look, after assuming that the first two ones are set to true
++ (otherwise the last value is effectively silenced).
++
++ If only_leaf_regs_used returns false, the global predicate will also
++ be false so the actual frame size calculated below will be positive.
++ As a consequence, the save_register_window insn will be emitted in
++ the instruction stream; now this insn explicitly references %fp
++ which is not a leaf register so only_leaf_regs_used will always
++ return false subsequently.
++
++ If only_leaf_regs_used returns true, we hope that the subsequent
++ optimization passes won't cause non-leaf registers to pop up. For
++ example, the regrename pass has special provisions to not rename to
++ non-leaf registers in a leaf function. */
++ sparc_leaf_function_p
++ = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
++
++ size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
++
++ if (flag_stack_usage_info)
++ current_function_static_stack_size = size;
++
++ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
++ || flag_stack_clash_protection)
++ {
++ if (crtl->is_leaf && !cfun->calls_alloca)
++ {
++ if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
++ sparc_emit_probe_stack_range (get_stack_check_protect (),
++ size - get_stack_check_protect ());
++ }
++ else if (size > 0)
++ sparc_emit_probe_stack_range (get_stack_check_protect (), size);
++ }
++
++ if (size == 0)
++ ; /* do nothing. */
++ else if (sparc_leaf_function_p)
++ {
++ rtx size_int_rtx = GEN_INT (-size);
++
++ if (size <= 4096)
++ insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
++ else if (size <= 8192)
++ {
++ insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
++ RTX_FRAME_RELATED_P (insn) = 1;
++
++ /* %sp is still the CFA register. */
++ insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
++ }
++ else
++ {
++ rtx size_rtx = gen_rtx_REG (Pmode, 1);
++ emit_move_insn (size_rtx, size_int_rtx);
++ insn = emit_insn (gen_stack_pointer_inc (size_rtx));
++ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
++ gen_stack_pointer_inc (size_int_rtx));
++ }
++
++ RTX_FRAME_RELATED_P (insn) = 1;
++ }
++ else
++ {
++ rtx size_int_rtx = GEN_INT (-size);
++
++ if (size <= 4096)
++ emit_window_save (size_int_rtx);
++ else if (size <= 8192)
++ {
++ emit_window_save (GEN_INT (-4096));
++
++ /* %sp is not the CFA register anymore. */
++ emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
++
++ /* Make sure no %fp-based store is issued until after the frame is
++ established. The offset between the frame pointer and the stack
++ pointer is calculated relative to the value of the stack pointer
++ at the end of the function prologue, and moving instructions that
++ access the stack via the frame pointer between the instructions
++ that decrement the stack pointer could result in accessing the
++ register window save area, which is volatile. */
++ emit_insn (gen_frame_blockage ());
++ }
++ else
++ {
++ rtx size_rtx = gen_rtx_REG (Pmode, 1);
++ emit_move_insn (size_rtx, size_int_rtx);
++ emit_window_save (size_rtx);
++ }
++ }
++
++ if (sparc_leaf_function_p)
++ {
++ sparc_frame_base_reg = stack_pointer_rtx;
++ sparc_frame_base_offset = size + SPARC_STACK_BIAS;
++ }
++ else
++ {
++ sparc_frame_base_reg = hard_frame_pointer_rtx;
++ sparc_frame_base_offset = SPARC_STACK_BIAS;
++ }
++
++ if (sparc_n_global_fp_regs > 0)
++ emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
++ sparc_frame_base_offset
++ - sparc_apparent_frame_size,
++ SORR_SAVE);
++
++ /* Advertise that the data calculated just above are now valid. */
++ sparc_prologue_data_valid_p = true;
++}
++
++/* Expand the function prologue. The prologue is responsible for reserving
++ storage for the frame, saving the call-saved registers and loading the
++ GOT register if needed. */
++
++void
++sparc_flat_expand_prologue (void)
++{
++ HOST_WIDE_INT size;
++ rtx_insn *insn;
++
++ sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
++
++ size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
++
++ if (flag_stack_usage_info)
++ current_function_static_stack_size = size;
++
++ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
++ || flag_stack_clash_protection)
++ {
++ if (crtl->is_leaf && !cfun->calls_alloca)
++ {
++ if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
++ sparc_emit_probe_stack_range (get_stack_check_protect (),
++ size - get_stack_check_protect ());
++ }
++ else if (size > 0)
++ sparc_emit_probe_stack_range (get_stack_check_protect (), size);
++ }
++
++ if (sparc_save_local_in_regs_p)
++ emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
++ SORR_SAVE);
++
++ if (size == 0)
++ ; /* do nothing. */
++ else
++ {
++ rtx size_int_rtx, size_rtx;
++
++ size_rtx = size_int_rtx = GEN_INT (-size);
++
++ /* We establish the frame (i.e. decrement the stack pointer) first, even
++ if we use a frame pointer, because we cannot clobber any call-saved
++ registers, including the frame pointer, if we haven't created a new
++ register save area, for the sake of compatibility with the ABI. */
++ if (size <= 4096)
++ insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
++ else if (size <= 8192 && !frame_pointer_needed)
++ {
++ insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
++ RTX_FRAME_RELATED_P (insn) = 1;
++ insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
++ }
++ else
++ {
++ size_rtx = gen_rtx_REG (Pmode, 1);
++ emit_move_insn (size_rtx, size_int_rtx);
++ insn = emit_insn (gen_stack_pointer_inc (size_rtx));
++ add_reg_note (insn, REG_CFA_ADJUST_CFA,
++ gen_stack_pointer_inc (size_int_rtx));
++ }
++ RTX_FRAME_RELATED_P (insn) = 1;
++
++ /* Ensure nothing is scheduled until after the frame is established. */
++ emit_insn (gen_blockage ());
++
++ if (frame_pointer_needed)
++ {
++ insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
++ gen_rtx_MINUS (Pmode,
++ stack_pointer_rtx,
++ size_rtx)));
++ RTX_FRAME_RELATED_P (insn) = 1;
++
++ add_reg_note (insn, REG_CFA_ADJUST_CFA,
++ gen_rtx_SET (hard_frame_pointer_rtx,
++ plus_constant (Pmode, stack_pointer_rtx,
++ size)));
++ }
++
++ if (return_addr_reg_needed_p (sparc_leaf_function_p))
++ {
++ rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
++ rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
++
++ insn = emit_move_insn (i7, o7);
++ RTX_FRAME_RELATED_P (insn) = 1;
++
++ add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
++
++ /* Prevent this instruction from ever being considered dead,
++ even if this function has no epilogue. */
++ emit_use (i7);
++ }
++ }
++
++ if (frame_pointer_needed)
++ {
++ sparc_frame_base_reg = hard_frame_pointer_rtx;
++ sparc_frame_base_offset = SPARC_STACK_BIAS;
++ }
++ else
++ {
++ sparc_frame_base_reg = stack_pointer_rtx;
++ sparc_frame_base_offset = size + SPARC_STACK_BIAS;
++ }
++
++ if (sparc_n_global_fp_regs > 0)
++ emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
++ sparc_frame_base_offset
++ - sparc_apparent_frame_size,
++ SORR_SAVE);
++
++ /* Advertise that the data calculated just above are now valid. */
++ sparc_prologue_data_valid_p = true;
++}
++
++/* This function generates the assembly code for function entry, which boils
++ down to emitting the necessary .register directives. */
++
++static void
++sparc_asm_function_prologue (FILE *file)
++{
++ /* Check that the assumption we made in sparc_expand_prologue is valid. */
++ if (!TARGET_FLAT)
++ gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
++
++ sparc_output_scratch_registers (file);
++}
++
++/* Expand the function epilogue, either normal or part of a sibcall.
++ We emit all the instructions except the return or the call. */
++
++void
++sparc_expand_epilogue (bool for_eh)
++{
++ HOST_WIDE_INT size = sparc_frame_size;
++
++ if (cfun->calls_alloca)
++ emit_insn (gen_frame_blockage ());
++
++ if (sparc_n_global_fp_regs > 0)
++ emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
++ sparc_frame_base_offset
++ - sparc_apparent_frame_size,
++ SORR_RESTORE);
++
++ if (size == 0 || for_eh)
++ ; /* do nothing. */
++ else if (sparc_leaf_function_p)
++ {
++ if (size <= 4096)
++ emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
++ else if (size <= 8192)
++ {
++ emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
++ emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
++ }
++ else
++ {
++ rtx reg = gen_rtx_REG (Pmode, 1);
++ emit_move_insn (reg, GEN_INT (size));
++ emit_insn (gen_stack_pointer_inc (reg));
++ }
++ }
++}
++
++/* Expand the function epilogue, either normal or part of a sibcall.
++ We emit all the instructions except the return or the call. */
++
++void
++sparc_flat_expand_epilogue (bool for_eh)
++{
++ HOST_WIDE_INT size = sparc_frame_size;
++
++ if (sparc_n_global_fp_regs > 0)
++ emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
++ sparc_frame_base_offset
++ - sparc_apparent_frame_size,
++ SORR_RESTORE);
++
++ /* If we have a frame pointer, we'll need both to restore it before the
++ frame is destroyed and use its current value in destroying the frame.
++ Since we don't have an atomic way to do that in the flat window model,
++ we save the current value into a temporary register (%g1). */
++ if (frame_pointer_needed && !for_eh)
++ emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
++
++ if (return_addr_reg_needed_p (sparc_leaf_function_p))
++ emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
++ gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
++
++ if (sparc_save_local_in_regs_p)
++ emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
++ sparc_frame_base_offset,
++ SORR_RESTORE);
++
++ if (size == 0 || for_eh)
++ ; /* do nothing. */
++ else if (frame_pointer_needed)
++ {
++ /* Make sure the frame is destroyed after everything else is done. */
++ emit_insn (gen_blockage ());
++
++ emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
++ }
++ else
++ {
++ /* Likewise. */
++ emit_insn (gen_blockage ());
++
++ if (size <= 4096)
++ emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
++ else if (size <= 8192)
++ {
++ emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
++ emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
++ }
++ else
++ {
++ rtx reg = gen_rtx_REG (Pmode, 1);
++ emit_move_insn (reg, GEN_INT (size));
++ emit_insn (gen_stack_pointer_inc (reg));
++ }
++ }
++}
++
++/* Return true if it is appropriate to emit `return' instructions in the
++ body of a function. */
++
++bool
++sparc_can_use_return_insn_p (void)
++{
++ return sparc_prologue_data_valid_p
++ && sparc_n_global_fp_regs == 0
++ && TARGET_FLAT
++ ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
++ : (sparc_frame_size == 0 || !sparc_leaf_function_p);
++}
++
++/* This function generates the assembly code for function exit. */
++
++static void
++sparc_asm_function_epilogue (FILE *file)
++{
++ /* If the last two instructions of a function are "call foo; dslot;"
++ the return address might point to the first instruction in the next
++ function and we have to output a dummy nop for the sake of sane
++ backtraces in such cases. This is pointless for sibling calls since
++ the return address is explicitly adjusted. */
++
++ rtx_insn *insn = get_last_insn ();
++
++ rtx last_real_insn = prev_real_insn (insn);
++ if (last_real_insn
++ && NONJUMP_INSN_P (last_real_insn)
++ && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
++ last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
++
++ if (last_real_insn
++ && CALL_P (last_real_insn)
++ && !SIBLING_CALL_P (last_real_insn))
++ fputs("\tnop\n", file);
++
++ sparc_output_deferred_case_vectors ();
++}
++
++/* Output a 'restore' instruction. */
++
++static void
++output_restore (rtx pat)
++{
++ rtx operands[3];
++
++ if (! pat)
++ {
++ fputs ("\t restore\n", asm_out_file);
++ return;
++ }
++
++ gcc_assert (GET_CODE (pat) == SET);
++
++ operands[0] = SET_DEST (pat);
++ pat = SET_SRC (pat);
++
++ switch (GET_CODE (pat))
++ {
++ case PLUS:
++ operands[1] = XEXP (pat, 0);
++ operands[2] = XEXP (pat, 1);
++ output_asm_insn (" restore %r1, %2, %Y0", operands);
++ break;
++ case LO_SUM:
++ operands[1] = XEXP (pat, 0);
++ operands[2] = XEXP (pat, 1);
++ output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
++ break;
++ case ASHIFT:
++ operands[1] = XEXP (pat, 0);
++ gcc_assert (XEXP (pat, 1) == const1_rtx);
++ output_asm_insn (" restore %r1, %r1, %Y0", operands);
++ break;
++ default:
++ operands[1] = pat;
++ output_asm_insn (" restore %%g0, %1, %Y0", operands);
++ break;
++ }
++}
++
++/* Output a return. */
++
++const char *
++output_return (rtx_insn *insn)
++{
++ if (crtl->calls_eh_return)
++ {
++ /* If the function uses __builtin_eh_return, the eh_return
++ machinery occupies the delay slot. */
++ gcc_assert (!final_sequence);
++
++ if (flag_delayed_branch)
++ {
++ if (!TARGET_FLAT && TARGET_V9)
++ fputs ("\treturn\t%i7+8\n", asm_out_file);
++ else
++ {
++ if (!TARGET_FLAT)
++ fputs ("\trestore\n", asm_out_file);
++
++ fputs ("\tjmp\t%o7+8\n", asm_out_file);
++ }
++
++ fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
++ }
++ else
++ {
++ if (!TARGET_FLAT)
++ fputs ("\trestore\n", asm_out_file);
++
++ fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
++ fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
++ }
++ }
++ else if (sparc_leaf_function_p || TARGET_FLAT)
++ {
++ /* This is a leaf or flat function so we don't have to bother restoring
++ the register window, which frees us from dealing with the convoluted
++ semantics of restore/return. We simply output the jump to the
++ return address and the insn in the delay slot (if any). */
++
++ return "jmp\t%%o7+%)%#";
++ }
++ else
++ {
++ /* This is a regular function so we have to restore the register window.
++ We may have a pending insn for the delay slot, which will be either
++ combined with the 'restore' instruction or put in the delay slot of
++ the 'return' instruction. */
++
++ if (final_sequence)
++ {
++ rtx_insn *delay;
++ rtx pat;
++
++ delay = NEXT_INSN (insn);
++ gcc_assert (delay);
++
++ pat = PATTERN (delay);
++
++ if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
++ {
++ epilogue_renumber (&pat, 0);
++ return "return\t%%i7+%)%#";
++ }
++ else
++ {
++ output_asm_insn ("jmp\t%%i7+%)", NULL);
++
++ /* We're going to output the insn in the delay slot manually.
++ Make sure to output its source location first. */
++ PATTERN (delay) = gen_blockage ();
++ INSN_CODE (delay) = -1;
++ final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
++ INSN_LOCATION (delay) = UNKNOWN_LOCATION;
++
++ output_restore (pat);
++ }
++ }
++ else
++ {
++ /* The delay slot is empty. */
++ if (TARGET_V9)
++ return "return\t%%i7+%)\n\t nop";
++ else if (flag_delayed_branch)
++ return "jmp\t%%i7+%)\n\t restore";
++ else
++ return "restore\n\tjmp\t%%o7+%)\n\t nop";
++ }
++ }
++
++ return "";
++}
++
++/* Output a sibling call. */
++
++const char *
++output_sibcall (rtx_insn *insn, rtx call_operand)
++{
++ rtx operands[1];
++
++ gcc_assert (flag_delayed_branch);
++
++ operands[0] = call_operand;
++
++ if (sparc_leaf_function_p || TARGET_FLAT)
++ {
++ /* This is a leaf or flat function so we don't have to bother restoring
++ the register window. We simply output the jump to the function and
++ the insn in the delay slot (if any). */
++
++ gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
++
++ if (final_sequence)
++ output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
++ operands);
++ else
++ /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
++ it into branch if possible. */
++ output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
++ operands);
++ }
++ else
++ {
++ /* This is a regular function so we have to restore the register window.
++ We may have a pending insn for the delay slot, which will be combined
++ with the 'restore' instruction. */
++
++ output_asm_insn ("call\t%a0, 0", operands);
++
++ if (final_sequence)
++ {
++ rtx_insn *delay;
++ rtx pat;
++
++ delay = NEXT_INSN (insn);
++ gcc_assert (delay);
++
++ pat = PATTERN (delay);
++
++ /* We're going to output the insn in the delay slot manually.
++ Make sure to output its source location first. */
++ PATTERN (delay) = gen_blockage ();
++ INSN_CODE (delay) = -1;
++ final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
++ INSN_LOCATION (delay) = UNKNOWN_LOCATION;
++
++ output_restore (pat);
++ }
++ else
++ output_restore (NULL_RTX);
++ }
++
++ return "";
++}
++
++/* Functions for handling argument passing.
++
++ For 32-bit, the first 6 args are normally in registers and the rest are
++ pushed. Any arg that starts within the first 6 words is at least
++ partially passed in a register unless its data type forbids.
++
++ For 64-bit, the argument registers are laid out as an array of 16 elements
++ and arguments are added sequentially. The first 6 int args and up to the
++ first 16 fp args (depending on size) are passed in regs.
++
++ Slot Stack Integral Float Float in structure Double Long Double
++ ---- ----- -------- ----- ------------------ ------ -----------
++ 15 [SP+248] %f31 %f30,%f31 %d30
++ 14 [SP+240] %f29 %f28,%f29 %d28 %q28
++ 13 [SP+232] %f27 %f26,%f27 %d26
++ 12 [SP+224] %f25 %f24,%f25 %d24 %q24
++ 11 [SP+216] %f23 %f22,%f23 %d22
++ 10 [SP+208] %f21 %f20,%f21 %d20 %q20
++ 9 [SP+200] %f19 %f18,%f19 %d18
++ 8 [SP+192] %f17 %f16,%f17 %d16 %q16
++ 7 [SP+184] %f15 %f14,%f15 %d14
++ 6 [SP+176] %f13 %f12,%f13 %d12 %q12
++ 5 [SP+168] %o5 %f11 %f10,%f11 %d10
++ 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
++ 3 [SP+152] %o3 %f7 %f6,%f7 %d6
++ 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
++ 1 [SP+136] %o1 %f3 %f2,%f3 %d2
++ 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
++
++ Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
++
++ Integral arguments are always passed as 64-bit quantities appropriately
++ extended.
++
++ Passing of floating point values is handled as follows.
++ If a prototype is in scope:
++ If the value is in a named argument (i.e. not a stdarg function or a
++ value not part of the `...') then the value is passed in the appropriate
++ fp reg.
++ If the value is part of the `...' and is passed in one of the first 6
++ slots then the value is passed in the appropriate int reg.
++ If the value is part of the `...' and is not passed in one of the first 6
++ slots then the value is passed in memory.
++ If a prototype is not in scope:
++ If the value is one of the first 6 arguments the value is passed in the
++ appropriate integer reg and the appropriate fp reg.
++ If the value is not one of the first 6 arguments the value is passed in
++ the appropriate fp reg and in memory.
++
++
++ Summary of the calling conventions implemented by GCC on the SPARC:
++
++ 32-bit ABI:
++ size argument return value
++
++ small integer <4 int. reg. int. reg.
++ word 4 int. reg. int. reg.
++ double word 8 int. reg. int. reg.
++
++ _Complex small integer <8 int. reg. int. reg.
++ _Complex word 8 int. reg. int. reg.
++ _Complex double word 16 memory int. reg.
++
++ vector integer <=8 int. reg. FP reg.
++ vector integer >8 memory memory
++
++ float 4 int. reg. FP reg.
++ double 8 int. reg. FP reg.
++ long double 16 memory memory
++
++ _Complex float 8 memory FP reg.
++ _Complex double 16 memory FP reg.
++ _Complex long double 32 memory FP reg.
++
++ vector float any memory memory
++
++ aggregate any memory memory
++
++
++
++ 64-bit ABI:
++ size argument return value
++
++ small integer <8 int. reg. int. reg.
++ word 8 int. reg. int. reg.
++ double word 16 int. reg. int. reg.
++
++ _Complex small integer <16 int. reg. int. reg.
++ _Complex word 16 int. reg. int. reg.
++ _Complex double word 32 memory int. reg.
++
++ vector integer <=16 FP reg. FP reg.
++ vector integer 16<s<=32 memory FP reg.
++ vector integer >32 memory memory
++
++ float 4 FP reg. FP reg.
++ double 8 FP reg. FP reg.
++ long double 16 FP reg. FP reg.
++
++ _Complex float 8 FP reg. FP reg.
++ _Complex double 16 FP reg. FP reg.
++ _Complex long double 32 memory FP reg.
++
++ vector float <=16 FP reg. FP reg.
++ vector float 16<s<=32 memory FP reg.
++ vector float >32 memory memory
++
++ aggregate <=16 reg. reg.
++ aggregate 16<s<=32 memory reg.
++ aggregate >32 memory memory
++
++
++
++Note #1: complex floating-point types follow the extended SPARC ABIs as
++implemented by the Sun compiler.
++
++Note #2: integer vector types follow the scalar floating-point types
++conventions to match what is implemented by the Sun VIS SDK.
++
++Note #3: floating-point vector types follow the aggregate types
++conventions. */
++
++
++/* Maximum number of int regs for args. */
++#define SPARC_INT_ARG_MAX 6
++/* Maximum number of fp regs for args. */
++#define SPARC_FP_ARG_MAX 16
++/* Number of words (partially) occupied for a given size in units. */
++#define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
++
++/* Handle the INIT_CUMULATIVE_ARGS macro.
++ Initialize a variable CUM of type CUMULATIVE_ARGS
++ for a call to a function whose data type is FNTYPE.
++ For a library call, FNTYPE is 0. */
++
++void
++init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
++{
++ cum->words = 0;
++ cum->prototype_p = fntype && prototype_p (fntype);
++ cum->libcall_p = !fntype;
++}
++
++/* Handle promotion of pointer and integer arguments. */
++
++static machine_mode
++sparc_promote_function_mode (const_tree type, machine_mode mode,
++ int *punsignedp, const_tree, int)
++{
++ if (type && POINTER_TYPE_P (type))
++ {
++ *punsignedp = POINTERS_EXTEND_UNSIGNED;
++ return Pmode;
++ }
++
++ /* Integral arguments are passed as full words, as per the ABI. */
++ if (GET_MODE_CLASS (mode) == MODE_INT
++ && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
++ return word_mode;
++
++ return mode;
++}
++
++/* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
++
++static bool
++sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
++{
++ return TARGET_ARCH64 ? true : false;
++}
++
++/* Handle the TARGET_PASS_BY_REFERENCE target hook.
++ Specify whether to pass the argument by reference. */
++
++static bool
++sparc_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
++{
++ tree type = arg.type;
++ machine_mode mode = arg.mode;
++ if (TARGET_ARCH32)
++ /* Original SPARC 32-bit ABI says that structures and unions,
++ and quad-precision floats are passed by reference.
++ All other base types are passed in registers.
++
++ Extended ABI (as implemented by the Sun compiler) says that all
++ complex floats are passed by reference. Pass complex integers
++ in registers up to 8 bytes. More generally, enforce the 2-word
++ cap for passing arguments in registers.
++
++ Vector ABI (as implemented by the Sun VIS SDK) says that integer
++ vectors are passed like floats of the same size, that is in
++ registers up to 8 bytes. Pass all vector floats by reference
++ like structure and unions. */
++ return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
++ || mode == SCmode
++ /* Catch CDImode, TFmode, DCmode and TCmode. */
++ || GET_MODE_SIZE (mode) > 8
++ || (type
++ && VECTOR_TYPE_P (type)
++ && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
++ else
++ /* Original SPARC 64-bit ABI says that structures and unions
++ smaller than 16 bytes are passed in registers, as well as
++ all other base types.
++
++ Extended ABI (as implemented by the Sun compiler) says that
++ complex floats are passed in registers up to 16 bytes. Pass
++ all complex integers in registers up to 16 bytes. More generally,
++ enforce the 2-word cap for passing arguments in registers.
++
++ Vector ABI (as implemented by the Sun VIS SDK) says that integer
++ vectors are passed like floats of the same size, that is in
++ registers (up to 16 bytes). Pass all vector floats like structure
++ and unions. */
++ return ((type
++ && (AGGREGATE_TYPE_P (type) || VECTOR_TYPE_P (type))
++ && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
++ /* Catch CTImode and TCmode. */
++ || GET_MODE_SIZE (mode) > 16);
++}
++
++/* Traverse the record TYPE recursively and call FUNC on its fields.
++ NAMED is true if this is for a named parameter. DATA is passed
++ to FUNC for each field. OFFSET is the starting position and
++ PACKED is true if we are inside a packed record. */
++
++template <typename T, void Func (const_tree, int, bool, T*)>
++static void
++traverse_record_type (const_tree type, bool named, T *data,
++ int offset = 0, bool packed = false)
++{
++ /* The ABI obviously doesn't specify how packed structures are passed.
++ These are passed in integer regs if possible, otherwise memory. */
++ if (!packed)
++ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
++ if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
++ {
++ packed = true;
++ break;
++ }
++
++ /* Walk the real fields, but skip those with no size or a zero size.
++ ??? Fields with variable offset are handled as having zero offset. */
++ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
++ if (TREE_CODE (field) == FIELD_DECL)
++ {
++ if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
++ continue;
++
++ int bitpos = offset;
++ if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
++ bitpos += int_bit_position (field);
++
++ tree field_type = TREE_TYPE (field);
++ if (TREE_CODE (field_type) == RECORD_TYPE)
++ traverse_record_type<T, Func> (field_type, named, data, bitpos,
++ packed);
++ else
++ {
++ const bool fp_type
++ = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
++ Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
++ data);
++ }
++ }
++}
++
++/* Handle recursive register classifying for structure layout. */
++
++typedef struct
++{
++ bool fp_regs; /* true if field eligible to FP registers. */
++ bool fp_regs_in_first_word; /* true if such field in first word. */
++} classify_data_t;
++
++/* A subroutine of function_arg_slotno. Classify the field. */
++
++inline void
++classify_registers (const_tree, int bitpos, bool fp, classify_data_t *data)
++{
++ if (fp)
++ {
++ data->fp_regs = true;
++ if (bitpos < BITS_PER_WORD)
++ data->fp_regs_in_first_word = true;
++ }
++}
++
++/* Compute the slot number to pass an argument in.
++ Return the slot number or -1 if passing on the stack.
++
++ CUM is a variable of type CUMULATIVE_ARGS which gives info about
++ the preceding args and about the function being called.
++ MODE is the argument's machine mode.
++ TYPE is the data type of the argument (as a tree).
++ This is null for libcalls where that information may
++ not be available.
++ NAMED is nonzero if this argument is a named parameter
++ (otherwise it is an extra parameter matching an ellipsis).
++ INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
++ *PREGNO records the register number to use if scalar type.
++ *PPADDING records the amount of padding needed in words. */
++
++static int
++function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
++ const_tree type, bool named, bool incoming,
++ int *pregno, int *ppadding)
++{
++ const int regbase
++ = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
++ int slotno = cum->words, regno;
++ enum mode_class mclass = GET_MODE_CLASS (mode);
++
++ /* Silence warnings in the callers. */
++ *pregno = -1;
++ *ppadding = -1;
++
++ if (type && TREE_ADDRESSABLE (type))
++ return -1;
++
++ /* In 64-bit mode, objects requiring 16-byte alignment get it. */
++ if (TARGET_ARCH64
++ && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
++ && (slotno & 1) != 0)
++ {
++ slotno++;
++ *ppadding = 1;
++ }
++ else
++ *ppadding = 0;
++
++ /* Vector types deserve special treatment because they are polymorphic wrt
++ their mode, depending upon whether VIS instructions are enabled. */
++ if (type && VECTOR_TYPE_P (type))
++ {
++ if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
++ {
++ /* The SPARC port defines no floating-point vector modes. */
++ gcc_assert (mode == BLKmode);
++ }
++ else
++ {
++ /* Integer vector types should either have a vector
++ mode or an integral mode, because we are guaranteed
++ by pass_by_reference that their size is not greater
++ than 16 bytes and TImode is 16-byte wide. */
++ gcc_assert (mode != BLKmode);
++
++ /* Integer vectors are handled like floats as per
++ the Sun VIS SDK. */
++ mclass = MODE_FLOAT;
++ }
++ }
++
++ switch (mclass)
++ {
++ case MODE_FLOAT:
++ case MODE_COMPLEX_FLOAT:
++ case MODE_VECTOR_INT:
++ if (TARGET_ARCH64 && TARGET_FPU && named)
++ {
++ /* If all arg slots are filled, then must pass on stack. */
++ if (slotno >= SPARC_FP_ARG_MAX)
++ return -1;
++
++ regno = SPARC_FP_ARG_FIRST + slotno * 2;
++ /* Arguments filling only one single FP register are
++ right-justified in the outer double FP register. */
++ if (GET_MODE_SIZE (mode) <= 4)
++ regno++;
++ break;
++ }
++ /* fallthrough */
++
++ case MODE_INT:
++ case MODE_COMPLEX_INT:
++ /* If all arg slots are filled, then must pass on stack. */
++ if (slotno >= SPARC_INT_ARG_MAX)
++ return -1;
++
++ regno = regbase + slotno;
++ break;
++
++ case MODE_RANDOM:
++ /* MODE is VOIDmode when generating the actual call. */
++ if (mode == VOIDmode)
++ return -1;
++
++ if (TARGET_64BIT && TARGET_FPU && named
++ && type
++ && (TREE_CODE (type) == RECORD_TYPE || VECTOR_TYPE_P (type)))
++ {
++ /* If all arg slots are filled, then must pass on stack. */
++ if (slotno >= SPARC_FP_ARG_MAX)
++ return -1;
++
++ if (TREE_CODE (type) == RECORD_TYPE)
++ {
++ classify_data_t data = { false, false };
++ traverse_record_type<classify_data_t, classify_registers>
++ (type, named, &data);
++
++ if (data.fp_regs)
++ {
++ /* If all FP slots are filled except for the last one and
++ there is no FP field in the first word, then must pass
++ on stack. */
++ if (slotno >= SPARC_FP_ARG_MAX - 1
++ && !data.fp_regs_in_first_word)
++ return -1;
++ }
++ else
++ {
++ /* If all int slots are filled, then must pass on stack. */
++ if (slotno >= SPARC_INT_ARG_MAX)
++ return -1;
++ }
++
++ /* PREGNO isn't set since both int and FP regs can be used. */
++ return slotno;
++ }
++
++ regno = SPARC_FP_ARG_FIRST + slotno * 2;
++ }
++ else
++ {
++ /* If all arg slots are filled, then must pass on stack. */
++ if (slotno >= SPARC_INT_ARG_MAX)
++ return -1;
++
++ regno = regbase + slotno;
++ }
++ break;
++
++ default :
++ gcc_unreachable ();
++ }
++
++ *pregno = regno;
++ return slotno;
++}
++
++/* Handle recursive register counting/assigning for structure layout. */
++
++typedef struct
++{
++ int slotno; /* slot number of the argument. */
++ int regbase; /* regno of the base register. */
++ int intoffset; /* offset of the first pending integer field. */
++ int nregs; /* number of words passed in registers. */
++ bool stack; /* true if part of the argument is on the stack. */
++ rtx ret; /* return expression being built. */
++} assign_data_t;
++
++/* A subroutine of function_arg_record_value. Compute the number of integer
++ registers to be assigned between PARMS->intoffset and BITPOS. Return
++ true if at least one integer register is assigned or false otherwise. */
++
++static bool
++compute_int_layout (int bitpos, assign_data_t *data, int *pnregs)
++{
++ if (data->intoffset < 0)
++ return false;
++
++ const int intoffset = data->intoffset;
++ data->intoffset = -1;
++
++ const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
++ const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
++ const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
++ int nregs = (endbit - startbit) / BITS_PER_WORD;
++
++ if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
++ {
++ nregs = SPARC_INT_ARG_MAX - this_slotno;
++
++ /* We need to pass this field (partly) on the stack. */
++ data->stack = 1;
++ }
++
++ if (nregs <= 0)
++ return false;
++
++ *pnregs = nregs;
++ return true;
++}
++
++/* A subroutine of function_arg_record_value. Compute the number and the mode
++ of the FP registers to be assigned for FIELD. Return true if at least one
++ FP register is assigned or false otherwise. */
++
++static bool
++compute_fp_layout (const_tree field, int bitpos, assign_data_t *data,
++ int *pnregs, machine_mode *pmode)
++{
++ const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
++ machine_mode mode = DECL_MODE (field);
++ int nregs, nslots;
++
++ /* Slots are counted as words while regs are counted as having the size of
++ the (inner) mode. */
++ if (VECTOR_TYPE_P (TREE_TYPE (field)) && mode == BLKmode)
++ {
++ mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
++ nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
++ }
++ else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
++ {
++ mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
++ nregs = 2;
++ }
++ else
++ nregs = 1;
++
++ nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
++
++ if (nslots > SPARC_FP_ARG_MAX - this_slotno)
++ {
++ nslots = SPARC_FP_ARG_MAX - this_slotno;
++ nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
++
++ /* We need to pass this field (partly) on the stack. */
++ data->stack = 1;
++
++ if (nregs <= 0)
++ return false;
++ }
++
++ *pnregs = nregs;
++ *pmode = mode;
++ return true;
++}
++
++/* A subroutine of function_arg_record_value. Count the number of registers
++ to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
++
++inline void
++count_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
++{
++ if (fp)
++ {
++ int nregs;
++ machine_mode mode;
++
++ if (compute_int_layout (bitpos, data, &nregs))
++ data->nregs += nregs;
++
++ if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
++ data->nregs += nregs;
++ }
++ else
++ {
++ if (data->intoffset < 0)
++ data->intoffset = bitpos;
++ }
++}
++
++/* A subroutine of function_arg_record_value. Assign the bits of the
++ structure between PARMS->intoffset and BITPOS to integer registers. */
++
++static void
++assign_int_registers (int bitpos, assign_data_t *data)
++{
++ int intoffset = data->intoffset;
++ machine_mode mode;
++ int nregs;
++
++ if (!compute_int_layout (bitpos, data, &nregs))
++ return;
++
++ /* If this is the trailing part of a word, only load that much into
++ the register. Otherwise load the whole register. Note that in
++ the latter case we may pick up unwanted bits. It's not a problem
++ at the moment but may wish to revisit. */
++ if (intoffset % BITS_PER_WORD != 0)
++ mode = smallest_int_mode_for_size (BITS_PER_WORD
++ - intoffset % BITS_PER_WORD);
++ else
++ mode = word_mode;
++
++ const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
++ unsigned int regno = data->regbase + this_slotno;
++ intoffset /= BITS_PER_UNIT;
++
++ do
++ {
++ rtx reg = gen_rtx_REG (mode, regno);
++ XVECEXP (data->ret, 0, data->stack + data->nregs)
++ = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
++ data->nregs += 1;
++ mode = word_mode;
++ regno += 1;
++ intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
++ }
++ while (--nregs > 0);
++}
++
++/* A subroutine of function_arg_record_value. Assign FIELD at position
++ BITPOS to FP registers. */
++
++static void
++assign_fp_registers (const_tree field, int bitpos, assign_data_t *data)
++{
++ int nregs;
++ machine_mode mode;
++
++ if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
++ return;
++
++ const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
++ int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
++ if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
++ regno++;
++ int pos = bitpos / BITS_PER_UNIT;
++
++ do
++ {
++ rtx reg = gen_rtx_REG (mode, regno);
++ XVECEXP (data->ret, 0, data->stack + data->nregs)
++ = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
++ data->nregs += 1;
++ regno += GET_MODE_SIZE (mode) / 4;
++ pos += GET_MODE_SIZE (mode);
++ }
++ while (--nregs > 0);
++}
++
++/* A subroutine of function_arg_record_value. Assign FIELD and the bits of
++ the structure between PARMS->intoffset and BITPOS to registers. */
++
++inline void
++assign_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
++{
++ if (fp)
++ {
++ assign_int_registers (bitpos, data);
++
++ assign_fp_registers (field, bitpos, data);
++ }
++ else
++ {
++ if (data->intoffset < 0)
++ data->intoffset = bitpos;
++ }
++}
++
++/* Used by function_arg and function_value to implement the complex
++ conventions of the 64-bit ABI for passing and returning structures.
++ Return an expression valid as a return value for the FUNCTION_ARG
++ and TARGET_FUNCTION_VALUE.
++
++ TYPE is the data type of the argument (as a tree).
++ This is null for libcalls where that information may
++ not be available.
++ MODE is the argument's machine mode.
++ SLOTNO is the index number of the argument's slot in the parameter array.
++ NAMED is true if this argument is a named parameter
++ (otherwise it is an extra parameter matching an ellipsis).
++ REGBASE is the regno of the base register for the parameter array. */
++
++static rtx
++function_arg_record_value (const_tree type, machine_mode mode,
++ int slotno, bool named, int regbase)
++{
++ const int size = int_size_in_bytes (type);
++ assign_data_t data;
++ int nregs;
++
++ data.slotno = slotno;
++ data.regbase = regbase;
++
++ /* Count how many registers we need. */
++ data.nregs = 0;
++ data.intoffset = 0;
++ data.stack = false;
++ traverse_record_type<assign_data_t, count_registers> (type, named, &data);
++
++ /* Take into account pending integer fields. */
++ if (compute_int_layout (size * BITS_PER_UNIT, &data, &nregs))
++ data.nregs += nregs;
++
++ /* Allocate the vector and handle some annoying special cases. */
++ nregs = data.nregs;
++
++ if (nregs == 0)
++ {
++ /* ??? Empty structure has no value? Duh? */
++ if (size <= 0)
++ {
++ /* Though there's nothing really to store, return a word register
++ anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
++ leads to breakage due to the fact that there are zero bytes to
++ load. */
++ return gen_rtx_REG (mode, regbase);
++ }
++
++ /* ??? C++ has structures with no fields, and yet a size. Give up
++ for now and pass everything back in integer registers. */
++ nregs = CEIL_NWORDS (size);
++ if (nregs + slotno > SPARC_INT_ARG_MAX)
++ nregs = SPARC_INT_ARG_MAX - slotno;
++ }
++
++ gcc_assert (nregs > 0);
++
++ data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
++
++ /* If at least one field must be passed on the stack, generate
++ (parallel [(expr_list (nil) ...) ...]) so that all fields will
++ also be passed on the stack. We can't do much better because the
++ semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
++ of structures for which the fields passed exclusively in registers
++ are not at the beginning of the structure. */
++ if (data.stack)
++ XVECEXP (data.ret, 0, 0)
++ = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
++
++ /* Assign the registers. */
++ data.nregs = 0;
++ data.intoffset = 0;
++ traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
++
++ /* Assign pending integer fields. */
++ assign_int_registers (size * BITS_PER_UNIT, &data);
++
++ gcc_assert (data.nregs == nregs);
++
++ return data.ret;
++}
++
++/* Used by function_arg and function_value to implement the conventions
++ of the 64-bit ABI for passing and returning unions.
++ Return an expression valid as a return value for the FUNCTION_ARG
++ and TARGET_FUNCTION_VALUE.
++
++ SIZE is the size in bytes of the union.
++ MODE is the argument's machine mode.
++ SLOTNO is the index number of the argument's slot in the parameter array.
++ REGNO is the hard register the union will be passed in. */
++
++static rtx
++function_arg_union_value (int size, machine_mode mode, int slotno, int regno)
++{
++ unsigned int nwords;
++
++ /* See comment in function_arg_record_value for empty structures. */
++ if (size <= 0)
++ return gen_rtx_REG (mode, regno);
++
++ if (slotno == SPARC_INT_ARG_MAX - 1)
++ nwords = 1;
++ else
++ nwords = CEIL_NWORDS (size);
++
++ rtx regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
++
++ /* Unions are passed left-justified. */
++ for (unsigned int i = 0; i < nwords; i++)
++ XVECEXP (regs, 0, i)
++ = gen_rtx_EXPR_LIST (VOIDmode,
++ gen_rtx_REG (word_mode, regno + i),
++ GEN_INT (UNITS_PER_WORD * i));
++
++ return regs;
++}
++
++/* Used by function_arg and function_value to implement the conventions
++ of the 64-bit ABI for passing and returning BLKmode vectors.
++ Return an expression valid as a return value for the FUNCTION_ARG
++ and TARGET_FUNCTION_VALUE.
++
++ SIZE is the size in bytes of the vector.
++ SLOTNO is the index number of the argument's slot in the parameter array.
++ NAMED is true if this argument is a named parameter
++ (otherwise it is an extra parameter matching an ellipsis).
++ REGNO is the hard register the vector will be passed in. */
++
++static rtx
++function_arg_vector_value (int size, int slotno, bool named, int regno)
++{
++ const int mult = (named ? 2 : 1);
++ unsigned int nwords;
++
++ if (slotno == (named ? SPARC_FP_ARG_MAX : SPARC_INT_ARG_MAX) - 1)
++ nwords = 1;
++ else
++ nwords = CEIL_NWORDS (size);
++
++ rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nwords));
++
++ if (size < UNITS_PER_WORD)
++ XVECEXP (regs, 0, 0)
++ = gen_rtx_EXPR_LIST (VOIDmode,
++ gen_rtx_REG (SImode, regno),
++ const0_rtx);
++ else
++ for (unsigned int i = 0; i < nwords; i++)
++ XVECEXP (regs, 0, i)
++ = gen_rtx_EXPR_LIST (VOIDmode,
++ gen_rtx_REG (word_mode, regno + i * mult),
++ GEN_INT (i * UNITS_PER_WORD));
++
++ return regs;
++}
++
++/* Determine where to put an argument to a function.
++ Value is zero to push the argument on the stack,
++ or a hard register in which to store the argument.
++
++ CUM is a variable of type CUMULATIVE_ARGS which gives info about
++ the preceding args and about the function being called.
++ ARG is a description of the argument.
++ INCOMING_P is false for TARGET_FUNCTION_ARG, true for
++ TARGET_FUNCTION_INCOMING_ARG. */
++
++static rtx
++sparc_function_arg_1 (cumulative_args_t cum_v, const function_arg_info &arg,
++ bool incoming)
++{
++ const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
++ const int regbase
++ = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
++ int slotno, regno, padding;
++ tree type = arg.type;
++ machine_mode mode = arg.mode;
++ enum mode_class mclass = GET_MODE_CLASS (mode);
++ bool named = arg.named;
++
++ slotno
++ = function_arg_slotno (cum, mode, type, named, incoming, &regno, &padding);
++ if (slotno == -1)
++ return 0;
++
++ /* Integer vectors are handled like floats as per the Sun VIS SDK. */
++ if (type && VECTOR_INTEGER_TYPE_P (type))
++ mclass = MODE_FLOAT;
++
++ if (TARGET_ARCH32)
++ return gen_rtx_REG (mode, regno);
++
++ /* Structures up to 16 bytes in size are passed in arg slots on the stack
++ and are promoted to registers if possible. */
++ if (type && TREE_CODE (type) == RECORD_TYPE)
++ {
++ const int size = int_size_in_bytes (type);
++ gcc_assert (size <= 16);
++
++ return function_arg_record_value (type, mode, slotno, named, regbase);
++ }
++
++ /* Unions up to 16 bytes in size are passed in integer registers. */
++ else if (type && TREE_CODE (type) == UNION_TYPE)
++ {
++ const int size = int_size_in_bytes (type);
++ gcc_assert (size <= 16);
++
++ return function_arg_union_value (size, mode, slotno, regno);
++ }
++
++ /* Floating-point vectors up to 16 bytes are passed in registers. */
++ else if (type && VECTOR_TYPE_P (type) && mode == BLKmode)
++ {
++ const int size = int_size_in_bytes (type);
++ gcc_assert (size <= 16);
++
++ return function_arg_vector_value (size, slotno, named, regno);
++ }
++
++ /* v9 fp args in reg slots beyond the int reg slots get passed in regs
++ but also have the slot allocated for them.
++ If no prototype is in scope fp values in register slots get passed
++ in two places, either fp regs and int regs or fp regs and memory. */
++ else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
++ && SPARC_FP_REG_P (regno))
++ {
++ rtx reg = gen_rtx_REG (mode, regno);
++ if (cum->prototype_p || cum->libcall_p)
++ return reg;
++ else
++ {
++ rtx v0, v1;
++
++ if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
++ {
++ int intreg;
++
++ /* On incoming, we don't need to know that the value
++ is passed in %f0 and %i0, and it confuses other parts
++ causing needless spillage even on the simplest cases. */
++ if (incoming)
++ return reg;
++
++ intreg = (SPARC_OUTGOING_INT_ARG_FIRST
++ + (regno - SPARC_FP_ARG_FIRST) / 2);
++
++ v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
++ v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
++ const0_rtx);
++ return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
++ }
++ else
++ {
++ v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
++ v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
++ return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
++ }
++ }
++ }
++
++ /* All other aggregate types are passed in an integer register in a mode
++ corresponding to the size of the type. */
++ else if (type && AGGREGATE_TYPE_P (type))
++ {
++ const int size = int_size_in_bytes (type);
++ gcc_assert (size <= 16);
++
++ mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
++ }
++
++ return gen_rtx_REG (mode, regno);
++}
++
++/* Handle the TARGET_FUNCTION_ARG target hook. */
++
++static rtx
++sparc_function_arg (cumulative_args_t cum, const function_arg_info &arg)
++{
++ return sparc_function_arg_1 (cum, arg, false);
++}
++
++/* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
++
++static rtx
++sparc_function_incoming_arg (cumulative_args_t cum,
++ const function_arg_info &arg)
++{
++ return sparc_function_arg_1 (cum, arg, true);
++}
++
++/* For sparc64, objects requiring 16 byte alignment are passed that way. */
++
++static unsigned int
++sparc_function_arg_boundary (machine_mode mode, const_tree type)
++{
++ return ((TARGET_ARCH64
++ && (GET_MODE_ALIGNMENT (mode) == 128
++ || (type && TYPE_ALIGN (type) == 128)))
++ ? 128
++ : PARM_BOUNDARY);
++}
++
++/* For an arg passed partly in registers and partly in memory,
++ this is the number of bytes of registers used.
++ For args passed entirely in registers or entirely in memory, zero.
++
++ Any arg that starts in the first 6 regs but won't entirely fit in them
++ needs partial registers on v8. On v9, structures with integer
++ values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
++ values that begin in the last fp reg [where "last fp reg" varies with the
++ mode] will be split between that reg and memory. */
++
++static int
++sparc_arg_partial_bytes (cumulative_args_t cum, const function_arg_info &arg)
++{
++ int slotno, regno, padding;
++
++ /* We pass false for incoming here, it doesn't matter. */
++ slotno = function_arg_slotno (get_cumulative_args (cum), arg.mode, arg.type,
++ arg.named, false, &regno, &padding);
++
++ if (slotno == -1)
++ return 0;
++
++ if (TARGET_ARCH32)
++ {
++ /* We are guaranteed by pass_by_reference that the size of the
++ argument is not greater than 8 bytes, so we only need to return
++ one word if the argument is partially passed in registers. */
++ const int size = GET_MODE_SIZE (arg.mode);
++
++ if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
++ return UNITS_PER_WORD;
++ }
++ else
++ {
++ /* We are guaranteed by pass_by_reference that the size of the
++ argument is not greater than 16 bytes, so we only need to return
++ one word if the argument is partially passed in registers. */
++ if (arg.aggregate_type_p ())
++ {
++ const int size = int_size_in_bytes (arg.type);
++
++ if (size > UNITS_PER_WORD
++ && (slotno == SPARC_INT_ARG_MAX - 1
++ || slotno == SPARC_FP_ARG_MAX - 1))
++ return UNITS_PER_WORD;
++ }
++ else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_INT
++ || ((GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
++ || (arg.type && VECTOR_TYPE_P (arg.type)))
++ && !(TARGET_FPU && arg.named)))
++ {
++ const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
++ ? int_size_in_bytes (arg.type)
++ : GET_MODE_SIZE (arg.mode);
++
++ if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
++ return UNITS_PER_WORD;
++ }
++ else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
++ || (arg.type && VECTOR_TYPE_P (arg.type)))
++ {
++ const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
++ ? int_size_in_bytes (arg.type)
++ : GET_MODE_SIZE (arg.mode);
++
++ if (size > UNITS_PER_WORD && slotno == SPARC_FP_ARG_MAX - 1)
++ return UNITS_PER_WORD;
++ }
++ }
++
++ return 0;
++}
++
++/* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
++ Update the data in CUM to advance over argument ARG. */
++
++static void
++sparc_function_arg_advance (cumulative_args_t cum_v,
++ const function_arg_info &arg)
++{
++ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
++ tree type = arg.type;
++ machine_mode mode = arg.mode;
++ int regno, padding;
++
++ /* We pass false for incoming here, it doesn't matter. */
++ function_arg_slotno (cum, mode, type, arg.named, false, &regno, &padding);
++
++ /* If argument requires leading padding, add it. */
++ cum->words += padding;
++
++ if (TARGET_ARCH32)
++ cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
++ else
++ {
++ /* For types that can have BLKmode, get the size from the type. */
++ if (type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
++ {
++ const int size = int_size_in_bytes (type);
++
++ /* See comment in function_arg_record_value for empty structures. */
++ if (size <= 0)
++ cum->words++;
++ else
++ cum->words += CEIL_NWORDS (size);
++ }
++ else
++ cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
++ }
++}
++
++/* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI structs
++ are always stored left shifted in their argument slot. */
++
++static pad_direction
++sparc_function_arg_padding (machine_mode mode, const_tree type)
++{
++ if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
++ return PAD_UPWARD;
++
++ /* Fall back to the default. */
++ return default_function_arg_padding (mode, type);
++}
++
++/* Handle the TARGET_RETURN_IN_MEMORY target hook.
++ Specify whether to return the return value in memory. */
++
++static bool
++sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
++{
++ if (TARGET_ARCH32)
++ /* Original SPARC 32-bit ABI says that structures and unions, and
++ quad-precision floats are returned in memory. But note that the
++ first part is implemented through -fpcc-struct-return being the
++ default, so here we only implement -freg-struct-return instead.
++ All other base types are returned in registers.
++
++ Extended ABI (as implemented by the Sun compiler) says that
++ all complex floats are returned in registers (8 FP registers
++ at most for '_Complex long double'). Return all complex integers
++ in registers (4 at most for '_Complex long long').
++
++ Vector ABI (as implemented by the Sun VIS SDK) says that vector
++ integers are returned like floats of the same size, that is in
++ registers up to 8 bytes and in memory otherwise. Return all
++ vector floats in memory like structure and unions; note that
++ they always have BLKmode like the latter. */
++ return (TYPE_MODE (type) == BLKmode
++ || TYPE_MODE (type) == TFmode
++ || (TREE_CODE (type) == VECTOR_TYPE
++ && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
++ else
++ /* Original SPARC 64-bit ABI says that structures and unions
++ smaller than 32 bytes are returned in registers, as well as
++ all other base types.
++
++ Extended ABI (as implemented by the Sun compiler) says that all
++ complex floats are returned in registers (8 FP registers at most
++ for '_Complex long double'). Return all complex integers in
++ registers (4 at most for '_Complex TItype').
++
++ Vector ABI (as implemented by the Sun VIS SDK) says that vector
++ integers are returned like floats of the same size, that is in
++ registers. Return all vector floats like structure and unions;
++ note that they always have BLKmode like the latter. */
++ return (TYPE_MODE (type) == BLKmode
++ && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
++}
++
++/* Handle the TARGET_STRUCT_VALUE target hook.
++ Return where to find the structure return value address. */
++
++static rtx
++sparc_struct_value_rtx (tree fndecl, int incoming)
++{
++ if (TARGET_ARCH64)
++ return NULL_RTX;
++ else
++ {
++ rtx mem;
++
++ if (incoming)
++ mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
++ STRUCT_VALUE_OFFSET));
++ else
++ mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
++ STRUCT_VALUE_OFFSET));
++
++ /* Only follow the SPARC ABI for fixed-size structure returns.
++ Variable size structure returns are handled per the normal
++ procedures in GCC. This is enabled by -mstd-struct-return */
++ if (incoming == 2
++ && sparc_std_struct_return
++ && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
++ && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
++ {
++ /* We must check and adjust the return address, as it is optional
++ as to whether the return object is really provided. */
++ rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
++ rtx scratch = gen_reg_rtx (SImode);
++ rtx_code_label *endlab = gen_label_rtx ();
++
++ /* Calculate the return object size. */
++ tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
++ rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
++ /* Construct a temporary return value. */
++ rtx temp_val
++ = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
++
++ /* Implement SPARC 32-bit psABI callee return struct checking:
++
++ Fetch the instruction where we will return to and see if
++ it's an unimp instruction (the most significant 10 bits
++ will be zero). */
++ emit_move_insn (scratch, gen_rtx_MEM (SImode,
++ plus_constant (Pmode,
++ ret_reg, 8)));
++ /* Assume the size is valid and pre-adjust. */
++ emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
++ emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
++ 0, endlab);
++ emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
++ /* Write the address of the memory pointed to by temp_val into
++ the memory pointed to by mem. */
++ emit_move_insn (mem, XEXP (temp_val, 0));
++ emit_label (endlab);
++ }
++
++ return mem;
++ }
++}
++
++/* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
++ For v9, function return values are subject to the same rules as arguments,
++ except that up to 32 bytes may be returned in registers. */
++
++static rtx
++sparc_function_value_1 (const_tree type, machine_mode mode, bool outgoing)
++{
++ /* Beware that the two values are swapped here wrt function_arg. */
++ const int regbase
++ = outgoing ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
++ enum mode_class mclass = GET_MODE_CLASS (mode);
++ int regno;
++
++ /* Integer vectors are handled like floats as per the Sun VIS SDK.
++ Note that integer vectors larger than 16 bytes have BLKmode so
++ they need to be handled like floating-point vectors below. */
++ if (type && VECTOR_INTEGER_TYPE_P (type) && mode != BLKmode)
++ mclass = MODE_FLOAT;
++
++ if (TARGET_ARCH64 && type)
++ {
++ /* Structures up to 32 bytes in size are returned in registers. */
++ if (TREE_CODE (type) == RECORD_TYPE)
++ {
++ const int size = int_size_in_bytes (type);
++ gcc_assert (size <= 32);
++
++ return function_arg_record_value (type, mode, 0, true, regbase);
++ }
++
++ /* Unions up to 32 bytes in size are returned in integer registers. */
++ else if (TREE_CODE (type) == UNION_TYPE)
++ {
++ const int size = int_size_in_bytes (type);
++ gcc_assert (size <= 32);
++
++ return function_arg_union_value (size, mode, 0, regbase);
++ }
++
++ /* Vectors up to 32 bytes are returned in FP registers. */
++ else if (VECTOR_TYPE_P (type) && mode == BLKmode)
++ {
++ const int size = int_size_in_bytes (type);
++ gcc_assert (size <= 32);
++
++ return function_arg_vector_value (size, 0, true, SPARC_FP_ARG_FIRST);
++ }
++
++ /* Objects that require it are returned in FP registers. */
++ else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
++ ;
++
++ /* All other aggregate types are returned in an integer register in a
++ mode corresponding to the size of the type. */
++ else if (AGGREGATE_TYPE_P (type))
++ {
++ /* All other aggregate types are passed in an integer register
++ in a mode corresponding to the size of the type. */
++ const int size = int_size_in_bytes (type);
++ gcc_assert (size <= 32);
++
++ mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
++
++ /* ??? We probably should have made the same ABI change in
++ 3.4.0 as the one we made for unions. The latter was
++ required by the SCD though, while the former is not
++ specified, so we favored compatibility and efficiency.
++
++ Now we're stuck for aggregates larger than 16 bytes,
++ because OImode vanished in the meantime. Let's not
++ try to be unduly clever, and simply follow the ABI
++ for unions in that case. */
++ if (mode == BLKmode)
++ return function_arg_union_value (size, mode, 0, regbase);
++ else
++ mclass = MODE_INT;
++ }
++
++ /* We should only have pointer and integer types at this point. This
++ must match sparc_promote_function_mode. */
++ else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
++ mode = word_mode;
++ }
++
++ /* We should only have pointer and integer types at this point, except with
++ -freg-struct-return. This must match sparc_promote_function_mode. */
++ else if (TARGET_ARCH32
++ && !(type && AGGREGATE_TYPE_P (type))
++ && mclass == MODE_INT
++ && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
++ mode = word_mode;
++
++ if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
++ regno = SPARC_FP_ARG_FIRST;
++ else
++ regno = regbase;
++
++ return gen_rtx_REG (mode, regno);
++}
++
++/* Handle TARGET_FUNCTION_VALUE.
++ On the SPARC, the value is found in the first "output" register, but the
++ called function leaves it in the first "input" register. */
++
++static rtx
++sparc_function_value (const_tree valtype,
++ const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
++ bool outgoing)
++{
++ return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
++}
++
++/* Handle TARGET_LIBCALL_VALUE. */
++
++static rtx
++sparc_libcall_value (machine_mode mode,
++ const_rtx fun ATTRIBUTE_UNUSED)
++{
++ return sparc_function_value_1 (NULL_TREE, mode, false);
++}
++
++/* Handle FUNCTION_VALUE_REGNO_P.
++ On the SPARC, the first "output" reg is used for integer values, and the
++ first floating point register is used for floating point values. */
++
++static bool
++sparc_function_value_regno_p (const unsigned int regno)
++{
++ return (regno == 8 || (TARGET_FPU && regno == 32));
++}
++
++/* Do what is necessary for `va_start'. We look at the current function
++ to determine if stdarg or varargs is used and return the address of
++ the first unnamed parameter. */
++
++static rtx
++sparc_builtin_saveregs (void)
++{
++ int first_reg = crtl->args.info.words;
++ rtx address;
++ int regno;
++
++ for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
++ emit_move_insn (gen_rtx_MEM (word_mode,
++ gen_rtx_PLUS (Pmode,
++ frame_pointer_rtx,
++ GEN_INT (FIRST_PARM_OFFSET (0)
++ + (UNITS_PER_WORD
++ * regno)))),
++ gen_rtx_REG (word_mode,
++ SPARC_INCOMING_INT_ARG_FIRST + regno));
++
++ address = gen_rtx_PLUS (Pmode,
++ frame_pointer_rtx,
++ GEN_INT (FIRST_PARM_OFFSET (0)
++ + UNITS_PER_WORD * first_reg));
++
++ return address;
++}
++
++/* Implement `va_start' for stdarg. */
++
++static void
++sparc_va_start (tree valist, rtx nextarg)
++{
++ nextarg = expand_builtin_saveregs ();
++ std_expand_builtin_va_start (valist, nextarg);
++}
++
++/* Implement `va_arg' for stdarg. */
++
++static tree
++sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
++ gimple_seq *post_p)
++{
++ HOST_WIDE_INT size, rsize, align;
++ tree addr, incr;
++ bool indirect;
++ tree ptrtype = build_pointer_type (type);
++
++ if (pass_va_arg_by_reference (type))
++ {
++ indirect = true;
++ size = rsize = UNITS_PER_WORD;
++ align = 0;
++ }
++ else
++ {
++ indirect = false;
++ size = int_size_in_bytes (type);
++ rsize = ROUND_UP (size, UNITS_PER_WORD);
++ align = 0;
++
++ if (TARGET_ARCH64)
++ {
++ /* For SPARC64, objects requiring 16-byte alignment get it. */
++ if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
++ align = 2 * UNITS_PER_WORD;
++
++ /* SPARC-V9 ABI states that structures up to 16 bytes in size
++ are left-justified in their slots. */
++ if (AGGREGATE_TYPE_P (type))
++ {
++ if (size == 0)
++ size = rsize = UNITS_PER_WORD;
++ else
++ size = rsize;
++ }
++ }
++ }
++
++ incr = valist;
++ if (align)
++ {
++ incr = fold_build_pointer_plus_hwi (incr, align - 1);
++ incr = fold_convert (sizetype, incr);
++ incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
++ size_int (-align));
++ incr = fold_convert (ptr_type_node, incr);
++ }
++
++ gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
++ addr = incr;
++
++ if (BYTES_BIG_ENDIAN && size < rsize)
++ addr = fold_build_pointer_plus_hwi (incr, rsize - size);
++
++ if (indirect)
++ {
++ addr = fold_convert (build_pointer_type (ptrtype), addr);
++ addr = build_va_arg_indirect_ref (addr);
++ }
++
++ /* If the address isn't aligned properly for the type, we need a temporary.
++ FIXME: This is inefficient, usually we can do this in registers. */
++ else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
++ {
++ tree tmp = create_tmp_var (type, "va_arg_tmp");
++ tree dest_addr = build_fold_addr_expr (tmp);
++ tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
++ 3, dest_addr, addr, size_int (rsize));
++ TREE_ADDRESSABLE (tmp) = 1;
++ gimplify_and_add (copy, pre_p);
++ addr = dest_addr;
++ }
++
++ else
++ addr = fold_convert (ptrtype, addr);
++
++ incr = fold_build_pointer_plus_hwi (incr, rsize);
++ gimplify_assign (valist, incr, post_p);
++
++ return build_va_arg_indirect_ref (addr);
++}
++
++/* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
++ Specify whether the vector mode is supported by the hardware. */
++
++static bool
++sparc_vector_mode_supported_p (machine_mode mode)
++{
++ return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
++}
++
++/* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
++
++static machine_mode
++sparc_preferred_simd_mode (scalar_mode mode)
++{
++ if (TARGET_VIS)
++ switch (mode)
++ {
++ case E_SImode:
++ return V2SImode;
++ case E_HImode:
++ return V4HImode;
++ case E_QImode:
++ return V8QImode;
++
++ default:;
++ }
++
++ return word_mode;
++}
++
++ /* Implement TARGET_CAN_FOLLOW_JUMP. */
++
++static bool
++sparc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
++{
++ /* Do not fold unconditional jumps that have been created for crossing
++ partition boundaries. */
++ if (CROSSING_JUMP_P (followee) && !CROSSING_JUMP_P (follower))
++ return false;
++
++ return true;
++}
++
++/* Return the string to output an unconditional branch to LABEL, which is
++ the operand number of the label.
++
++ DEST is the destination insn (i.e. the label), INSN is the source. */
++
++const char *
++output_ubranch (rtx dest, rtx_insn *insn)
++{
++ static char string[64];
++ bool v9_form = false;
++ int delta;
++ char *p;
++
++ /* Even if we are trying to use cbcond for this, evaluate
++ whether we can use V9 branches as our backup plan. */
++ delta = 5000000;
++ if (!CROSSING_JUMP_P (insn) && INSN_ADDRESSES_SET_P ())
++ delta = (INSN_ADDRESSES (INSN_UID (dest))
++ - INSN_ADDRESSES (INSN_UID (insn)));
++
++ /* Leave some instructions for "slop". */
++ if (TARGET_V9 && delta >= -260000 && delta < 260000)
++ v9_form = true;
++
++ if (TARGET_CBCOND)
++ {
++ bool emit_nop = emit_cbcond_nop (insn);
++ bool far = false;
++ const char *rval;
++
++ if (delta < -500 || delta > 500)
++ far = true;
++
++ if (far)
++ {
++ if (v9_form)
++ rval = "ba,a,pt\t%%xcc, %l0";
++ else
++ rval = "b,a\t%l0";
++ }
++ else
++ {
++ if (emit_nop)
++ rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
++ else
++ rval = "cwbe\t%%g0, %%g0, %l0";
++ }
++ return rval;
++ }
++
++ if (v9_form)
++ strcpy (string, "ba%*,pt\t%%xcc, ");
++ else
++ strcpy (string, "b%*\t");
++
++ p = strchr (string, '\0');
++ *p++ = '%';
++ *p++ = 'l';
++ *p++ = '0';
++ *p++ = '%';
++ *p++ = '(';
++ *p = '\0';
++
++ return string;
++}
++
++/* Return the string to output a conditional branch to LABEL, which is
++ the operand number of the label. OP is the conditional expression.
++ XEXP (OP, 0) is assumed to be a condition code register (integer or
++ floating point) and its mode specifies what kind of comparison we made.
++
++ DEST is the destination insn (i.e. the label), INSN is the source.
++
++ REVERSED is nonzero if we should reverse the sense of the comparison.
++
++ ANNUL is nonzero if we should generate an annulling branch. */
++
++const char *
++output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
++ rtx_insn *insn)
++{
++ static char string[64];
++ enum rtx_code code = GET_CODE (op);
++ rtx cc_reg = XEXP (op, 0);
++ machine_mode mode = GET_MODE (cc_reg);
++ const char *labelno, *branch;
++ int spaces = 8, far;
++ char *p;
++
++ /* v9 branches are limited to +-1MB. If it is too far away,
++ change
++
++ bne,pt %xcc, .LC30
++
++ to
++
++ be,pn %xcc, .+12
++ nop
++ ba .LC30
++
++ and
++
++ fbne,a,pn %fcc2, .LC29
++
++ to
++
++ fbe,pt %fcc2, .+16
++ nop
++ ba .LC29 */
++
++ far = TARGET_V9 && (get_attr_length (insn) >= 3);
++ if (reversed ^ far)
++ {
++ /* Reversal of FP compares takes care -- an ordered compare
++ becomes an unordered compare and vice versa. */
++ if (mode == CCFPmode || mode == CCFPEmode)
++ code = reverse_condition_maybe_unordered (code);
++ else
++ code = reverse_condition (code);
++ }
++
++ /* Start by writing the branch condition. */
++ if (mode == CCFPmode || mode == CCFPEmode)
++ {
++ switch (code)
++ {
++ case NE:
++ branch = "fbne";
++ break;
++ case EQ:
++ branch = "fbe";
++ break;
++ case GE:
++ branch = "fbge";
++ break;
++ case GT:
++ branch = "fbg";
++ break;
++ case LE:
++ branch = "fble";
++ break;
++ case LT:
++ branch = "fbl";
++ break;
++ case UNORDERED:
++ branch = "fbu";
++ break;
++ case ORDERED:
++ branch = "fbo";
++ break;
++ case UNGT:
++ branch = "fbug";
++ break;
++ case UNLT:
++ branch = "fbul";
++ break;
++ case UNEQ:
++ branch = "fbue";
++ break;
++ case UNGE:
++ branch = "fbuge";
++ break;
++ case UNLE:
++ branch = "fbule";
++ break;
++ case LTGT:
++ branch = "fblg";
++ break;
++ default:
++ gcc_unreachable ();
++ }
++
++ /* ??? !v9: FP branches cannot be preceded by another floating point
++ insn. Because there is currently no concept of pre-delay slots,
++ we can fix this only by always emitting a nop before a floating
++ point branch. */
++
++ string[0] = '\0';
++ if (! TARGET_V9)
++ strcpy (string, "nop\n\t");
++ strcat (string, branch);
++ }
++ else
++ {
++ switch (code)
++ {
++ case NE:
++ if (mode == CCVmode || mode == CCXVmode)
++ branch = "bvs";
++ else
++ branch = "bne";
++ break;
++ case EQ:
++ if (mode == CCVmode || mode == CCXVmode)
++ branch = "bvc";
++ else
++ branch = "be";
++ break;
++ case GE:
++ if (mode == CCNZmode || mode == CCXNZmode)
++ branch = "bpos";
++ else
++ branch = "bge";
++ break;
++ case GT:
++ branch = "bg";
++ break;
++ case LE:
++ branch = "ble";
++ break;
++ case LT:
++ if (mode == CCNZmode || mode == CCXNZmode)
++ branch = "bneg";
++ else
++ branch = "bl";
++ break;
++ case GEU:
++ branch = "bgeu";
++ break;
++ case GTU:
++ branch = "bgu";
++ break;
++ case LEU:
++ branch = "bleu";
++ break;
++ case LTU:
++ branch = "blu";
++ break;
++ default:
++ gcc_unreachable ();
++ }
++ strcpy (string, branch);
++ }
++ spaces -= strlen (branch);
++ p = strchr (string, '\0');
++
++ /* Now add the annulling, the label, and a possible noop. */
++ if (annul && ! far)
++ {
++ strcpy (p, ",a");
++ p += 2;
++ spaces -= 2;
++ }
++
++ if (TARGET_V9)
++ {
++ rtx note;
++ int v8 = 0;
++
++ if (! far && insn && INSN_ADDRESSES_SET_P ())
++ {
++ int delta = (INSN_ADDRESSES (INSN_UID (dest))
++ - INSN_ADDRESSES (INSN_UID (insn)));
++ /* Leave some instructions for "slop". */
++ if (delta < -260000 || delta >= 260000)
++ v8 = 1;
++ }
++
++ switch (mode)
++ {
++ case E_CCmode:
++ case E_CCNZmode:
++ case E_CCCmode:
++ case E_CCVmode:
++ labelno = "%%icc, ";
++ if (v8)
++ labelno = "";
++ break;
++ case E_CCXmode:
++ case E_CCXNZmode:
++ case E_CCXCmode:
++ case E_CCXVmode:
++ labelno = "%%xcc, ";
++ gcc_assert (!v8);
++ break;
++ case E_CCFPmode:
++ case E_CCFPEmode:
++ {
++ static char v9_fcc_labelno[] = "%%fccX, ";
++ /* Set the char indicating the number of the fcc reg to use. */
++ v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
++ labelno = v9_fcc_labelno;
++ if (v8)
++ {
++ gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
++ labelno = "";
++ }
++ }
++ break;
++ default:
++ gcc_unreachable ();
++ }
++
++ if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
++ {
++ strcpy (p,
++ ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
++ >= profile_probability::even ()) ^ far)
++ ? ",pt" : ",pn");
++ p += 3;
++ spaces -= 3;
++ }
++ }
++ else
++ labelno = "";
++
++ if (spaces > 0)
++ *p++ = '\t';
++ else
++ *p++ = ' ';
++ strcpy (p, labelno);
++ p = strchr (p, '\0');
++ if (far)
++ {
++ strcpy (p, ".+12\n\t nop\n\tb\t");
++ /* Skip the next insn if requested or
++ if we know that it will be a nop. */
++ if (annul || ! final_sequence)
++ p[3] = '6';
++ p += 14;
++ }
++ *p++ = '%';
++ *p++ = 'l';
++ *p++ = label + '0';
++ *p++ = '%';
++ *p++ = '#';
++ *p = '\0';
++
++ return string;
++}
++
++/* Emit a library call comparison between floating point X and Y.
++ COMPARISON is the operator to compare with (EQ, NE, GT, etc).
++ Return the new operator to be used in the comparison sequence.
++
++ TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
++ values as arguments instead of the TFmode registers themselves,
++ that's why we cannot call emit_float_lib_cmp. */
++
++rtx
++sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
++{
++ const char *qpfunc;
++ rtx slot0, slot1, result, tem, tem2, libfunc;
++ machine_mode mode;
++ enum rtx_code new_comparison;
++
++ switch (comparison)
++ {
++ case EQ:
++ qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
++ break;
++
++ case NE:
++ qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
++ break;
++
++ case GT:
++ qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
++ break;
++
++ case GE:
++ qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
++ break;
++
++ case LT:
++ qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
++ break;
++
++ case LE:
++ qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
++ break;
++
++ case ORDERED:
++ case UNORDERED:
++ case UNGT:
++ case UNLT:
++ case UNEQ:
++ case UNGE:
++ case UNLE:
++ case LTGT:
++ qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
++ break;
++
++ default:
++ gcc_unreachable ();
++ }
++
++ if (TARGET_ARCH64)
++ {
++ if (MEM_P (x))
++ {
++ tree expr = MEM_EXPR (x);
++ if (expr)
++ mark_addressable (expr);
++ slot0 = x;
++ }
++ else
++ {
++ slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
++ emit_move_insn (slot0, x);
++ }
++
++ if (MEM_P (y))
++ {
++ tree expr = MEM_EXPR (y);
++ if (expr)
++ mark_addressable (expr);
++ slot1 = y;
++ }
++ else
++ {
++ slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
++ emit_move_insn (slot1, y);
++ }
++
++ libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
++ emit_library_call (libfunc, LCT_NORMAL,
++ DImode,
++ XEXP (slot0, 0), Pmode,
++ XEXP (slot1, 0), Pmode);
++ mode = DImode;
++ }
++ else
++ {
++ libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
++ emit_library_call (libfunc, LCT_NORMAL,
++ SImode,
++ x, TFmode, y, TFmode);
++ mode = SImode;
++ }
++
++
++ /* Immediately move the result of the libcall into a pseudo
++ register so reload doesn't clobber the value if it needs
++ the return register for a spill reg. */
++ result = gen_reg_rtx (mode);
++ emit_move_insn (result, hard_libcall_value (mode, libfunc));
++
++ switch (comparison)
++ {
++ default:
++ return gen_rtx_NE (VOIDmode, result, const0_rtx);
++ case ORDERED:
++ case UNORDERED:
++ new_comparison = (comparison == UNORDERED ? EQ : NE);
++ return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
++ case UNGT:
++ case UNGE:
++ new_comparison = (comparison == UNGT ? GT : NE);
++ return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
++ case UNLE:
++ return gen_rtx_NE (VOIDmode, result, const2_rtx);
++ case UNLT:
++ tem = gen_reg_rtx (mode);
++ if (TARGET_ARCH32)
++ emit_insn (gen_andsi3 (tem, result, const1_rtx));
++ else
++ emit_insn (gen_anddi3 (tem, result, const1_rtx));
++ return gen_rtx_NE (VOIDmode, tem, const0_rtx);
++ case UNEQ:
++ case LTGT:
++ tem = gen_reg_rtx (mode);
++ if (TARGET_ARCH32)
++ emit_insn (gen_addsi3 (tem, result, const1_rtx));
++ else
++ emit_insn (gen_adddi3 (tem, result, const1_rtx));
++ tem2 = gen_reg_rtx (mode);
++ if (TARGET_ARCH32)
++ emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
++ else
++ emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
++ new_comparison = (comparison == UNEQ ? EQ : NE);
++ return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
++ }
++
++ gcc_unreachable ();
++}
++
++/* Generate an unsigned DImode to FP conversion. This is the same code
++ optabs would emit if we didn't have TFmode patterns. */
++
++void
++sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
++{
++ rtx i0, i1, f0, in, out;
++
++ out = operands[0];
++ in = force_reg (DImode, operands[1]);
++ rtx_code_label *neglab = gen_label_rtx ();
++ rtx_code_label *donelab = gen_label_rtx ();
++ i0 = gen_reg_rtx (DImode);
++ i1 = gen_reg_rtx (DImode);
++ f0 = gen_reg_rtx (mode);
++
++ emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
++
++ emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
++ emit_jump_insn (gen_jump (donelab));
++ emit_barrier ();
++
++ emit_label (neglab);
++
++ emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
++ emit_insn (gen_anddi3 (i1, in, const1_rtx));
++ emit_insn (gen_iordi3 (i0, i0, i1));
++ emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
++ emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
++
++ emit_label (donelab);
++}
++
++/* Generate an FP to unsigned DImode conversion. This is the same code
++ optabs would emit if we didn't have TFmode patterns. */
++
++void
++sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
++{
++ rtx i0, i1, f0, in, out, limit;
++
++ out = operands[0];
++ in = force_reg (mode, operands[1]);
++ rtx_code_label *neglab = gen_label_rtx ();
++ rtx_code_label *donelab = gen_label_rtx ();
++ i0 = gen_reg_rtx (DImode);
++ i1 = gen_reg_rtx (DImode);
++ limit = gen_reg_rtx (mode);
++ f0 = gen_reg_rtx (mode);
++
++ emit_move_insn (limit,
++ const_double_from_real_value (
++ REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
++ emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
++
++ emit_insn (gen_rtx_SET (out,
++ gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
++ emit_jump_insn (gen_jump (donelab));
++ emit_barrier ();
++
++ emit_label (neglab);
++
++ emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
++ emit_insn (gen_rtx_SET (i0,
++ gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
++ emit_insn (gen_movdi (i1, const1_rtx));
++ emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
++ emit_insn (gen_xordi3 (out, i0, i1));
++
++ emit_label (donelab);
++}
++
++/* Return the string to output a compare and branch instruction to DEST.
++ DEST is the destination insn (i.e. the label), INSN is the source,
++ and OP is the conditional expression. */
++
++const char *
++output_cbcond (rtx op, rtx dest, rtx_insn *insn)
++{
++ machine_mode mode = GET_MODE (XEXP (op, 0));
++ enum rtx_code code = GET_CODE (op);
++ const char *cond_str, *tmpl;
++ int far, emit_nop, len;
++ static char string[64];
++ char size_char;
++
++ /* Compare and Branch is limited to +-2KB. If it is too far away,
++ change
++
++ cxbne X, Y, .LC30
++
++ to
++
++ cxbe X, Y, .+16
++ nop
++ ba,pt xcc, .LC30
++ nop */
++
++ len = get_attr_length (insn);
++
++ far = len == 4;
++ emit_nop = len == 2;
++
++ if (far)
++ code = reverse_condition (code);
++
++ size_char = ((mode == SImode) ? 'w' : 'x');
++
++ switch (code)
++ {
++ case NE:
++ cond_str = "ne";
++ break;
++
++ case EQ:
++ cond_str = "e";
++ break;
++
++ case GE:
++ cond_str = "ge";
++ break;
++
++ case GT:
++ cond_str = "g";
++ break;
++
++ case LE:
++ cond_str = "le";
++ break;
++
++ case LT:
++ cond_str = "l";
++ break;
++
++ case GEU:
++ cond_str = "cc";
++ break;
++
++ case GTU:
++ cond_str = "gu";
++ break;
++
++ case LEU:
++ cond_str = "leu";
++ break;
++
++ case LTU:
++ cond_str = "cs";
++ break;
++
++ default:
++ gcc_unreachable ();
++ }
++
++ if (far)
++ {
++ int veryfar = 1, delta;
++
++ if (INSN_ADDRESSES_SET_P ())
++ {
++ delta = (INSN_ADDRESSES (INSN_UID (dest))
++ - INSN_ADDRESSES (INSN_UID (insn)));
++ /* Leave some instructions for "slop". */
++ if (delta >= -260000 && delta < 260000)
++ veryfar = 0;
++ }
++
++ if (veryfar)
++ tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
++ else
++ tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
++ }
++ else
++ {
++ if (emit_nop)
++ tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
++ else
++ tmpl = "c%cb%s\t%%1, %%2, %%3";
++ }
++
++ snprintf (string, sizeof(string), tmpl, size_char, cond_str);
++
++ return string;
++}
++
++/* Return the string to output a conditional branch to LABEL, testing
++ register REG. LABEL is the operand number of the label; REG is the
++ operand number of the reg. OP is the conditional expression. The mode
++ of REG says what kind of comparison we made.
++
++ DEST is the destination insn (i.e. the label), INSN is the source.
++
++ REVERSED is nonzero if we should reverse the sense of the comparison.
++
++ ANNUL is nonzero if we should generate an annulling branch. */
++
++const char *
++output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
++ int annul, rtx_insn *insn)
++{
++ static char string[64];
++ enum rtx_code code = GET_CODE (op);
++ machine_mode mode = GET_MODE (XEXP (op, 0));
++ rtx note;
++ int far;
++ char *p;
++
++ /* branch on register are limited to +-128KB. If it is too far away,
++ change
++
++ brnz,pt %g1, .LC30
++
++ to
++
++ brz,pn %g1, .+12
++ nop
++ ba,pt %xcc, .LC30
++
++ and
++
++ brgez,a,pn %o1, .LC29
++
++ to
++
++ brlz,pt %o1, .+16
++ nop
++ ba,pt %xcc, .LC29 */
++
++ far = get_attr_length (insn) >= 3;
++
++ /* If not floating-point or if EQ or NE, we can just reverse the code. */
++ if (reversed ^ far)
++ code = reverse_condition (code);
++
++ /* Only 64-bit versions of these instructions exist. */
++ gcc_assert (mode == DImode);
++
++ /* Start by writing the branch condition. */
++
++ switch (code)
++ {
++ case NE:
++ strcpy (string, "brnz");
++ break;
++
++ case EQ:
++ strcpy (string, "brz");
++ break;
++
++ case GE:
++ strcpy (string, "brgez");
++ break;
++
++ case LT:
++ strcpy (string, "brlz");
++ break;
++
++ case LE:
++ strcpy (string, "brlez");
++ break;
++
++ case GT:
++ strcpy (string, "brgz");
++ break;
++
++ default:
++ gcc_unreachable ();
++ }
++
++ p = strchr (string, '\0');
++
++ /* Now add the annulling, reg, label, and nop. */
++ if (annul && ! far)
++ {
++ strcpy (p, ",a");
++ p += 2;
++ }
++
++ if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
++ {
++ strcpy (p,
++ ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
++ >= profile_probability::even ()) ^ far)
++ ? ",pt" : ",pn");
++ p += 3;
++ }
++
++ *p = p < string + 8 ? '\t' : ' ';
++ p++;
++ *p++ = '%';
++ *p++ = '0' + reg;
++ *p++ = ',';
++ *p++ = ' ';
++ if (far)
++ {
++ int veryfar = 1, delta;
++
++ if (INSN_ADDRESSES_SET_P ())
++ {
++ delta = (INSN_ADDRESSES (INSN_UID (dest))
++ - INSN_ADDRESSES (INSN_UID (insn)));
++ /* Leave some instructions for "slop". */
++ if (delta >= -260000 && delta < 260000)
++ veryfar = 0;
++ }
++
++ strcpy (p, ".+12\n\t nop\n\t");
++ /* Skip the next insn if requested or
++ if we know that it will be a nop. */
++ if (annul || ! final_sequence)
++ p[3] = '6';
++ p += 12;
++ if (veryfar)
++ {
++ strcpy (p, "b\t");
++ p += 2;
++ }
++ else
++ {
++ strcpy (p, "ba,pt\t%%xcc, ");
++ p += 13;
++ }
++ }
++ *p++ = '%';
++ *p++ = 'l';
++ *p++ = '0' + label;
++ *p++ = '%';
++ *p++ = '#';
++ *p = '\0';
++
++ return string;
++}
++
++/* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
++ Such instructions cannot be used in the delay slot of return insn on v9.
++ If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
++ */
++
++static int
++epilogue_renumber (register rtx *where, int test)
++{
++ register const char *fmt;
++ register int i;
++ register enum rtx_code code;
++
++ if (*where == 0)
++ return 0;
++
++ code = GET_CODE (*where);
++
++ switch (code)
++ {
++ case REG:
++ if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
++ return 1;
++ if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
++ *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
++ /* fallthrough */
++ case SCRATCH:
++ case CC0:
++ case PC:
++ case CONST_INT:
++ case CONST_WIDE_INT:
++ case CONST_DOUBLE:
++ return 0;
++
++ /* Do not replace the frame pointer with the stack pointer because
++ it can cause the delayed instruction to load below the stack.
++ This occurs when instructions like:
++
++ (set (reg/i:SI 24 %i0)
++ (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
++ (const_int -20 [0xffffffec])) 0))
++
++ are in the return delayed slot. */
++ case PLUS:
++ if (GET_CODE (XEXP (*where, 0)) == REG
++ && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
++ && (GET_CODE (XEXP (*where, 1)) != CONST_INT
++ || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
++ return 1;
++ break;
++
++ case MEM:
++ if (SPARC_STACK_BIAS
++ && GET_CODE (XEXP (*where, 0)) == REG
++ && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
++ return 1;
++ break;
++
++ default:
++ break;
++ }
++
++ fmt = GET_RTX_FORMAT (code);
++
++ for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
++ {
++ if (fmt[i] == 'E')
++ {
++ register int j;
++ for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
++ if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
++ return 1;
++ }
++ else if (fmt[i] == 'e'
++ && epilogue_renumber (&(XEXP (*where, i)), test))
++ return 1;
++ }
++ return 0;
++}
++
++/* Leaf functions and non-leaf functions have different needs. */
++
++static const int
++reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
++
++static const int
++reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
++
++static const int *const reg_alloc_orders[] = {
++ reg_leaf_alloc_order,
++ reg_nonleaf_alloc_order};
++
++void
++order_regs_for_local_alloc (void)
++{
++ static int last_order_nonleaf = 1;
++
++ if (df_regs_ever_live_p (15) != last_order_nonleaf)
++ {
++ last_order_nonleaf = !last_order_nonleaf;
++ memcpy ((char *) reg_alloc_order,
++ (const char *) reg_alloc_orders[last_order_nonleaf],
++ FIRST_PSEUDO_REGISTER * sizeof (int));
++ }
++}
++
++/* Return 1 if REG and MEM are legitimate enough to allow the various
++ MEM<-->REG splits to be run. */
++
++int
++sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
++{
++ /* Punt if we are here by mistake. */
++ gcc_assert (reload_completed);
++
++ /* We must have an offsettable memory reference. */
++ if (!offsettable_memref_p (mem))
++ return 0;
++
++ /* If we have legitimate args for ldd/std, we do not want
++ the split to happen. */
++ if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
++ return 0;
++
++ /* Success. */
++ return 1;
++}
++
++/* Split a REG <-- MEM move into a pair of moves in MODE. */
++
++void
++sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
++{
++ rtx high_part = gen_highpart (mode, dest);
++ rtx low_part = gen_lowpart (mode, dest);
++ rtx word0 = adjust_address (src, mode, 0);
++ rtx word1 = adjust_address (src, mode, 4);
++
++ if (reg_overlap_mentioned_p (high_part, word1))
++ {
++ emit_move_insn_1 (low_part, word1);
++ emit_move_insn_1 (high_part, word0);
++ }
++ else
++ {
++ emit_move_insn_1 (high_part, word0);
++ emit_move_insn_1 (low_part, word1);
++ }
++}
++
++/* Split a MEM <-- REG move into a pair of moves in MODE. */
++
++void
++sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
++{
++ rtx word0 = adjust_address (dest, mode, 0);
++ rtx word1 = adjust_address (dest, mode, 4);
++ rtx high_part = gen_highpart (mode, src);
++ rtx low_part = gen_lowpart (mode, src);
++
++ emit_move_insn_1 (word0, high_part);
++ emit_move_insn_1 (word1, low_part);
++}
++
++/* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
++
++int
++sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
++{
++ /* Punt if we are here by mistake. */
++ gcc_assert (reload_completed);
++
++ if (GET_CODE (reg1) == SUBREG)
++ reg1 = SUBREG_REG (reg1);
++ if (GET_CODE (reg1) != REG)
++ return 0;
++ const int regno1 = REGNO (reg1);
++
++ if (GET_CODE (reg2) == SUBREG)
++ reg2 = SUBREG_REG (reg2);
++ if (GET_CODE (reg2) != REG)
++ return 0;
++ const int regno2 = REGNO (reg2);
++
++ if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
++ return 1;
++
++ if (TARGET_VIS3)
++ {
++ if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
++ || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
++ return 1;
++ }
++
++ return 0;
++}
++
++/* Split a REG <--> REG move into a pair of moves in MODE. */
++
++void
++sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
++{
++ rtx dest1 = gen_highpart (mode, dest);
++ rtx dest2 = gen_lowpart (mode, dest);
++ rtx src1 = gen_highpart (mode, src);
++ rtx src2 = gen_lowpart (mode, src);
++
++ /* Now emit using the real source and destination we found, swapping
++ the order if we detect overlap. */
++ if (reg_overlap_mentioned_p (dest1, src2))
++ {
++ emit_move_insn_1 (dest2, src2);
++ emit_move_insn_1 (dest1, src1);
++ }
++ else
++ {
++ emit_move_insn_1 (dest1, src1);
++ emit_move_insn_1 (dest2, src2);
++ }
++}
++
++/* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
++ This makes them candidates for using ldd and std insns.
++
++ Note reg1 and reg2 *must* be hard registers. */
++
++int
++registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
++{
++ /* We might have been passed a SUBREG. */
++ if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
++ return 0;
++
++ if (REGNO (reg1) % 2 != 0)
++ return 0;
++
++ /* Integer ldd is deprecated in SPARC V9 */
++ if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
++ return 0;
++
++ return (REGNO (reg1) == REGNO (reg2) - 1);
++}
++
++/* Return 1 if the addresses in mem1 and mem2 are suitable for use in
++ an ldd or std insn.
++
++ This can only happen when addr1 and addr2, the addresses in mem1
++ and mem2, are consecutive memory locations (addr1 + 4 == addr2).
++ addr1 must also be aligned on a 64-bit boundary.
++
++ Also iff dependent_reg_rtx is not null it should not be used to
++ compute the address for mem1, i.e. we cannot optimize a sequence
++ like:
++ ld [%o0], %o0
++ ld [%o0 + 4], %o1
++ to
++ ldd [%o0], %o0
++ nor:
++ ld [%g3 + 4], %g3
++ ld [%g3], %g2
++ to
++ ldd [%g3], %g2
++
++ But, note that the transformation from:
++ ld [%g2 + 4], %g3
++ ld [%g2], %g2
++ to
++ ldd [%g2], %g2
++ is perfectly fine. Thus, the peephole2 patterns always pass us
++ the destination register of the first load, never the second one.
++
++ For stores we don't have a similar problem, so dependent_reg_rtx is
++ NULL_RTX. */
++
++int
++mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
++{
++ rtx addr1, addr2;
++ unsigned int reg1;
++ HOST_WIDE_INT offset1;
++
++ /* The mems cannot be volatile. */
++ if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
++ return 0;
++
++ /* MEM1 should be aligned on a 64-bit boundary. */
++ if (MEM_ALIGN (mem1) < 64)
++ return 0;
++
++ addr1 = XEXP (mem1, 0);
++ addr2 = XEXP (mem2, 0);
++
++ /* Extract a register number and offset (if used) from the first addr. */
++ if (GET_CODE (addr1) == PLUS)
++ {
++ /* If not a REG, return zero. */
++ if (GET_CODE (XEXP (addr1, 0)) != REG)
++ return 0;
++ else
++ {
++ reg1 = REGNO (XEXP (addr1, 0));
++ /* The offset must be constant! */
++ if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
++ return 0;
++ offset1 = INTVAL (XEXP (addr1, 1));
++ }
++ }
++ else if (GET_CODE (addr1) != REG)
++ return 0;
++ else
++ {
++ reg1 = REGNO (addr1);
++ /* This was a simple (mem (reg)) expression. Offset is 0. */
++ offset1 = 0;
++ }
++
++ /* Make sure the second address is a (mem (plus (reg) (const_int). */
++ if (GET_CODE (addr2) != PLUS)
++ return 0;
++
++ if (GET_CODE (XEXP (addr2, 0)) != REG
++ || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
++ return 0;
++
++ if (reg1 != REGNO (XEXP (addr2, 0)))
++ return 0;
++
++ if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
++ return 0;
++
++ /* The first offset must be evenly divisible by 8 to ensure the
++ address is 64-bit aligned. */
++ if (offset1 % 8 != 0)
++ return 0;
++
++ /* The offset for the second addr must be 4 more than the first addr. */
++ if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
++ return 0;
++
++ /* All the tests passed. addr1 and addr2 are valid for ldd and std
++ instructions. */
++ return 1;
++}
++
++/* Return the widened memory access made of MEM1 and MEM2 in MODE. */
++
++rtx
++widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
++{
++ rtx x = widen_memory_access (mem1, mode, 0);
++ MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
++ return x;
++}
++
++/* Return 1 if reg is a pseudo, or is the first register in
++ a hard register pair. This makes it suitable for use in
++ ldd and std insns. */
++
++int
++register_ok_for_ldd (rtx reg)
++{
++ /* We might have been passed a SUBREG. */
++ if (!REG_P (reg))
++ return 0;
++
++ if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
++ return (REGNO (reg) % 2 == 0);
++
++ return 1;
++}
++
++/* Return 1 if OP, a MEM, has an address which is known to be
++ aligned to an 8-byte boundary. */
++
++int
++memory_ok_for_ldd (rtx op)
++{
++ /* In 64-bit mode, we assume that the address is word-aligned. */
++ if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
++ return 0;
++
++ if (! can_create_pseudo_p ()
++ && !strict_memory_address_p (Pmode, XEXP (op, 0)))
++ return 0;
++
++ return 1;
++}
++
++/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
++
++static bool
++sparc_print_operand_punct_valid_p (unsigned char code)
++{
++ if (code == '#'
++ || code == '*'
++ || code == '('
++ || code == ')'
++ || code == '_'
++ || code == '&')
++ return true;
++
++ return false;
++}
++
++/* Implement TARGET_PRINT_OPERAND.
++ Print operand X (an rtx) in assembler syntax to file FILE.
++ CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
++ For `%' followed by punctuation, CODE is the punctuation and X is null. */
++
++static void
++sparc_print_operand (FILE *file, rtx x, int code)
++{
++ const char *s;
++
++ switch (code)
++ {
++ case '#':
++ /* Output an insn in a delay slot. */
++ if (final_sequence)
++ sparc_indent_opcode = 1;
++ else
++ fputs ("\n\t nop", file);
++ return;
++ case '*':
++ /* Output an annul flag if there's nothing for the delay slot and we
++ are optimizing. This is always used with '(' below.
++ Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
++ this is a dbx bug. So, we only do this when optimizing.
++ On UltraSPARC, a branch in a delay slot causes a pipeline flush.
++ Always emit a nop in case the next instruction is a branch. */
++ if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
++ fputs (",a", file);
++ return;
++ case '(':
++ /* Output a 'nop' if there's nothing for the delay slot and we are
++ not optimizing. This is always used with '*' above. */
++ if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
++ fputs ("\n\t nop", file);
++ else if (final_sequence)
++ sparc_indent_opcode = 1;
++ return;
++ case ')':
++ /* Output the right displacement from the saved PC on function return.
++ The caller may have placed an "unimp" insn immediately after the call
++ so we have to account for it. This insn is used in the 32-bit ABI
++ when calling a function that returns a non zero-sized structure. The
++ 64-bit ABI doesn't have it. Be careful to have this test be the same
++ as that for the call. The exception is when sparc_std_struct_return
++ is enabled, the psABI is followed exactly and the adjustment is made
++ by the code in sparc_struct_value_rtx. The call emitted is the same
++ when sparc_std_struct_return is enabled. */
++ if (!TARGET_ARCH64
++ && cfun->returns_struct
++ && !sparc_std_struct_return
++ && DECL_SIZE (DECL_RESULT (current_function_decl))
++ && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
++ == INTEGER_CST
++ && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
++ fputs ("12", file);
++ else
++ fputc ('8', file);
++ return;
++ case '_':
++ /* Output the Embedded Medium/Anywhere code model base register. */
++ fputs (EMBMEDANY_BASE_REG, file);
++ return;
++ case '&':
++ /* Print some local dynamic TLS name. */
++ if (const char *name = get_some_local_dynamic_name ())
++ assemble_name (file, name);
++ else
++ output_operand_lossage ("'%%&' used without any "
++ "local dynamic TLS references");
++ return;
++
++ case 'Y':
++ /* Adjust the operand to take into account a RESTORE operation. */
++ if (GET_CODE (x) == CONST_INT)
++ break;
++ else if (GET_CODE (x) != REG)
++ output_operand_lossage ("invalid %%Y operand");
++ else if (REGNO (x) < 8)
++ fputs (reg_names[REGNO (x)], file);
++ else if (REGNO (x) >= 24 && REGNO (x) < 32)
++ fputs (reg_names[REGNO (x)-16], file);
++ else
++ output_operand_lossage ("invalid %%Y operand");
++ return;
++ case 'L':
++ /* Print out the low order register name of a register pair. */
++ if (WORDS_BIG_ENDIAN)
++ fputs (reg_names[REGNO (x)+1], file);
++ else
++ fputs (reg_names[REGNO (x)], file);
++ return;
++ case 'H':
++ /* Print out the high order register name of a register pair. */
++ if (WORDS_BIG_ENDIAN)
++ fputs (reg_names[REGNO (x)], file);
++ else
++ fputs (reg_names[REGNO (x)+1], file);
++ return;
++ case 'R':
++ /* Print out the second register name of a register pair or quad.
++ I.e., R (%o0) => %o1. */
++ fputs (reg_names[REGNO (x)+1], file);
++ return;
++ case 'S':
++ /* Print out the third register name of a register quad.
++ I.e., S (%o0) => %o2. */
++ fputs (reg_names[REGNO (x)+2], file);
++ return;
++ case 'T':
++ /* Print out the fourth register name of a register quad.
++ I.e., T (%o0) => %o3. */
++ fputs (reg_names[REGNO (x)+3], file);
++ return;
++ case 'x':
++ /* Print a condition code register. */
++ if (REGNO (x) == SPARC_ICC_REG)
++ {
++ switch (GET_MODE (x))
++ {
++ case E_CCmode:
++ case E_CCNZmode:
++ case E_CCCmode:
++ case E_CCVmode:
++ s = "%icc";
++ break;
++ case E_CCXmode:
++ case E_CCXNZmode:
++ case E_CCXCmode:
++ case E_CCXVmode:
++ s = "%xcc";
++ break;
++ default:
++ gcc_unreachable ();
++ }
++ fputs (s, file);
++ }
++ else
++ /* %fccN register */
++ fputs (reg_names[REGNO (x)], file);
++ return;
++ case 'm':
++ /* Print the operand's address only. */
++ output_address (GET_MODE (x), XEXP (x, 0));
++ return;
++ case 'r':
++ /* In this case we need a register. Use %g0 if the
++ operand is const0_rtx. */
++ if (x == const0_rtx
++ || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
++ {
++ fputs ("%g0", file);
++ return;
++ }
++ else
++ break;
++
++ case 'A':
++ switch (GET_CODE (x))
++ {
++ case IOR:
++ s = "or";
++ break;
++ case AND:
++ s = "and";
++ break;
++ case XOR:
++ s = "xor";
++ break;
++ default:
++ output_operand_lossage ("invalid %%A operand");
++ s = "";
++ break;
++ }
++ fputs (s, file);
++ return;
++
++ case 'B':
++ switch (GET_CODE (x))
++ {
++ case IOR:
++ s = "orn";
++ break;
++ case AND:
++ s = "andn";
++ break;
++ case XOR:
++ s = "xnor";
++ break;
++ default:
++ output_operand_lossage ("invalid %%B operand");
++ s = "";
++ break;
++ }
++ fputs (s, file);
++ return;
++
++ /* This is used by the conditional move instructions. */
++ case 'C':
++ {
++ machine_mode mode = GET_MODE (XEXP (x, 0));
++ switch (GET_CODE (x))
++ {
++ case NE:
++ if (mode == CCVmode || mode == CCXVmode)
++ s = "vs";
++ else
++ s = "ne";
++ break;
++ case EQ:
++ if (mode == CCVmode || mode == CCXVmode)
++ s = "vc";
++ else
++ s = "e";
++ break;
++ case GE:
++ if (mode == CCNZmode || mode == CCXNZmode)
++ s = "pos";
++ else
++ s = "ge";
++ break;
++ case GT:
++ s = "g";
++ break;
++ case LE:
++ s = "le";
++ break;
++ case LT:
++ if (mode == CCNZmode || mode == CCXNZmode)
++ s = "neg";
++ else
++ s = "l";
++ break;
++ case GEU:
++ s = "geu";
++ break;
++ case GTU:
++ s = "gu";
++ break;
++ case LEU:
++ s = "leu";
++ break;
++ case LTU:
++ s = "lu";
++ break;
++ case LTGT:
++ s = "lg";
++ break;
++ case UNORDERED:
++ s = "u";
++ break;
++ case ORDERED:
++ s = "o";
++ break;
++ case UNLT:
++ s = "ul";
++ break;
++ case UNLE:
++ s = "ule";
++ break;
++ case UNGT:
++ s = "ug";
++ break;
++ case UNGE:
++ s = "uge"
++ ; break;
++ case UNEQ:
++ s = "ue";
++ break;
++ default:
++ output_operand_lossage ("invalid %%C operand");
++ s = "";
++ break;
++ }
++ fputs (s, file);
++ return;
++ }
++
++ /* This are used by the movr instruction pattern. */
++ case 'D':
++ {
++ switch (GET_CODE (x))
++ {
++ case NE:
++ s = "ne";
++ break;
++ case EQ:
++ s = "e";
++ break;
++ case GE:
++ s = "gez";
++ break;
++ case LT:
++ s = "lz";
++ break;
++ case LE:
++ s = "lez";
++ break;
++ case GT:
++ s = "gz";
++ break;
++ default:
++ output_operand_lossage ("invalid %%D operand");
++ s = "";
++ break;
++ }
++ fputs (s, file);
++ return;
++ }
++
++ case 'b':
++ {
++ /* Print a sign-extended character. */
++ int i = trunc_int_for_mode (INTVAL (x), QImode);
++ fprintf (file, "%d", i);
++ return;
++ }
++
++ case 'f':
++ /* Operand must be a MEM; write its address. */
++ if (GET_CODE (x) != MEM)
++ output_operand_lossage ("invalid %%f operand");
++ output_address (GET_MODE (x), XEXP (x, 0));
++ return;
++
++ case 's':
++ {
++ /* Print a sign-extended 32-bit value. */
++ HOST_WIDE_INT i;
++ if (GET_CODE(x) == CONST_INT)
++ i = INTVAL (x);
++ else
++ {
++ output_operand_lossage ("invalid %%s operand");
++ return;
++ }
++ i = trunc_int_for_mode (i, SImode);
++ fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
++ return;
++ }
++
++ case 0:
++ /* Do nothing special. */
++ break;
++
++ default:
++ /* Undocumented flag. */
++ output_operand_lossage ("invalid operand output code");
++ }
++
++ if (GET_CODE (x) == REG)
++ fputs (reg_names[REGNO (x)], file);
++ else if (GET_CODE (x) == MEM)
++ {
++ fputc ('[', file);
++ /* Poor Sun assembler doesn't understand absolute addressing. */
++ if (CONSTANT_P (XEXP (x, 0)))
++ fputs ("%g0+", file);
++ output_address (GET_MODE (x), XEXP (x, 0));
++ fputc (']', file);
++ }
++ else if (GET_CODE (x) == HIGH)
++ {
++ fputs ("%hi(", file);
++ output_addr_const (file, XEXP (x, 0));
++ fputc (')', file);
++ }
++ else if (GET_CODE (x) == LO_SUM)
++ {
++ sparc_print_operand (file, XEXP (x, 0), 0);
++ if (TARGET_CM_MEDMID)
++ fputs ("+%l44(", file);
++ else
++ fputs ("+%lo(", file);
++ output_addr_const (file, XEXP (x, 1));
++ fputc (')', file);
++ }
++ else if (GET_CODE (x) == CONST_DOUBLE)
++ output_operand_lossage ("floating-point constant not a valid immediate operand");
++ else
++ output_addr_const (file, x);
++}
++
++/* Implement TARGET_PRINT_OPERAND_ADDRESS. */
++
++static void
++sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
++{
++ register rtx base, index = 0;
++ int offset = 0;
++ register rtx addr = x;
++
++ if (REG_P (addr))
++ fputs (reg_names[REGNO (addr)], file);
++ else if (GET_CODE (addr) == PLUS)
++ {
++ if (CONST_INT_P (XEXP (addr, 0)))
++ offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
++ else if (CONST_INT_P (XEXP (addr, 1)))
++ offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
++ else
++ base = XEXP (addr, 0), index = XEXP (addr, 1);
++ if (GET_CODE (base) == LO_SUM)
++ {
++ gcc_assert (USE_AS_OFFSETABLE_LO10
++ && TARGET_ARCH64
++ && ! TARGET_CM_MEDMID);
++ output_operand (XEXP (base, 0), 0);
++ fputs ("+%lo(", file);
++ output_address (VOIDmode, XEXP (base, 1));
++ fprintf (file, ")+%d", offset);
++ }
++ else
++ {
++ fputs (reg_names[REGNO (base)], file);
++ if (index == 0)
++ fprintf (file, "%+d", offset);
++ else if (REG_P (index))
++ fprintf (file, "+%s", reg_names[REGNO (index)]);
++ else if (GET_CODE (index) == SYMBOL_REF
++ || GET_CODE (index) == LABEL_REF
++ || GET_CODE (index) == CONST)
++ fputc ('+', file), output_addr_const (file, index);
++ else gcc_unreachable ();
++ }
++ }
++ else if (GET_CODE (addr) == MINUS
++ && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
++ {
++ output_addr_const (file, XEXP (addr, 0));
++ fputs ("-(", file);
++ output_addr_const (file, XEXP (addr, 1));
++ fputs ("-.)", file);
++ }
++ else if (GET_CODE (addr) == LO_SUM)
++ {
++ output_operand (XEXP (addr, 0), 0);
++ if (TARGET_CM_MEDMID)
++ fputs ("+%l44(", file);
++ else
++ fputs ("+%lo(", file);
++ output_address (VOIDmode, XEXP (addr, 1));
++ fputc (')', file);
++ }
++ else if (flag_pic
++ && GET_CODE (addr) == CONST
++ && GET_CODE (XEXP (addr, 0)) == MINUS
++ && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
++ && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
++ && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
++ {
++ addr = XEXP (addr, 0);
++ output_addr_const (file, XEXP (addr, 0));
++ /* Group the args of the second CONST in parenthesis. */
++ fputs ("-(", file);
++ /* Skip past the second CONST--it does nothing for us. */
++ output_addr_const (file, XEXP (XEXP (addr, 1), 0));
++ /* Close the parenthesis. */
++ fputc (')', file);
++ }
++ else
++ {
++ output_addr_const (file, addr);
++ }
++}
++
++/* Target hook for assembling integer objects. The sparc version has
++ special handling for aligned DI-mode objects. */
++
++static bool
++sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
++{
++ /* ??? We only output .xword's for symbols and only then in environments
++ where the assembler can handle them. */
++ if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
++ {
++ if (TARGET_V9)
++ {
++ assemble_integer_with_op ("\t.xword\t", x);
++ return true;
++ }
++ else
++ {
++ assemble_aligned_integer (4, const0_rtx);
++ assemble_aligned_integer (4, x);
++ return true;
++ }
++ }
++ return default_assemble_integer (x, size, aligned_p);
++}
++
++/* Return the value of a code used in the .proc pseudo-op that says
++ what kind of result this function returns. For non-C types, we pick
++ the closest C type. */
++
++#ifndef SHORT_TYPE_SIZE
++#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
++#endif
++
++#ifndef INT_TYPE_SIZE
++#define INT_TYPE_SIZE BITS_PER_WORD
++#endif
++
++#ifndef LONG_TYPE_SIZE
++#define LONG_TYPE_SIZE BITS_PER_WORD
++#endif
++
++#ifndef LONG_LONG_TYPE_SIZE
++#define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
++#endif
++
++#ifndef FLOAT_TYPE_SIZE
++#define FLOAT_TYPE_SIZE BITS_PER_WORD
++#endif
++
++#ifndef DOUBLE_TYPE_SIZE
++#define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
++#endif
++
++#ifndef LONG_DOUBLE_TYPE_SIZE
++#define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
++#endif
++
++unsigned long
++sparc_type_code (register tree type)
++{
++ register unsigned long qualifiers = 0;
++ register unsigned shift;
++
++ /* Only the first 30 bits of the qualifier are valid. We must refrain from
++ setting more, since some assemblers will give an error for this. Also,
++ we must be careful to avoid shifts of 32 bits or more to avoid getting
++ unpredictable results. */
++
++ for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
++ {
++ switch (TREE_CODE (type))
++ {
++ case ERROR_MARK:
++ return qualifiers;
++
++ case ARRAY_TYPE:
++ qualifiers |= (3 << shift);
++ break;
++
++ case FUNCTION_TYPE:
++ case METHOD_TYPE:
++ qualifiers |= (2 << shift);
++ break;
++
++ case POINTER_TYPE:
++ case REFERENCE_TYPE:
++ case OFFSET_TYPE:
++ qualifiers |= (1 << shift);
++ break;
++
++ case RECORD_TYPE:
++ return (qualifiers | 8);
++
++ case UNION_TYPE:
++ case QUAL_UNION_TYPE:
++ return (qualifiers | 9);
++
++ case ENUMERAL_TYPE:
++ return (qualifiers | 10);
++
++ case VOID_TYPE:
++ return (qualifiers | 16);
++
++ case INTEGER_TYPE:
++ /* If this is a range type, consider it to be the underlying
++ type. */
++ if (TREE_TYPE (type) != 0)
++ break;
++
++ /* Carefully distinguish all the standard types of C,
++ without messing up if the language is not C. We do this by
++ testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
++ look at both the names and the above fields, but that's redundant.
++ Any type whose size is between two C types will be considered
++ to be the wider of the two types. Also, we do not have a
++ special code to use for "long long", so anything wider than
++ long is treated the same. Note that we can't distinguish
++ between "int" and "long" in this code if they are the same
++ size, but that's fine, since neither can the assembler. */
++
++ if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
++ return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
++
++ else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
++ return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
++
++ else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
++ return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
++
++ else
++ return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
++
++ case REAL_TYPE:
++ /* If this is a range type, consider it to be the underlying
++ type. */
++ if (TREE_TYPE (type) != 0)
++ break;
++
++ /* Carefully distinguish all the standard types of C,
++ without messing up if the language is not C. */
++
++ if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
++ return (qualifiers | 6);
++
++ else
++ return (qualifiers | 7);
++
++ case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
++ /* ??? We need to distinguish between double and float complex types,
++ but I don't know how yet because I can't reach this code from
++ existing front-ends. */
++ return (qualifiers | 7); /* Who knows? */
++
++ case VECTOR_TYPE:
++ case BOOLEAN_TYPE: /* Boolean truth value type. */
++ case LANG_TYPE:
++ case NULLPTR_TYPE:
++ return qualifiers;
++
++ default:
++ gcc_unreachable (); /* Not a type! */
++ }
++ }
++
++ return qualifiers;
++}
++
++/* Nested function support. */
++
++/* Emit RTL insns to initialize the variable parts of a trampoline.
++ FNADDR is an RTX for the address of the function's pure code.
++ CXT is an RTX for the static chain value for the function.
++
++ This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
++ (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
++ (to store insns). This is a bit excessive. Perhaps a different
++ mechanism would be better here.
++
++ Emit enough FLUSH insns to synchronize the data and instruction caches. */
++
++static void
++sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
++{
++ /* SPARC 32-bit trampoline:
++
++ sethi %hi(fn), %g1
++ sethi %hi(static), %g2
++ jmp %g1+%lo(fn)
++ or %g2, %lo(static), %g2
++
++ SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
++ JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
++ */
++
++ emit_move_insn
++ (adjust_address (m_tramp, SImode, 0),
++ expand_binop (SImode, ior_optab,
++ expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
++ GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
++ NULL_RTX, 1, OPTAB_DIRECT));
++
++ emit_move_insn
++ (adjust_address (m_tramp, SImode, 4),
++ expand_binop (SImode, ior_optab,
++ expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
++ GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
++ NULL_RTX, 1, OPTAB_DIRECT));
++
++ emit_move_insn
++ (adjust_address (m_tramp, SImode, 8),
++ expand_binop (SImode, ior_optab,
++ expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
++ GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
++ NULL_RTX, 1, OPTAB_DIRECT));
++
++ emit_move_insn
++ (adjust_address (m_tramp, SImode, 12),
++ expand_binop (SImode, ior_optab,
++ expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
++ GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
++ NULL_RTX, 1, OPTAB_DIRECT));
++
++ /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
++ aligned on a 16 byte boundary so one flush clears it all. */
++ emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
++ if (sparc_cpu != PROCESSOR_ULTRASPARC
++ && sparc_cpu != PROCESSOR_ULTRASPARC3
++ && sparc_cpu != PROCESSOR_NIAGARA
++ && sparc_cpu != PROCESSOR_NIAGARA2
++ && sparc_cpu != PROCESSOR_NIAGARA3
++ && sparc_cpu != PROCESSOR_NIAGARA4
++ && sparc_cpu != PROCESSOR_NIAGARA7
++ && sparc_cpu != PROCESSOR_M8)
++ emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
++
++ /* Call __enable_execute_stack after writing onto the stack to make sure
++ the stack address is accessible. */
++#ifdef HAVE_ENABLE_EXECUTE_STACK
++ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
++ LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
++#endif
++
++}
++
++/* The 64-bit version is simpler because it makes more sense to load the
++ values as "immediate" data out of the trampoline. It's also easier since
++ we can read the PC without clobbering a register. */
++
++static void
++sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
++{
++ /* SPARC 64-bit trampoline:
++
++ rd %pc, %g1
++ ldx [%g1+24], %g5
++ jmp %g5
++ ldx [%g1+16], %g5
++ +16 bytes data
++ */
++
++ emit_move_insn (adjust_address (m_tramp, SImode, 0),
++ GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
++ emit_move_insn (adjust_address (m_tramp, SImode, 4),
++ GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
++ emit_move_insn (adjust_address (m_tramp, SImode, 8),
++ GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
++ emit_move_insn (adjust_address (m_tramp, SImode, 12),
++ GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
++ emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
++ emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
++ emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
++
++ if (sparc_cpu != PROCESSOR_ULTRASPARC
++ && sparc_cpu != PROCESSOR_ULTRASPARC3
++ && sparc_cpu != PROCESSOR_NIAGARA
++ && sparc_cpu != PROCESSOR_NIAGARA2
++ && sparc_cpu != PROCESSOR_NIAGARA3
++ && sparc_cpu != PROCESSOR_NIAGARA4
++ && sparc_cpu != PROCESSOR_NIAGARA7
++ && sparc_cpu != PROCESSOR_M8)
++ emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
++
++ /* Call __enable_execute_stack after writing onto the stack to make sure
++ the stack address is accessible. */
++#ifdef HAVE_ENABLE_EXECUTE_STACK
++ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
++ LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
++#endif
++}
++
++/* Worker for TARGET_TRAMPOLINE_INIT. */
++
++static void
++sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
++{
++ rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
++ cxt = force_reg (Pmode, cxt);
++ if (TARGET_ARCH64)
++ sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
++ else
++ sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
++}
++
++/* Adjust the cost of a scheduling dependency. Return the new cost of
++ a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
++
++static int
++supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
++ int cost)
++{
++ enum attr_type insn_type;
++
++ if (recog_memoized (insn) < 0)
++ return cost;
++
++ insn_type = get_attr_type (insn);
++
++ if (dep_type == 0)
++ {
++ /* Data dependency; DEP_INSN writes a register that INSN reads some
++ cycles later. */
++
++ /* if a load, then the dependence must be on the memory address;
++ add an extra "cycle". Note that the cost could be two cycles
++ if the reg was written late in an instruction group; we ca not tell
++ here. */
++ if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
++ return cost + 3;
++
++ /* Get the delay only if the address of the store is the dependence. */
++ if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
++ {
++ rtx pat = PATTERN(insn);
++ rtx dep_pat = PATTERN (dep_insn);
++
++ if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
++ return cost; /* This should not happen! */
++
++ /* The dependency between the two instructions was on the data that
++ is being stored. Assume that this implies that the address of the
++ store is not dependent. */
++ if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
++ return cost;
++
++ return cost + 3; /* An approximation. */
++ }
++
++ /* A shift instruction cannot receive its data from an instruction
++ in the same cycle; add a one cycle penalty. */
++ if (insn_type == TYPE_SHIFT)
++ return cost + 3; /* Split before cascade into shift. */
++ }
++ else
++ {
++ /* Anti- or output- dependency; DEP_INSN reads/writes a register that
++ INSN writes some cycles later. */
++
++ /* These are only significant for the fpu unit; writing a fp reg before
++ the fpu has finished with it stalls the processor. */
++
++ /* Reusing an integer register causes no problems. */
++ if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
++ return 0;
++ }
++
++ return cost;
++}
++
++static int
++hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
++ int cost)
++{
++ enum attr_type insn_type, dep_type;
++ rtx pat = PATTERN(insn);
++ rtx dep_pat = PATTERN (dep_insn);
++
++ if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
++ return cost;
++
++ insn_type = get_attr_type (insn);
++ dep_type = get_attr_type (dep_insn);
++
++ switch (dtype)
++ {
++ case 0:
++ /* Data dependency; DEP_INSN writes a register that INSN reads some
++ cycles later. */
++
++ switch (insn_type)
++ {
++ case TYPE_STORE:
++ case TYPE_FPSTORE:
++ /* Get the delay iff the address of the store is the dependence. */
++ if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
++ return cost;
++
++ if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
++ return cost;
++ return cost + 3;
++
++ case TYPE_LOAD:
++ case TYPE_SLOAD:
++ case TYPE_FPLOAD:
++ /* If a load, then the dependence must be on the memory address. If
++ the addresses aren't equal, then it might be a false dependency */
++ if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
++ {
++ if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
++ || GET_CODE (SET_DEST (dep_pat)) != MEM
++ || GET_CODE (SET_SRC (pat)) != MEM
++ || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
++ XEXP (SET_SRC (pat), 0)))
++ return cost + 2;
++
++ return cost + 8;
++ }
++ break;
++
++ case TYPE_BRANCH:
++ /* Compare to branch latency is 0. There is no benefit from
++ separating compare and branch. */
++ if (dep_type == TYPE_COMPARE)
++ return 0;
++ /* Floating point compare to branch latency is less than
++ compare to conditional move. */
++ if (dep_type == TYPE_FPCMP)
++ return cost - 1;
++ break;
++ default:
++ break;
++ }
++ break;
++
++ case REG_DEP_ANTI:
++ /* Anti-dependencies only penalize the fpu unit. */
++ if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
++ return 0;
++ break;
++
++ default:
++ break;
++ }
++
++ return cost;
++}
++
++static int
++sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
++ unsigned int)
++{
++ switch (sparc_cpu)
++ {
++ case PROCESSOR_SUPERSPARC:
++ cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
++ break;
++ case PROCESSOR_HYPERSPARC:
++ case PROCESSOR_SPARCLITE86X:
++ cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
++ break;
++ default:
++ break;
++ }
++ return cost;
++}
++
++static void
++sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
++ int sched_verbose ATTRIBUTE_UNUSED,
++ int max_ready ATTRIBUTE_UNUSED)
++{}
++
++static int
++sparc_use_sched_lookahead (void)
++{
++ switch (sparc_cpu)
++ {
++ case PROCESSOR_ULTRASPARC:
++ case PROCESSOR_ULTRASPARC3:
++ return 4;
++ case PROCESSOR_SUPERSPARC:
++ case PROCESSOR_HYPERSPARC:
++ case PROCESSOR_SPARCLITE86X:
++ return 3;
++ case PROCESSOR_NIAGARA4:
++ case PROCESSOR_NIAGARA7:
++ case PROCESSOR_M8:
++ return 2;
++ case PROCESSOR_NIAGARA:
++ case PROCESSOR_NIAGARA2:
++ case PROCESSOR_NIAGARA3:
++ default:
++ return 0;
++ }
++}
++
++static int
++sparc_issue_rate (void)
++{
++ switch (sparc_cpu)
++ {
++ case PROCESSOR_ULTRASPARC:
++ case PROCESSOR_ULTRASPARC3:
++ case PROCESSOR_M8:
++ return 4;
++ case PROCESSOR_SUPERSPARC:
++ return 3;
++ case PROCESSOR_HYPERSPARC:
++ case PROCESSOR_SPARCLITE86X:
++ case PROCESSOR_V9:
++ /* Assume V9 processors are capable of at least dual-issue. */
++ case PROCESSOR_NIAGARA4:
++ case PROCESSOR_NIAGARA7:
++ return 2;
++ case PROCESSOR_NIAGARA:
++ case PROCESSOR_NIAGARA2:
++ case PROCESSOR_NIAGARA3:
++ default:
++ return 1;
++ }
++}
++
++int
++sparc_branch_cost (bool speed_p, bool predictable_p)
++{
++ if (!speed_p)
++ return 2;
++
++ /* For pre-V9 processors we use a single value (usually 3) to take into
++ account the potential annulling of the delay slot (which ends up being
++ a bubble in the pipeline slot) plus a cycle to take into consideration
++ the instruction cache effects.
++
++ On V9 and later processors, which have branch prediction facilities,
++ we take into account whether the branch is (easily) predictable. */
++ const int cost = sparc_costs->branch_cost;
++
++ switch (sparc_cpu)
++ {
++ case PROCESSOR_V9:
++ case PROCESSOR_ULTRASPARC:
++ case PROCESSOR_ULTRASPARC3:
++ case PROCESSOR_NIAGARA:
++ case PROCESSOR_NIAGARA2:
++ case PROCESSOR_NIAGARA3:
++ case PROCESSOR_NIAGARA4:
++ case PROCESSOR_NIAGARA7:
++ case PROCESSOR_M8:
++ return cost + (predictable_p ? 0 : 2);
++
++ default:
++ return cost;
++ }
++}
++
++static int
++set_extends (rtx_insn *insn)
++{
++ register rtx pat = PATTERN (insn);
++
++ switch (GET_CODE (SET_SRC (pat)))
++ {
++ /* Load and some shift instructions zero extend. */
++ case MEM:
++ case ZERO_EXTEND:
++ /* sethi clears the high bits */
++ case HIGH:
++ /* LO_SUM is used with sethi. sethi cleared the high
++ bits and the values used with lo_sum are positive */
++ case LO_SUM:
++ /* Store flag stores 0 or 1 */
++ case LT: case LTU:
++ case GT: case GTU:
++ case LE: case LEU:
++ case GE: case GEU:
++ case EQ:
++ case NE:
++ return 1;
++ case AND:
++ {
++ rtx op0 = XEXP (SET_SRC (pat), 0);
++ rtx op1 = XEXP (SET_SRC (pat), 1);
++ if (GET_CODE (op1) == CONST_INT)
++ return INTVAL (op1) >= 0;
++ if (GET_CODE (op0) != REG)
++ return 0;
++ if (sparc_check_64 (op0, insn) == 1)
++ return 1;
++ return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
++ }
++ case IOR:
++ case XOR:
++ {
++ rtx op0 = XEXP (SET_SRC (pat), 0);
++ rtx op1 = XEXP (SET_SRC (pat), 1);
++ if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
++ return 0;
++ if (GET_CODE (op1) == CONST_INT)
++ return INTVAL (op1) >= 0;
++ return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
++ }
++ case LSHIFTRT:
++ return GET_MODE (SET_SRC (pat)) == SImode;
++ /* Positive integers leave the high bits zero. */
++ case CONST_INT:
++ return !(INTVAL (SET_SRC (pat)) & 0x80000000);
++ case ASHIFTRT:
++ case SIGN_EXTEND:
++ return - (GET_MODE (SET_SRC (pat)) == SImode);
++ case REG:
++ return sparc_check_64 (SET_SRC (pat), insn);
++ default:
++ return 0;
++ }
++}
++
++/* We _ought_ to have only one kind per function, but... */
++static GTY(()) rtx sparc_addr_diff_list;
++static GTY(()) rtx sparc_addr_list;
++
++void
++sparc_defer_case_vector (rtx lab, rtx vec, int diff)
++{
++ vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
++ if (diff)
++ sparc_addr_diff_list
++ = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
++ else
++ sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
++}
++
++static void
++sparc_output_addr_vec (rtx vec)
++{
++ rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
++ int idx, vlen = XVECLEN (body, 0);
++
++#ifdef ASM_OUTPUT_ADDR_VEC_START
++ ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
++#endif
++
++#ifdef ASM_OUTPUT_CASE_LABEL
++ ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
++ NEXT_INSN (lab));
++#else
++ (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
++#endif
++
++ for (idx = 0; idx < vlen; idx++)
++ {
++ ASM_OUTPUT_ADDR_VEC_ELT
++ (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
++ }
++
++#ifdef ASM_OUTPUT_ADDR_VEC_END
++ ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
++#endif
++}
++
++static void
++sparc_output_addr_diff_vec (rtx vec)
++{
++ rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
++ rtx base = XEXP (XEXP (body, 0), 0);
++ int idx, vlen = XVECLEN (body, 1);
++
++#ifdef ASM_OUTPUT_ADDR_VEC_START
++ ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
++#endif
++
++#ifdef ASM_OUTPUT_CASE_LABEL
++ ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
++ NEXT_INSN (lab));
++#else
++ (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
++#endif
++
++ for (idx = 0; idx < vlen; idx++)
++ {
++ ASM_OUTPUT_ADDR_DIFF_ELT
++ (asm_out_file,
++ body,
++ CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
++ CODE_LABEL_NUMBER (base));
++ }
++
++#ifdef ASM_OUTPUT_ADDR_VEC_END
++ ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
++#endif
++}
++
++static void
++sparc_output_deferred_case_vectors (void)
++{
++ rtx t;
++ int align;
++
++ if (sparc_addr_list == NULL_RTX
++ && sparc_addr_diff_list == NULL_RTX)
++ return;
++
++ /* Align to cache line in the function's code section. */
++ switch_to_section (current_function_section ());
++
++ align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
++ if (align > 0)
++ ASM_OUTPUT_ALIGN (asm_out_file, align);
++
++ for (t = sparc_addr_list; t ; t = XEXP (t, 1))
++ sparc_output_addr_vec (XEXP (t, 0));
++ for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
++ sparc_output_addr_diff_vec (XEXP (t, 0));
++
++ sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
++}
++
++/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
++ unknown. Return 1 if the high bits are zero, -1 if the register is
++ sign extended. */
++int
++sparc_check_64 (rtx x, rtx_insn *insn)
++{
++ /* If a register is set only once it is safe to ignore insns this
++ code does not know how to handle. The loop will either recognize
++ the single set and return the correct value or fail to recognize
++ it and return 0. */
++ int set_once = 0;
++ rtx y = x;
++
++ gcc_assert (GET_CODE (x) == REG);
++
++ if (GET_MODE (x) == DImode)
++ y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
++
++ if (flag_expensive_optimizations
++ && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
++ set_once = 1;
++
++ if (insn == 0)
++ {
++ if (set_once)
++ insn = get_last_insn_anywhere ();
++ else
++ return 0;
++ }
++
++ while ((insn = PREV_INSN (insn)))
++ {
++ switch (GET_CODE (insn))
++ {
++ case JUMP_INSN:
++ case NOTE:
++ break;
++ case CODE_LABEL:
++ case CALL_INSN:
++ default:
++ if (! set_once)
++ return 0;
++ break;
++ case INSN:
++ {
++ rtx pat = PATTERN (insn);
++ if (GET_CODE (pat) != SET)
++ return 0;
++ if (rtx_equal_p (x, SET_DEST (pat)))
++ return set_extends (insn);
++ if (y && rtx_equal_p (y, SET_DEST (pat)))
++ return set_extends (insn);
++ if (reg_overlap_mentioned_p (SET_DEST (pat), y))
++ return 0;
++ }
++ }
++ }
++ return 0;
++}
++
++/* Output a wide shift instruction in V8+ mode. INSN is the instruction,
++ OPERANDS are its operands and OPCODE is the mnemonic to be used. */
++
++const char *
++output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
++{
++ static char asm_code[60];
++
++ /* The scratch register is only required when the destination
++ register is not a 64-bit global or out register. */
++ if (which_alternative != 2)
++ operands[3] = operands[0];
++
++ /* We can only shift by constants <= 63. */
++ if (GET_CODE (operands[2]) == CONST_INT)
++ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
++
++ if (GET_CODE (operands[1]) == CONST_INT)
++ {
++ output_asm_insn ("mov\t%1, %3", operands);
++ }
++ else
++ {
++ output_asm_insn ("sllx\t%H1, 32, %3", operands);
++ if (sparc_check_64 (operands[1], insn) <= 0)
++ output_asm_insn ("srl\t%L1, 0, %L1", operands);
++ output_asm_insn ("or\t%L1, %3, %3", operands);
++ }
++
++ strcpy (asm_code, opcode);
++
++ if (which_alternative != 2)
++ return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
++ else
++ return
++ strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
++}
++
++/* Output rtl to increment the profiler label LABELNO
++ for profiling a function entry. */
++
++void
++sparc_profile_hook (int labelno)
++{
++ char buf[32];
++ rtx lab, fun;
++
++ fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
++ if (NO_PROFILE_COUNTERS)
++ {
++ emit_library_call (fun, LCT_NORMAL, VOIDmode);
++ }
++ else
++ {
++ ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
++ lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
++ emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
++ }
++}
++
++#ifdef TARGET_SOLARIS
++/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
++
++static void
++sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
++ tree decl ATTRIBUTE_UNUSED)
++{
++ if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
++ {
++ solaris_elf_asm_comdat_section (name, flags, decl);
++ return;
++ }
++
++ fprintf (asm_out_file, "\t.section\t\"%s\"", name);
++
++ if (!(flags & SECTION_DEBUG))
++ fputs (",#alloc", asm_out_file);
++#if HAVE_GAS_SECTION_EXCLUDE
++ if (flags & SECTION_EXCLUDE)
++ fputs (",#exclude", asm_out_file);
++#endif
++ if (flags & SECTION_WRITE)
++ fputs (",#write", asm_out_file);
++ if (flags & SECTION_TLS)
++ fputs (",#tls", asm_out_file);
++ if (flags & SECTION_CODE)
++ fputs (",#execinstr", asm_out_file);
++
++ if (flags & SECTION_NOTYPE)
++ ;
++ else if (flags & SECTION_BSS)
++ fputs (",#nobits", asm_out_file);
++ else
++ fputs (",#progbits", asm_out_file);
++
++ fputc ('\n', asm_out_file);
++}
++#endif /* TARGET_SOLARIS */
++
++/* We do not allow indirect calls to be optimized into sibling calls.
++
++ We cannot use sibling calls when delayed branches are disabled
++ because they will likely require the call delay slot to be filled.
++
++ Also, on SPARC 32-bit we cannot emit a sibling call when the
++ current function returns a structure. This is because the "unimp
++ after call" convention would cause the callee to return to the
++ wrong place. The generic code already disallows cases where the
++ function being called returns a structure.
++
++ It may seem strange how this last case could occur. Usually there
++ is code after the call which jumps to epilogue code which dumps the
++ return value into the struct return area. That ought to invalidate
++ the sibling call right? Well, in the C++ case we can end up passing
++ the pointer to the struct return area to a constructor (which returns
++ void) and then nothing else happens. Such a sibling call would look
++ valid without the added check here.
++
++ VxWorks PIC PLT entries require the global pointer to be initialized
++ on entry. We therefore can't emit sibling calls to them. */
++static bool
++sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
++{
++ return (decl
++ && flag_delayed_branch
++ && (TARGET_ARCH64 || ! cfun->returns_struct)
++ && !(TARGET_VXWORKS_RTP
++ && flag_pic
++ && !targetm.binds_local_p (decl)));
++}
++
++/* libfunc renaming. */
++
++static void
++sparc_init_libfuncs (void)
++{
++ if (TARGET_ARCH32)
++ {
++ /* Use the subroutines that Sun's library provides for integer
++ multiply and divide. The `*' prevents an underscore from
++ being prepended by the compiler. .umul is a little faster
++ than .mul. */
++ set_optab_libfunc (smul_optab, SImode, "*.umul");
++ set_optab_libfunc (sdiv_optab, SImode, "*.div");
++ set_optab_libfunc (udiv_optab, SImode, "*.udiv");
++ set_optab_libfunc (smod_optab, SImode, "*.rem");
++ set_optab_libfunc (umod_optab, SImode, "*.urem");
++
++ /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
++ set_optab_libfunc (add_optab, TFmode, "_Q_add");
++ set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
++ set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
++ set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
++ set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
++
++ /* We can define the TFmode sqrt optab only if TARGET_FPU. This
++ is because with soft-float, the SFmode and DFmode sqrt
++ instructions will be absent, and the compiler will notice and
++ try to use the TFmode sqrt instruction for calls to the
++ builtin function sqrt, but this fails. */
++ if (TARGET_FPU)
++ set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
++
++ set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
++ set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
++ set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
++ set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
++ set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
++ set_optab_libfunc (le_optab, TFmode, "_Q_fle");
++
++ set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
++ set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
++ set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
++ set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
++
++ set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
++ set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
++ set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
++ set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
++
++ if (DITF_CONVERSION_LIBFUNCS)
++ {
++ set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
++ set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
++ set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
++ set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
++ }
++
++ if (SUN_CONVERSION_LIBFUNCS)
++ {
++ set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
++ set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
++ set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
++ set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
++ }
++ }
++ if (TARGET_ARCH64)
++ {
++ /* In the SPARC 64bit ABI, SImode multiply and divide functions
++ do not exist in the library. Make sure the compiler does not
++ emit calls to them by accident. (It should always use the
++ hardware instructions.) */
++ set_optab_libfunc (smul_optab, SImode, 0);
++ set_optab_libfunc (sdiv_optab, SImode, 0);
++ set_optab_libfunc (udiv_optab, SImode, 0);
++ set_optab_libfunc (smod_optab, SImode, 0);
++ set_optab_libfunc (umod_optab, SImode, 0);
++
++ if (SUN_INTEGER_MULTIPLY_64)
++ {
++ set_optab_libfunc (smul_optab, DImode, "__mul64");
++ set_optab_libfunc (sdiv_optab, DImode, "__div64");
++ set_optab_libfunc (udiv_optab, DImode, "__udiv64");
++ set_optab_libfunc (smod_optab, DImode, "__rem64");
++ set_optab_libfunc (umod_optab, DImode, "__urem64");
++ }
++
++ if (SUN_CONVERSION_LIBFUNCS)
++ {
++ set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
++ set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
++ set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
++ set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
++ }
++ }
++}
++
++/* SPARC builtins. */
++enum sparc_builtins
++{
++ /* FPU builtins. */
++ SPARC_BUILTIN_LDFSR,
++ SPARC_BUILTIN_STFSR,
++
++ /* VIS 1.0 builtins. */
++ SPARC_BUILTIN_FPACK16,
++ SPARC_BUILTIN_FPACK32,
++ SPARC_BUILTIN_FPACKFIX,
++ SPARC_BUILTIN_FEXPAND,
++ SPARC_BUILTIN_FPMERGE,
++ SPARC_BUILTIN_FMUL8X16,
++ SPARC_BUILTIN_FMUL8X16AU,
++ SPARC_BUILTIN_FMUL8X16AL,
++ SPARC_BUILTIN_FMUL8SUX16,
++ SPARC_BUILTIN_FMUL8ULX16,
++ SPARC_BUILTIN_FMULD8SUX16,
++ SPARC_BUILTIN_FMULD8ULX16,
++ SPARC_BUILTIN_FALIGNDATAV4HI,
++ SPARC_BUILTIN_FALIGNDATAV8QI,
++ SPARC_BUILTIN_FALIGNDATAV2SI,
++ SPARC_BUILTIN_FALIGNDATADI,
++ SPARC_BUILTIN_WRGSR,
++ SPARC_BUILTIN_RDGSR,
++ SPARC_BUILTIN_ALIGNADDR,
++ SPARC_BUILTIN_ALIGNADDRL,
++ SPARC_BUILTIN_PDIST,
++ SPARC_BUILTIN_EDGE8,
++ SPARC_BUILTIN_EDGE8L,
++ SPARC_BUILTIN_EDGE16,
++ SPARC_BUILTIN_EDGE16L,
++ SPARC_BUILTIN_EDGE32,
++ SPARC_BUILTIN_EDGE32L,
++ SPARC_BUILTIN_FCMPLE16,
++ SPARC_BUILTIN_FCMPLE32,
++ SPARC_BUILTIN_FCMPNE16,
++ SPARC_BUILTIN_FCMPNE32,
++ SPARC_BUILTIN_FCMPGT16,
++ SPARC_BUILTIN_FCMPGT32,
++ SPARC_BUILTIN_FCMPEQ16,
++ SPARC_BUILTIN_FCMPEQ32,
++ SPARC_BUILTIN_FPADD16,
++ SPARC_BUILTIN_FPADD16S,
++ SPARC_BUILTIN_FPADD32,
++ SPARC_BUILTIN_FPADD32S,
++ SPARC_BUILTIN_FPSUB16,
++ SPARC_BUILTIN_FPSUB16S,
++ SPARC_BUILTIN_FPSUB32,
++ SPARC_BUILTIN_FPSUB32S,
++ SPARC_BUILTIN_ARRAY8,
++ SPARC_BUILTIN_ARRAY16,
++ SPARC_BUILTIN_ARRAY32,
++
++ /* VIS 2.0 builtins. */
++ SPARC_BUILTIN_EDGE8N,
++ SPARC_BUILTIN_EDGE8LN,
++ SPARC_BUILTIN_EDGE16N,
++ SPARC_BUILTIN_EDGE16LN,
++ SPARC_BUILTIN_EDGE32N,
++ SPARC_BUILTIN_EDGE32LN,
++ SPARC_BUILTIN_BMASK,
++ SPARC_BUILTIN_BSHUFFLEV4HI,
++ SPARC_BUILTIN_BSHUFFLEV8QI,
++ SPARC_BUILTIN_BSHUFFLEV2SI,
++ SPARC_BUILTIN_BSHUFFLEDI,
++
++ /* VIS 3.0 builtins. */
++ SPARC_BUILTIN_CMASK8,
++ SPARC_BUILTIN_CMASK16,
++ SPARC_BUILTIN_CMASK32,
++ SPARC_BUILTIN_FCHKSM16,
++ SPARC_BUILTIN_FSLL16,
++ SPARC_BUILTIN_FSLAS16,
++ SPARC_BUILTIN_FSRL16,
++ SPARC_BUILTIN_FSRA16,
++ SPARC_BUILTIN_FSLL32,
++ SPARC_BUILTIN_FSLAS32,
++ SPARC_BUILTIN_FSRL32,
++ SPARC_BUILTIN_FSRA32,
++ SPARC_BUILTIN_PDISTN,
++ SPARC_BUILTIN_FMEAN16,
++ SPARC_BUILTIN_FPADD64,
++ SPARC_BUILTIN_FPSUB64,
++ SPARC_BUILTIN_FPADDS16,
++ SPARC_BUILTIN_FPADDS16S,
++ SPARC_BUILTIN_FPSUBS16,
++ SPARC_BUILTIN_FPSUBS16S,
++ SPARC_BUILTIN_FPADDS32,
++ SPARC_BUILTIN_FPADDS32S,
++ SPARC_BUILTIN_FPSUBS32,
++ SPARC_BUILTIN_FPSUBS32S,
++ SPARC_BUILTIN_FUCMPLE8,
++ SPARC_BUILTIN_FUCMPNE8,
++ SPARC_BUILTIN_FUCMPGT8,
++ SPARC_BUILTIN_FUCMPEQ8,
++ SPARC_BUILTIN_FHADDS,
++ SPARC_BUILTIN_FHADDD,
++ SPARC_BUILTIN_FHSUBS,
++ SPARC_BUILTIN_FHSUBD,
++ SPARC_BUILTIN_FNHADDS,
++ SPARC_BUILTIN_FNHADDD,
++ SPARC_BUILTIN_UMULXHI,
++ SPARC_BUILTIN_XMULX,
++ SPARC_BUILTIN_XMULXHI,
++
++ /* VIS 4.0 builtins. */
++ SPARC_BUILTIN_FPADD8,
++ SPARC_BUILTIN_FPADDS8,
++ SPARC_BUILTIN_FPADDUS8,
++ SPARC_BUILTIN_FPADDUS16,
++ SPARC_BUILTIN_FPCMPLE8,
++ SPARC_BUILTIN_FPCMPGT8,
++ SPARC_BUILTIN_FPCMPULE16,
++ SPARC_BUILTIN_FPCMPUGT16,
++ SPARC_BUILTIN_FPCMPULE32,
++ SPARC_BUILTIN_FPCMPUGT32,
++ SPARC_BUILTIN_FPMAX8,
++ SPARC_BUILTIN_FPMAX16,
++ SPARC_BUILTIN_FPMAX32,
++ SPARC_BUILTIN_FPMAXU8,
++ SPARC_BUILTIN_FPMAXU16,
++ SPARC_BUILTIN_FPMAXU32,
++ SPARC_BUILTIN_FPMIN8,
++ SPARC_BUILTIN_FPMIN16,
++ SPARC_BUILTIN_FPMIN32,
++ SPARC_BUILTIN_FPMINU8,
++ SPARC_BUILTIN_FPMINU16,
++ SPARC_BUILTIN_FPMINU32,
++ SPARC_BUILTIN_FPSUB8,
++ SPARC_BUILTIN_FPSUBS8,
++ SPARC_BUILTIN_FPSUBUS8,
++ SPARC_BUILTIN_FPSUBUS16,
++
++ /* VIS 4.0B builtins. */
++
++ /* Note that all the DICTUNPACK* entries should be kept
++ contiguous. */
++ SPARC_BUILTIN_FIRST_DICTUNPACK,
++ SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
++ SPARC_BUILTIN_DICTUNPACK16,
++ SPARC_BUILTIN_DICTUNPACK32,
++ SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
++
++ /* Note that all the FPCMP*SHL entries should be kept
++ contiguous. */
++ SPARC_BUILTIN_FIRST_FPCMPSHL,
++ SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
++ SPARC_BUILTIN_FPCMPGT8SHL,
++ SPARC_BUILTIN_FPCMPEQ8SHL,
++ SPARC_BUILTIN_FPCMPNE8SHL,
++ SPARC_BUILTIN_FPCMPLE16SHL,
++ SPARC_BUILTIN_FPCMPGT16SHL,
++ SPARC_BUILTIN_FPCMPEQ16SHL,
++ SPARC_BUILTIN_FPCMPNE16SHL,
++ SPARC_BUILTIN_FPCMPLE32SHL,
++ SPARC_BUILTIN_FPCMPGT32SHL,
++ SPARC_BUILTIN_FPCMPEQ32SHL,
++ SPARC_BUILTIN_FPCMPNE32SHL,
++ SPARC_BUILTIN_FPCMPULE8SHL,
++ SPARC_BUILTIN_FPCMPUGT8SHL,
++ SPARC_BUILTIN_FPCMPULE16SHL,
++ SPARC_BUILTIN_FPCMPUGT16SHL,
++ SPARC_BUILTIN_FPCMPULE32SHL,
++ SPARC_BUILTIN_FPCMPUGT32SHL,
++ SPARC_BUILTIN_FPCMPDE8SHL,
++ SPARC_BUILTIN_FPCMPDE16SHL,
++ SPARC_BUILTIN_FPCMPDE32SHL,
++ SPARC_BUILTIN_FPCMPUR8SHL,
++ SPARC_BUILTIN_FPCMPUR16SHL,
++ SPARC_BUILTIN_FPCMPUR32SHL,
++ SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
++
++ SPARC_BUILTIN_MAX
++};
++
++static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
++static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
++
++/* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
++ The instruction should require a constant operand of some sort. The
++ function prints an error if OPVAL is not valid. */
++
++static int
++check_constant_argument (enum insn_code icode, int opnum, rtx opval)
++{
++ if (GET_CODE (opval) != CONST_INT)
++ {
++ error ("%qs expects a constant argument", insn_data[icode].name);
++ return false;
++ }
++
++ if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
++ {
++ error ("constant argument out of range for %qs", insn_data[icode].name);
++ return false;
++ }
++ return true;
++}
++
++/* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
++ function decl or NULL_TREE if the builtin was not added. */
++
++static tree
++def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
++ tree type)
++{
++ tree t
++ = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
++
++ if (t)
++ {
++ sparc_builtins[code] = t;
++ sparc_builtins_icode[code] = icode;
++ }
++
++ return t;
++}
++
++/* Likewise, but also marks the function as "const". */
++
++static tree
++def_builtin_const (const char *name, enum insn_code icode,
++ enum sparc_builtins code, tree type)
++{
++ tree t = def_builtin (name, icode, code, type);
++
++ if (t)
++ TREE_READONLY (t) = 1;
++
++ return t;
++}
++
++/* Implement the TARGET_INIT_BUILTINS target hook.
++ Create builtin functions for special SPARC instructions. */
++
++static void
++sparc_init_builtins (void)
++{
++ if (TARGET_FPU)
++ sparc_fpu_init_builtins ();
++
++ if (TARGET_VIS)
++ sparc_vis_init_builtins ();
++}
++
++/* Create builtin functions for FPU instructions. */
++
++static void
++sparc_fpu_init_builtins (void)
++{
++ tree ftype
++ = build_function_type_list (void_type_node,
++ build_pointer_type (unsigned_type_node), 0);
++ def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
++ SPARC_BUILTIN_LDFSR, ftype);
++ def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
++ SPARC_BUILTIN_STFSR, ftype);
++}
++
++/* Create builtin functions for VIS instructions. */
++
++static void
++sparc_vis_init_builtins (void)
++{
++ tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
++ tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
++ tree v4hi = build_vector_type (intHI_type_node, 4);
++ tree v2hi = build_vector_type (intHI_type_node, 2);
++ tree v2si = build_vector_type (intSI_type_node, 2);
++ tree v1si = build_vector_type (intSI_type_node, 1);
++
++ tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
++ tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
++ tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
++ tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
++ tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
++ tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
++ tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
++ tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
++ tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
++ tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
++ tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
++ tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
++ tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
++ tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
++ tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
++ v8qi, v8qi,
++ intDI_type_node, 0);
++ tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
++ v8qi, v8qi, 0);
++ tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
++ v8qi, v8qi, 0);
++ tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
++ intSI_type_node, 0);
++ tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
++ intSI_type_node, 0);
++ tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
++ intDI_type_node, 0);
++ tree di_ftype_di_di = build_function_type_list (intDI_type_node,
++ intDI_type_node,
++ intDI_type_node, 0);
++ tree si_ftype_si_si = build_function_type_list (intSI_type_node,
++ intSI_type_node,
++ intSI_type_node, 0);
++ tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
++ ptr_type_node,
++ intSI_type_node, 0);
++ tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
++ ptr_type_node,
++ intDI_type_node, 0);
++ tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
++ ptr_type_node,
++ ptr_type_node, 0);
++ tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
++ ptr_type_node,
++ ptr_type_node, 0);
++ tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
++ v4hi, v4hi, 0);
++ tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
++ v2si, v2si, 0);
++ tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
++ v4hi, v4hi, 0);
++ tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
++ v2si, v2si, 0);
++ tree void_ftype_di = build_function_type_list (void_type_node,
++ intDI_type_node, 0);
++ tree di_ftype_void = build_function_type_list (intDI_type_node,
++ void_type_node, 0);
++ tree void_ftype_si = build_function_type_list (void_type_node,
++ intSI_type_node, 0);
++ tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
++ float_type_node,
++ float_type_node, 0);
++ tree df_ftype_df_df = build_function_type_list (double_type_node,
++ double_type_node,
++ double_type_node, 0);
++
++ /* Packing and expanding vectors. */
++ def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
++ SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
++ def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
++ SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
++ def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
++ SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
++ def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
++ SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
++ def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
++ SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
++
++ /* Multiplications. */
++ def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
++ SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
++ def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
++ SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
++ def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
++ SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
++ def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
++ SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
++ def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
++ SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
++ def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
++ SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
++ def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
++ SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
++
++ /* Data aligning. */
++ def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
++ SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
++ def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
++ SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
++ def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
++ SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
++ def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
++ SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
++
++ def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
++ SPARC_BUILTIN_WRGSR, void_ftype_di);
++ def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
++ SPARC_BUILTIN_RDGSR, di_ftype_void);
++
++ if (TARGET_ARCH64)
++ {
++ def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
++ SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
++ def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
++ SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
++ }
++ else
++ {
++ def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
++ SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
++ def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
++ SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
++ }
++
++ /* Pixel distance. */
++ def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
++ SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
++
++ /* Edge handling. */
++ if (TARGET_ARCH64)
++ {
++ def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
++ SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
++ def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
++ SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
++ def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
++ SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
++ def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
++ SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
++ def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
++ SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
++ def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
++ SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
++ }
++ else
++ {
++ def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
++ SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
++ def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
++ SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
++ def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
++ SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
++ def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
++ SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
++ def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
++ SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
++ def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
++ SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
++ }
++
++ /* Pixel compare. */
++ if (TARGET_ARCH64)
++ {
++ def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
++ SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
++ SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
++ def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
++ SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
++ SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
++ def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
++ SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
++ SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
++ def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
++ SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
++ SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
++ }
++ else
++ {
++ def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
++ SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
++ SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
++ def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
++ SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
++ SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
++ def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
++ SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
++ SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
++ def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
++ SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
++ SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
++ }
++
++ /* Addition and subtraction. */
++ def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
++ SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
++ SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
++ def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
++ SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
++ def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
++ SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
++ def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
++ SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
++ SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
++ def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
++ SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
++ def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
++ SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
++
++ /* Three-dimensional array addressing. */
++ if (TARGET_ARCH64)
++ {
++ def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
++ SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
++ def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
++ SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
++ def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
++ SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
++ }
++ else
++ {
++ def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
++ SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
++ def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
++ SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
++ def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
++ SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
++ }
++
++ if (TARGET_VIS2)
++ {
++ /* Edge handling. */
++ if (TARGET_ARCH64)
++ {
++ def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
++ SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
++ def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
++ SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
++ def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
++ SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
++ def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
++ SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
++ def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
++ SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
++ def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
++ SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
++ }
++ else
++ {
++ def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
++ SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
++ def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
++ SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
++ def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
++ SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
++ def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
++ SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
++ def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
++ SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
++ def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
++ SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
++ }
++
++ /* Byte mask and shuffle. */
++ if (TARGET_ARCH64)
++ def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
++ SPARC_BUILTIN_BMASK, di_ftype_di_di);
++ else
++ def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
++ SPARC_BUILTIN_BMASK, si_ftype_si_si);
++ def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
++ SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
++ def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
++ SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
++ def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
++ SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
++ def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
++ SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
++ }
++
++ if (TARGET_VIS3)
++ {
++ if (TARGET_ARCH64)
++ {
++ def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
++ SPARC_BUILTIN_CMASK8, void_ftype_di);
++ def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
++ SPARC_BUILTIN_CMASK16, void_ftype_di);
++ def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
++ SPARC_BUILTIN_CMASK32, void_ftype_di);
++ }
++ else
++ {
++ def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
++ SPARC_BUILTIN_CMASK8, void_ftype_si);
++ def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
++ SPARC_BUILTIN_CMASK16, void_ftype_si);
++ def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
++ SPARC_BUILTIN_CMASK32, void_ftype_si);
++ }
++
++ def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
++ SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
++
++ def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
++ SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
++ SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
++ SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
++ SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
++ SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
++ def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
++ SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
++ def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
++ SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
++ def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
++ SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
++
++ if (TARGET_ARCH64)
++ def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
++ SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
++ else
++ def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
++ SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
++
++ def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
++ SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
++ SPARC_BUILTIN_FPADD64, di_ftype_di_di);
++ def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
++ SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
++
++ def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
++ SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
++ SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
++ def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
++ SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
++ SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
++ def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
++ SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
++ def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
++ SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
++ def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
++ SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
++ def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
++ SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
++
++ if (TARGET_ARCH64)
++ {
++ def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
++ SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
++ def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
++ SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
++ def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
++ SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
++ def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
++ SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
++ }
++ else
++ {
++ def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
++ SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
++ def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
++ SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
++ def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
++ SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
++ def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
++ SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
++ }
++
++ def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
++ SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
++ def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
++ SPARC_BUILTIN_FHADDD, df_ftype_df_df);
++ def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
++ SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
++ def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
++ SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
++ def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
++ SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
++ def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
++ SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
++
++ def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
++ SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
++ def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
++ SPARC_BUILTIN_XMULX, di_ftype_di_di);
++ def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
++ SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
++ }
++
++ if (TARGET_VIS4)
++ {
++ def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
++ SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
++ def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
++ SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
++ def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
++ SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
++ def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
++ SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
++
++
++ if (TARGET_ARCH64)
++ {
++ def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
++ SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
++ def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
++ SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
++ def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
++ SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
++ SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
++ SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
++ def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
++ SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
++ }
++ else
++ {
++ def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
++ SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
++ def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
++ SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
++ def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
++ SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
++ SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
++ SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
++ def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
++ SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
++ }
++
++ def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
++ SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
++ def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
++ SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
++ SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
++ def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
++ SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
++ def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
++ SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
++ SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
++ def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
++ SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
++ def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
++ SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
++ SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
++ def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
++ SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
++ def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
++ SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
++ def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
++ SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
++ def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
++ SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
++ def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
++ SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
++ def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
++ SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
++ def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
++ SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
++ }
++
++ if (TARGET_VIS4B)
++ {
++ def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
++ SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
++ def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
++ SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
++ def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
++ SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
++
++ if (TARGET_ARCH64)
++ {
++ tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
++ v8qi, v8qi,
++ intSI_type_node, 0);
++ tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
++ v4hi, v4hi,
++ intSI_type_node, 0);
++ tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
++ v2si, v2si,
++ intSI_type_node, 0);
++
++ def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
++ SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
++ def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
++ SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
++ def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
++ SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
++ def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
++ SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
++
++ def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
++ SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
++ def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
++ SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
++ def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
++ SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
++ def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
++ SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
++
++ def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
++ SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
++ def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
++ SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
++ def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
++ SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
++ def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
++ SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
++
++
++ def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
++ SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
++ def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
++ SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
++
++ def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
++ SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
++ def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
++ SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
++
++ def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
++ SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
++ def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
++ SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
++
++ def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
++ SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
++ def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
++ SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
++ def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
++ SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
++
++ def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
++ SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
++ def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
++ SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
++ def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
++ SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
++
++ }
++ else
++ {
++ tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
++ v8qi, v8qi,
++ intSI_type_node, 0);
++ tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
++ v4hi, v4hi,
++ intSI_type_node, 0);
++ tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
++ v2si, v2si,
++ intSI_type_node, 0);
++
++ def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
++ SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
++ def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
++ SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
++ def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
++ SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
++ def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
++ SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
++
++ def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
++ SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
++ def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
++ SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
++ def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
++ SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
++ def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
++ SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
++
++ def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
++ SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
++ def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
++ SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
++ def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
++ SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
++ def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
++ SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
++
++
++ def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
++ SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
++ def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
++ SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
++
++ def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
++ SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
++ def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
++ SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
++
++ def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
++ SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
++ def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
++ SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
++
++ def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
++ SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
++ def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
++ SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
++ def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
++ SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
++
++ def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
++ SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
++ def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
++ SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
++ def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
++ SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
++ }
++ }
++}
++
++/* Implement TARGET_BUILTIN_DECL hook. */
++
++static tree
++sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
++{
++ if (code >= SPARC_BUILTIN_MAX)
++ return error_mark_node;
++
++ return sparc_builtins[code];
++}
++
++/* Implemented TARGET_EXPAND_BUILTIN hook. */
++
++static rtx
++sparc_expand_builtin (tree exp, rtx target,
++ rtx subtarget ATTRIBUTE_UNUSED,
++ machine_mode tmode ATTRIBUTE_UNUSED,
++ int ignore ATTRIBUTE_UNUSED)
++{
++ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
++ enum sparc_builtins code
++ = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
++ enum insn_code icode = sparc_builtins_icode[code];
++ bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
++ call_expr_arg_iterator iter;
++ int arg_count = 0;
++ rtx pat, op[4];
++ tree arg;
++
++ if (nonvoid)
++ {
++ machine_mode tmode = insn_data[icode].operand[0].mode;
++ if (!target
++ || GET_MODE (target) != tmode
++ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
++ op[0] = gen_reg_rtx (tmode);
++ else
++ op[0] = target;
++ }
++ else
++ op[0] = NULL_RTX;
++
++ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
++ {
++ const struct insn_operand_data *insn_op;
++ int idx;
++
++ if (arg == error_mark_node)
++ return NULL_RTX;
++
++ arg_count++;
++ idx = arg_count - !nonvoid;
++ insn_op = &insn_data[icode].operand[idx];
++ op[arg_count] = expand_normal (arg);
++
++ /* Some of the builtins require constant arguments. We check
++ for this here. */
++ if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
++ && code <= SPARC_BUILTIN_LAST_FPCMPSHL
++ && arg_count == 3)
++ || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
++ && code <= SPARC_BUILTIN_LAST_DICTUNPACK
++ && arg_count == 2))
++ {
++ if (!check_constant_argument (icode, idx, op[arg_count]))
++ return const0_rtx;
++ }
++
++ if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
++ {
++ if (!address_operand (op[arg_count], SImode))
++ {
++ op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
++ op[arg_count] = copy_addr_to_reg (op[arg_count]);
++ }
++ op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
++ }
++
++ else if (insn_op->mode == V1DImode
++ && GET_MODE (op[arg_count]) == DImode)
++ op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
++
++ else if (insn_op->mode == V1SImode
++ && GET_MODE (op[arg_count]) == SImode)
++ op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
++
++ if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
++ insn_op->mode))
++ op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
++ }
++
++ switch (arg_count)
++ {
++ case 0:
++ pat = GEN_FCN (icode) (op[0]);
++ break;
++ case 1:
++ if (nonvoid)
++ pat = GEN_FCN (icode) (op[0], op[1]);
++ else
++ pat = GEN_FCN (icode) (op[1]);
++ break;
++ case 2:
++ pat = GEN_FCN (icode) (op[0], op[1], op[2]);
++ break;
++ case 3:
++ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
++ break;
++ default:
++ gcc_unreachable ();
++ }
++
++ if (!pat)
++ return NULL_RTX;
++
++ emit_insn (pat);
++
++ return (nonvoid ? op[0] : const0_rtx);
++}
++
++/* Return the upper 16 bits of the 8x16 multiplication. */
++
++static int
++sparc_vis_mul8x16 (int e8, int e16)
++{
++ return (e8 * e16 + 128) / 256;
++}
++
++/* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
++ the result into the array N_ELTS, whose elements are of INNER_TYPE. */
++
++static void
++sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
++ tree inner_type, tree cst0, tree cst1)
++{
++ unsigned i, num = VECTOR_CST_NELTS (cst0);
++ int scale;
++
++ switch (fncode)
++ {
++ case SPARC_BUILTIN_FMUL8X16:
++ for (i = 0; i < num; ++i)
++ {
++ int val
++ = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
++ TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
++ n_elts->quick_push (build_int_cst (inner_type, val));
++ }
++ break;
++
++ case SPARC_BUILTIN_FMUL8X16AU:
++ scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
++
++ for (i = 0; i < num; ++i)
++ {
++ int val
++ = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
++ scale);
++ n_elts->quick_push (build_int_cst (inner_type, val));
++ }
++ break;
++
++ case SPARC_BUILTIN_FMUL8X16AL:
++ scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
++
++ for (i = 0; i < num; ++i)
++ {
++ int val
++ = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
++ scale);
++ n_elts->quick_push (build_int_cst (inner_type, val));
++ }
++ break;
++
++ default:
++ gcc_unreachable ();
++ }
++}
++
++/* Implement TARGET_FOLD_BUILTIN hook.
++
++ Fold builtin functions for SPARC intrinsics. If IGNORE is true the
++ result of the function call is ignored. NULL_TREE is returned if the
++ function could not be folded. */
++
++static tree
++sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
++ tree *args, bool ignore)
++{
++ enum sparc_builtins code
++ = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
++ tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
++ tree arg0, arg1, arg2;
++
++ if (ignore)
++ switch (code)
++ {
++ case SPARC_BUILTIN_LDFSR:
++ case SPARC_BUILTIN_STFSR:
++ case SPARC_BUILTIN_ALIGNADDR:
++ case SPARC_BUILTIN_WRGSR:
++ case SPARC_BUILTIN_BMASK:
++ case SPARC_BUILTIN_CMASK8:
++ case SPARC_BUILTIN_CMASK16:
++ case SPARC_BUILTIN_CMASK32:
++ break;
++
++ default:
++ return build_zero_cst (rtype);
++ }
++
++ switch (code)
++ {
++ case SPARC_BUILTIN_FEXPAND:
++ arg0 = args[0];
++ STRIP_NOPS (arg0);
++
++ if (TREE_CODE (arg0) == VECTOR_CST)
++ {
++ tree inner_type = TREE_TYPE (rtype);
++ unsigned i;
++
++ tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
++ for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
++ {
++ unsigned HOST_WIDE_INT val
++ = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
++ n_elts.quick_push (build_int_cst (inner_type, val << 4));
++ }
++ return n_elts.build ();
++ }
++ break;
++
++ case SPARC_BUILTIN_FMUL8X16:
++ case SPARC_BUILTIN_FMUL8X16AU:
++ case SPARC_BUILTIN_FMUL8X16AL:
++ arg0 = args[0];
++ arg1 = args[1];
++ STRIP_NOPS (arg0);
++ STRIP_NOPS (arg1);
++
++ if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
++ {
++ tree inner_type = TREE_TYPE (rtype);
++ tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
++ sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
++ return n_elts.build ();
++ }
++ break;
++
++ case SPARC_BUILTIN_FPMERGE:
++ arg0 = args[0];
++ arg1 = args[1];
++ STRIP_NOPS (arg0);
++ STRIP_NOPS (arg1);
++
++ if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
++ {
++ tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
++ unsigned i;
++ for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
++ {
++ n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
++ n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
++ }
++
++ return n_elts.build ();
++ }
++ break;
++
++ case SPARC_BUILTIN_PDIST:
++ case SPARC_BUILTIN_PDISTN:
++ arg0 = args[0];
++ arg1 = args[1];
++ STRIP_NOPS (arg0);
++ STRIP_NOPS (arg1);
++ if (code == SPARC_BUILTIN_PDIST)
++ {
++ arg2 = args[2];
++ STRIP_NOPS (arg2);
++ }
++ else
++ arg2 = integer_zero_node;
++
++ if (TREE_CODE (arg0) == VECTOR_CST
++ && TREE_CODE (arg1) == VECTOR_CST
++ && TREE_CODE (arg2) == INTEGER_CST)
++ {
++ bool overflow = false;
++ widest_int result = wi::to_widest (arg2);
++ widest_int tmp;
++ unsigned i;
++
++ for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
++ {
++ tree e0 = VECTOR_CST_ELT (arg0, i);
++ tree e1 = VECTOR_CST_ELT (arg1, i);
++
++ wi::overflow_type neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
++
++ tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
++ tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
++ if (wi::neg_p (tmp))
++ tmp = wi::neg (tmp, &neg2_ovf);
++ else
++ neg2_ovf = wi::OVF_NONE;
++ result = wi::add (result, tmp, SIGNED, &add2_ovf);
++ overflow |= ((neg1_ovf != wi::OVF_NONE)
++ | (neg2_ovf != wi::OVF_NONE)
++ | (add1_ovf != wi::OVF_NONE)
++ | (add2_ovf != wi::OVF_NONE));
++ }
++
++ gcc_assert (!overflow);
++
++ return wide_int_to_tree (rtype, result);
++ }
++
++ default:
++ break;
++ }
++
++ return NULL_TREE;
++}
++
++/* ??? This duplicates information provided to the compiler by the
++ ??? scheduler description. Some day, teach genautomata to output
++ ??? the latencies and then CSE will just use that. */
++
++static bool
++sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
++ int opno ATTRIBUTE_UNUSED,
++ int *total, bool speed ATTRIBUTE_UNUSED)
++{
++ int code = GET_CODE (x);
++ bool float_mode_p = FLOAT_MODE_P (mode);
++
++ switch (code)
++ {
++ case CONST_INT:
++ if (SMALL_INT (x))
++ *total = 0;
++ else
++ *total = 2;
++ return true;
++
++ case CONST_WIDE_INT:
++ *total = 0;
++ if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
++ *total += 2;
++ if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
++ *total += 2;
++ return true;
++
++ case HIGH:
++ *total = 2;
++ return true;
++
++ case CONST:
++ case LABEL_REF:
++ case SYMBOL_REF:
++ *total = 4;
++ return true;
++
++ case CONST_DOUBLE:
++ *total = 8;
++ return true;
++
++ case MEM:
++ /* If outer-code was a sign or zero extension, a cost
++ of COSTS_N_INSNS (1) was already added in. This is
++ why we are subtracting it back out. */
++ if (outer_code == ZERO_EXTEND)
++ {
++ *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
++ }
++ else if (outer_code == SIGN_EXTEND)
++ {
++ *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
++ }
++ else if (float_mode_p)
++ {
++ *total = sparc_costs->float_load;
++ }
++ else
++ {
++ *total = sparc_costs->int_load;
++ }
++
++ return true;
++
++ case PLUS:
++ case MINUS:
++ if (float_mode_p)
++ *total = sparc_costs->float_plusminus;
++ else
++ *total = COSTS_N_INSNS (1);
++ return false;
++
++ case FMA:
++ {
++ rtx sub;
++
++ gcc_assert (float_mode_p);
++ *total = sparc_costs->float_mul;
++
++ sub = XEXP (x, 0);
++ if (GET_CODE (sub) == NEG)
++ sub = XEXP (sub, 0);
++ *total += rtx_cost (sub, mode, FMA, 0, speed);
++
++ sub = XEXP (x, 2);
++ if (GET_CODE (sub) == NEG)
++ sub = XEXP (sub, 0);
++ *total += rtx_cost (sub, mode, FMA, 2, speed);
++ return true;
++ }
++
++ case MULT:
++ if (float_mode_p)
++ *total = sparc_costs->float_mul;
++ else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
++ *total = COSTS_N_INSNS (25);
++ else
++ {
++ int bit_cost;
++
++ bit_cost = 0;
++ if (sparc_costs->int_mul_bit_factor)
++ {
++ int nbits;
++
++ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
++ {
++ unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
++ for (nbits = 0; value != 0; value &= value - 1)
++ nbits++;
++ }
++ else
++ nbits = 7;
++
++ if (nbits < 3)
++ nbits = 3;
++ bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
++ bit_cost = COSTS_N_INSNS (bit_cost);
++ }
++
++ if (mode == DImode || !TARGET_HARD_MUL)
++ *total = sparc_costs->int_mulX + bit_cost;
++ else
++ *total = sparc_costs->int_mul + bit_cost;
++ }
++ return false;
++
++ case ASHIFT:
++ case ASHIFTRT:
++ case LSHIFTRT:
++ *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
++ return false;
++
++ case DIV:
++ case UDIV:
++ case MOD:
++ case UMOD:
++ if (float_mode_p)
++ {
++ if (mode == DFmode)
++ *total = sparc_costs->float_div_df;
++ else
++ *total = sparc_costs->float_div_sf;
++ }
++ else
++ {
++ if (mode == DImode)
++ *total = sparc_costs->int_divX;
++ else
++ *total = sparc_costs->int_div;
++ }
++ return false;
++
++ case NEG:
++ if (! float_mode_p)
++ {
++ *total = COSTS_N_INSNS (1);
++ return false;
++ }
++ /* FALLTHRU */
++
++ case ABS:
++ case FLOAT:
++ case UNSIGNED_FLOAT:
++ case FIX:
++ case UNSIGNED_FIX:
++ case FLOAT_EXTEND:
++ case FLOAT_TRUNCATE:
++ *total = sparc_costs->float_move;
++ return false;
++
++ case SQRT:
++ if (mode == DFmode)
++ *total = sparc_costs->float_sqrt_df;
++ else
++ *total = sparc_costs->float_sqrt_sf;
++ return false;
++
++ case COMPARE:
++ if (float_mode_p)
++ *total = sparc_costs->float_cmp;
++ else
++ *total = COSTS_N_INSNS (1);
++ return false;
++
++ case IF_THEN_ELSE:
++ if (float_mode_p)
++ *total = sparc_costs->float_cmove;
++ else
++ *total = sparc_costs->int_cmove;
++ return false;
++
++ case IOR:
++ /* Handle the NAND vector patterns. */
++ if (sparc_vector_mode_supported_p (mode)
++ && GET_CODE (XEXP (x, 0)) == NOT
++ && GET_CODE (XEXP (x, 1)) == NOT)
++ {
++ *total = COSTS_N_INSNS (1);
++ return true;
++ }
++ else
++ return false;
++
++ default:
++ return false;
++ }
++}
++
++/* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
++
++static inline bool
++general_or_i64_p (reg_class_t rclass)
++{
++ return (rclass == GENERAL_REGS || rclass == I64_REGS);
++}
++
++/* Implement TARGET_REGISTER_MOVE_COST. */
++
++static int
++sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
++ reg_class_t from, reg_class_t to)
++{
++ bool need_memory = false;
++
++ /* This helps postreload CSE to eliminate redundant comparisons. */
++ if (from == NO_REGS || to == NO_REGS)
++ return 100;
++
++ if (from == FPCC_REGS || to == FPCC_REGS)
++ need_memory = true;
++ else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
++ || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
++ {
++ if (TARGET_VIS3)
++ {
++ int size = GET_MODE_SIZE (mode);
++ if (size == 8 || size == 4)
++ {
++ if (! TARGET_ARCH32 || size == 4)
++ return 4;
++ else
++ return 6;
++ }
++ }
++ need_memory = true;
++ }
++
++ if (need_memory)
++ {
++ if (sparc_cpu == PROCESSOR_ULTRASPARC
++ || sparc_cpu == PROCESSOR_ULTRASPARC3
++ || sparc_cpu == PROCESSOR_NIAGARA
++ || sparc_cpu == PROCESSOR_NIAGARA2
++ || sparc_cpu == PROCESSOR_NIAGARA3
++ || sparc_cpu == PROCESSOR_NIAGARA4
++ || sparc_cpu == PROCESSOR_NIAGARA7
++ || sparc_cpu == PROCESSOR_M8)
++ return 12;
++
++ return 6;
++ }
++
++ return 2;
++}
++
++/* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
++ This is achieved by means of a manual dynamic stack space allocation in
++ the current frame. We make the assumption that SEQ doesn't contain any
++ function calls, with the possible exception of calls to the GOT helper. */
++
++static void
++emit_and_preserve (rtx seq, rtx reg, rtx reg2)
++{
++ /* We must preserve the lowest 16 words for the register save area. */
++ HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
++ /* We really need only 2 words of fresh stack space. */
++ HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
++
++ rtx slot
++ = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
++ SPARC_STACK_BIAS + offset));
++
++ emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
++ emit_insn (gen_rtx_SET (slot, reg));
++ if (reg2)
++ emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
++ reg2));
++ emit_insn (seq);
++ if (reg2)
++ emit_insn (gen_rtx_SET (reg2,
++ adjust_address (slot, word_mode, UNITS_PER_WORD)));
++ emit_insn (gen_rtx_SET (reg, slot));
++ emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
++}
++
++/* Output the assembler code for a thunk function. THUNK_DECL is the
++ declaration for the thunk function itself, FUNCTION is the decl for
++ the target function. DELTA is an immediate constant offset to be
++ added to THIS. If VCALL_OFFSET is nonzero, the word at address
++ (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
++
++static void
++sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
++ HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
++ tree function)
++{
++ const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
++ rtx this_rtx, funexp;
++ rtx_insn *insn;
++ unsigned int int_arg_first;
++
++ reload_completed = 1;
++ epilogue_completed = 1;
++
++ emit_note (NOTE_INSN_PROLOGUE_END);
++
++ if (TARGET_FLAT)
++ {
++ sparc_leaf_function_p = 1;
++
++ int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
++ }
++ else if (flag_delayed_branch)
++ {
++ /* We will emit a regular sibcall below, so we need to instruct
++ output_sibcall that we are in a leaf function. */
++ sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
++
++ /* This will cause final.c to invoke leaf_renumber_regs so we
++ must behave as if we were in a not-yet-leafified function. */
++ int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
++ }
++ else
++ {
++ /* We will emit the sibcall manually below, so we will need to
++ manually spill non-leaf registers. */
++ sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
++
++ /* We really are in a leaf function. */
++ int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
++ }
++
++ /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
++ returns a structure, the structure return pointer is there instead. */
++ if (TARGET_ARCH64
++ && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
++ this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
++ else
++ this_rtx = gen_rtx_REG (Pmode, int_arg_first);
++
++ /* Add DELTA. When possible use a plain add, otherwise load it into
++ a register first. */
++ if (delta)
++ {
++ rtx delta_rtx = GEN_INT (delta);
++
++ if (! SPARC_SIMM13_P (delta))
++ {
++ rtx scratch = gen_rtx_REG (Pmode, 1);
++ emit_move_insn (scratch, delta_rtx);
++ delta_rtx = scratch;
++ }
++
++ /* THIS_RTX += DELTA. */
++ emit_insn (gen_add2_insn (this_rtx, delta_rtx));
++ }
++
++ /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
++ if (vcall_offset)
++ {
++ rtx vcall_offset_rtx = GEN_INT (vcall_offset);
++ rtx scratch = gen_rtx_REG (Pmode, 1);
++
++ gcc_assert (vcall_offset < 0);
++
++ /* SCRATCH = *THIS_RTX. */
++ emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
++
++ /* Prepare for adding VCALL_OFFSET. The difficulty is that we
++ may not have any available scratch register at this point. */
++ if (SPARC_SIMM13_P (vcall_offset))
++ ;
++ /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
++ else if (! fixed_regs[5]
++ /* The below sequence is made up of at least 2 insns,
++ while the default method may need only one. */
++ && vcall_offset < -8192)
++ {
++ rtx scratch2 = gen_rtx_REG (Pmode, 5);
++ emit_move_insn (scratch2, vcall_offset_rtx);
++ vcall_offset_rtx = scratch2;
++ }
++ else
++ {
++ rtx increment = GEN_INT (-4096);
++
++ /* VCALL_OFFSET is a negative number whose typical range can be
++ estimated as -32768..0 in 32-bit mode. In almost all cases
++ it is therefore cheaper to emit multiple add insns than
++ spilling and loading the constant into a register (at least
++ 6 insns). */
++ while (! SPARC_SIMM13_P (vcall_offset))
++ {
++ emit_insn (gen_add2_insn (scratch, increment));
++ vcall_offset += 4096;
++ }
++ vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
++ }
++
++ /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
++ emit_move_insn (scratch, gen_rtx_MEM (Pmode,
++ gen_rtx_PLUS (Pmode,
++ scratch,
++ vcall_offset_rtx)));
++
++ /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
++ emit_insn (gen_add2_insn (this_rtx, scratch));
++ }
++
++ /* Generate a tail call to the target function. */
++ if (! TREE_USED (function))
++ {
++ assemble_external (function);
++ TREE_USED (function) = 1;
++ }
++ funexp = XEXP (DECL_RTL (function), 0);
++
++ if (flag_delayed_branch)
++ {
++ funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
++ insn = emit_call_insn (gen_sibcall (funexp));
++ SIBLING_CALL_P (insn) = 1;
++ }
++ else
++ {
++ /* The hoops we have to jump through in order to generate a sibcall
++ without using delay slots... */
++ rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
++
++ if (flag_pic)
++ {
++ spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
++ start_sequence ();
++ load_got_register (); /* clobbers %o7 */
++ if (!TARGET_VXWORKS_RTP)
++ pic_offset_table_rtx = got_register_rtx;
++ scratch = sparc_legitimize_pic_address (funexp, scratch);
++ seq = get_insns ();
++ end_sequence ();
++ emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
++ }
++ else if (TARGET_ARCH32)
++ {
++ emit_insn (gen_rtx_SET (scratch,
++ gen_rtx_HIGH (SImode, funexp)));
++ emit_insn (gen_rtx_SET (scratch,
++ gen_rtx_LO_SUM (SImode, scratch, funexp)));
++ }
++ else /* TARGET_ARCH64 */
++ {
++ switch (sparc_code_model)
++ {
++ case CM_MEDLOW:
++ case CM_MEDMID:
++ /* The destination can serve as a temporary. */
++ sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
++ break;
++
++ case CM_MEDANY:
++ case CM_EMBMEDANY:
++ /* The destination cannot serve as a temporary. */
++ spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
++ start_sequence ();
++ sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
++ seq = get_insns ();
++ end_sequence ();
++ emit_and_preserve (seq, spill_reg, 0);
++ break;
++
++ default:
++ gcc_unreachable ();
++ }
++ }
++
++ emit_jump_insn (gen_indirect_jump (scratch));
++ }
++
++ emit_barrier ();
++
++ /* Run just enough of rest_of_compilation to get the insns emitted.
++ There's not really enough bulk here to make other passes such as
++ instruction scheduling worth while. */
++ insn = get_insns ();
++ shorten_branches (insn);
++ assemble_start_function (thunk_fndecl, fnname);
++ final_start_function (insn, file, 1);
++ final (insn, file, 1);
++ final_end_function ();
++ assemble_end_function (thunk_fndecl, fnname);
++
++ reload_completed = 0;
++ epilogue_completed = 0;
++}
++
++/* Return true if sparc_output_mi_thunk would be able to output the
++ assembler code for the thunk function specified by the arguments
++ it is passed, and false otherwise. */
++static bool
++sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
++ HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
++ HOST_WIDE_INT vcall_offset,
++ const_tree function ATTRIBUTE_UNUSED)
++{
++ /* Bound the loop used in the default method above. */
++ return (vcall_offset >= -32768 || ! fixed_regs[5]);
++}
++
++/* How to allocate a 'struct machine_function'. */
++
++static struct machine_function *
++sparc_init_machine_status (void)
++{
++ return ggc_cleared_alloc<machine_function> ();
++}
++
++/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
++
++static unsigned HOST_WIDE_INT
++sparc_asan_shadow_offset (void)
++{
++ return TARGET_ARCH64 ? (HOST_WIDE_INT_1 << 43) : (HOST_WIDE_INT_1 << 29);
++}
++
++/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
++ We need to emit DTP-relative relocations. */
++
++static void
++sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
++{
++ switch (size)
++ {
++ case 4:
++ fputs ("\t.word\t%r_tls_dtpoff32(", file);
++ break;
++ case 8:
++ fputs ("\t.xword\t%r_tls_dtpoff64(", file);
++ break;
++ default:
++ gcc_unreachable ();
++ }
++ output_addr_const (file, x);
++ fputs (")", file);
++}
++
++/* Do whatever processing is required at the end of a file. */
++
++static void
++sparc_file_end (void)
++{
++ /* If we need to emit the special GOT helper function, do so now. */
++ if (got_helper_needed)
++ {
++ const char *name = XSTR (got_helper_rtx, 0);
++#ifdef DWARF2_UNWIND_INFO
++ bool do_cfi;
++#endif
++
++ if (USE_HIDDEN_LINKONCE)
++ {
++ tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
++ get_identifier (name),
++ build_function_type_list (void_type_node,
++ NULL_TREE));
++ DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
++ NULL_TREE, void_type_node);
++ TREE_PUBLIC (decl) = 1;
++ TREE_STATIC (decl) = 1;
++ make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
++ DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
++ DECL_VISIBILITY_SPECIFIED (decl) = 1;
++ resolve_unique_section (decl, 0, flag_function_sections);
++ allocate_struct_function (decl, true);
++ cfun->is_thunk = 1;
++ current_function_decl = decl;
++ init_varasm_status ();
++ assemble_start_function (decl, name);
++ }
++ else
++ {
++ const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
++ switch_to_section (text_section);
++ if (align > 0)
++ ASM_OUTPUT_ALIGN (asm_out_file, align);
++ ASM_OUTPUT_LABEL (asm_out_file, name);
++ }
++
++#ifdef DWARF2_UNWIND_INFO
++ do_cfi = dwarf2out_do_cfi_asm ();
++ if (do_cfi)
++ output_asm_insn (".cfi_startproc", NULL);
++#endif
++ if (flag_delayed_branch)
++ {
++ output_asm_insn ("jmp\t%%o7+8", NULL);
++ output_asm_insn (" add\t%%o7, %0, %0", &got_register_rtx);
++ }
++ else
++ {
++ output_asm_insn ("add\t%%o7, %0, %0", &got_register_rtx);
++ output_asm_insn ("jmp\t%%o7+8", NULL);
++ output_asm_insn (" nop", NULL);
++ }
++#ifdef DWARF2_UNWIND_INFO
++ if (do_cfi)
++ output_asm_insn (".cfi_endproc", NULL);
++#endif
++ }
++
++ if (NEED_INDICATE_EXEC_STACK)
++ file_end_indicate_exec_stack ();
++
++#ifdef TARGET_SOLARIS
++ solaris_file_end ();
++#endif
++}
++
++#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
++/* Implement TARGET_MANGLE_TYPE. */
++
++static const char *
++sparc_mangle_type (const_tree type)
++{
++ if (TARGET_ARCH32
++ && TYPE_MAIN_VARIANT (type) == long_double_type_node
++ && TARGET_LONG_DOUBLE_128)
++ return "g";
++
++ /* For all other types, use normal C++ mangling. */
++ return NULL;
++}
++#endif
++
++/* Expand a membar instruction for various use cases. Both the LOAD_STORE
++ and BEFORE_AFTER arguments of the form X_Y. They are two-bit masks where
++ bit 0 indicates that X is true, and bit 1 indicates Y is true. */
++
++void
++sparc_emit_membar_for_model (enum memmodel model,
++ int load_store, int before_after)
++{
++ /* Bits for the MEMBAR mmask field. */
++ const int LoadLoad = 1;
++ const int StoreLoad = 2;
++ const int LoadStore = 4;
++ const int StoreStore = 8;
++
++ int mm = 0, implied = 0;
++
++ switch (sparc_memory_model)
++ {
++ case SMM_SC:
++ /* Sequential Consistency. All memory transactions are immediately
++ visible in sequential execution order. No barriers needed. */
++ implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
++ break;
++
++ case SMM_TSO:
++ /* Total Store Ordering: all memory transactions with store semantics
++ are followed by an implied StoreStore. */
++ implied |= StoreStore;
++
++ /* If we're not looking for a raw barrer (before+after), then atomic
++ operations get the benefit of being both load and store. */
++ if (load_store == 3 && before_after == 1)
++ implied |= StoreLoad;
++ /* FALLTHRU */
++
++ case SMM_PSO:
++ /* Partial Store Ordering: all memory transactions with load semantics
++ are followed by an implied LoadLoad | LoadStore. */
++ implied |= LoadLoad | LoadStore;
++
++ /* If we're not looking for a raw barrer (before+after), then atomic
++ operations get the benefit of being both load and store. */
++ if (load_store == 3 && before_after == 2)
++ implied |= StoreLoad | StoreStore;
++ /* FALLTHRU */
++
++ case SMM_RMO:
++ /* Relaxed Memory Ordering: no implicit bits. */
++ break;
++
++ default:
++ gcc_unreachable ();
++ }
++
++ if (before_after & 1)
++ {
++ if (is_mm_release (model) || is_mm_acq_rel (model)
++ || is_mm_seq_cst (model))
++ {
++ if (load_store & 1)
++ mm |= LoadLoad | StoreLoad;
++ if (load_store & 2)
++ mm |= LoadStore | StoreStore;
++ }
++ }
++ if (before_after & 2)
++ {
++ if (is_mm_acquire (model) || is_mm_acq_rel (model)
++ || is_mm_seq_cst (model))
++ {
++ if (load_store & 1)
++ mm |= LoadLoad | LoadStore;
++ if (load_store & 2)
++ mm |= StoreLoad | StoreStore;
++ }
++ }
++
++ /* Remove the bits implied by the system memory model. */
++ mm &= ~implied;
++
++ /* For raw barriers (before+after), always emit a barrier.
++ This will become a compile-time barrier if needed. */
++ if (mm || before_after == 3)
++ emit_insn (gen_membar (GEN_INT (mm)));
++}
++
++/* Expand code to perform a 8 or 16-bit compare and swap by doing 32-bit
++ compare and swap on the word containing the byte or half-word. */
++
++static void
++sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
++ rtx oldval, rtx newval)
++{
++ rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
++ rtx addr = gen_reg_rtx (Pmode);
++ rtx off = gen_reg_rtx (SImode);
++ rtx oldv = gen_reg_rtx (SImode);
++ rtx newv = gen_reg_rtx (SImode);
++ rtx oldvalue = gen_reg_rtx (SImode);
++ rtx newvalue = gen_reg_rtx (SImode);
++ rtx res = gen_reg_rtx (SImode);
++ rtx resv = gen_reg_rtx (SImode);
++ rtx memsi, val, mask, cc;
++
++ emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
++
++ if (Pmode != SImode)
++ addr1 = gen_lowpart (SImode, addr1);
++ emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
++
++ memsi = gen_rtx_MEM (SImode, addr);
++ set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
++ MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
++
++ val = copy_to_reg (memsi);
++
++ emit_insn (gen_rtx_SET (off,
++ gen_rtx_XOR (SImode, off,
++ GEN_INT (GET_MODE (mem) == QImode
++ ? 3 : 2))));
++
++ emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
++
++ if (GET_MODE (mem) == QImode)
++ mask = force_reg (SImode, GEN_INT (0xff));
++ else
++ mask = force_reg (SImode, GEN_INT (0xffff));
++
++ emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
++
++ emit_insn (gen_rtx_SET (val,
++ gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
++ val)));
++
++ oldval = gen_lowpart (SImode, oldval);
++ emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
++
++ newval = gen_lowpart_common (SImode, newval);
++ emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
++
++ emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
++
++ emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
++
++ rtx_code_label *end_label = gen_label_rtx ();
++ rtx_code_label *loop_label = gen_label_rtx ();
++ emit_label (loop_label);
++
++ emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
++
++ emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
++
++ emit_move_insn (bool_result, const1_rtx);
++
++ emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
++
++ emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
++
++ emit_insn (gen_rtx_SET (resv,
++ gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
++ res)));
++
++ emit_move_insn (bool_result, const0_rtx);
++
++ cc = gen_compare_reg_1 (NE, resv, val);
++ emit_insn (gen_rtx_SET (val, resv));
++
++ /* Use cbranchcc4 to separate the compare and branch! */
++ emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
++ cc, const0_rtx, loop_label));
++
++ emit_label (end_label);
++
++ emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
++
++ emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
++
++ emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
++}
++
++/* Expand code to perform a compare-and-swap. */
++
++void
++sparc_expand_compare_and_swap (rtx operands[])
++{
++ rtx bval, retval, mem, oldval, newval;
++ machine_mode mode;
++ enum memmodel model;
++
++ bval = operands[0];
++ retval = operands[1];
++ mem = operands[2];
++ oldval = operands[3];
++ newval = operands[4];
++ model = (enum memmodel) INTVAL (operands[6]);
++ mode = GET_MODE (mem);
++
++ sparc_emit_membar_for_model (model, 3, 1);
++
++ if (reg_overlap_mentioned_p (retval, oldval))
++ oldval = copy_to_reg (oldval);
++
++ if (mode == QImode || mode == HImode)
++ sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
++ else
++ {
++ rtx (*gen) (rtx, rtx, rtx, rtx);
++ rtx x;
++
++ if (mode == SImode)
++ gen = gen_atomic_compare_and_swapsi_1;
++ else
++ gen = gen_atomic_compare_and_swapdi_1;
++ emit_insn (gen (retval, mem, oldval, newval));
++
++ x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
++ if (x != bval)
++ convert_move (bval, x, 1);
++ }
++
++ sparc_emit_membar_for_model (model, 3, 2);
++}
++
++void
++sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
++{
++ rtx t_1, t_2, t_3;
++
++ sel = gen_lowpart (DImode, sel);
++ switch (vmode)
++ {
++ case E_V2SImode:
++ /* inp = xxxxxxxAxxxxxxxB */
++ t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
++ NULL_RTX, 1, OPTAB_DIRECT);
++ /* t_1 = ....xxxxxxxAxxx. */
++ sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
++ GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
++ t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
++ GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
++ /* sel = .......B */
++ /* t_1 = ...A.... */
++ sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
++ /* sel = ...A...B */
++ sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
++ /* sel = AAAABBBB * 4 */
++ t_1 = force_reg (SImode, GEN_INT (0x01230123));
++ /* sel = { A*4, A*4+1, A*4+2, ... } */
++ break;
++
++ case E_V4HImode:
++ /* inp = xxxAxxxBxxxCxxxD */
++ t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
++ NULL_RTX, 1, OPTAB_DIRECT);
++ t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
++ NULL_RTX, 1, OPTAB_DIRECT);
++ t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
++ NULL_RTX, 1, OPTAB_DIRECT);
++ /* t_1 = ..xxxAxxxBxxxCxx */
++ /* t_2 = ....xxxAxxxBxxxC */
++ /* t_3 = ......xxxAxxxBxx */
++ sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
++ GEN_INT (0x07),
++ NULL_RTX, 1, OPTAB_DIRECT);
++ t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
++ GEN_INT (0x0700),
++ NULL_RTX, 1, OPTAB_DIRECT);
++ t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
++ GEN_INT (0x070000),
++ NULL_RTX, 1, OPTAB_DIRECT);
++ t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
++ GEN_INT (0x07000000),
++ NULL_RTX, 1, OPTAB_DIRECT);
++ /* sel = .......D */
++ /* t_1 = .....C.. */
++ /* t_2 = ...B.... */
++ /* t_3 = .A...... */
++ sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
++ t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
++ sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
++ /* sel = .A.B.C.D */
++ sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
++ /* sel = AABBCCDD * 2 */
++ t_1 = force_reg (SImode, GEN_INT (0x01010101));
++ /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
++ break;
++
++ case E_V8QImode:
++ /* input = xAxBxCxDxExFxGxH */
++ sel = expand_simple_binop (DImode, AND, sel,
++ GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
++ | 0x0f0f0f0f),
++ NULL_RTX, 1, OPTAB_DIRECT);
++ /* sel = .A.B.C.D.E.F.G.H */
++ t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
++ NULL_RTX, 1, OPTAB_DIRECT);
++ /* t_1 = ..A.B.C.D.E.F.G. */
++ sel = expand_simple_binop (DImode, IOR, sel, t_1,
++ NULL_RTX, 1, OPTAB_DIRECT);
++ /* sel = .AABBCCDDEEFFGGH */
++ sel = expand_simple_binop (DImode, AND, sel,
++ GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
++ | 0xff00ff),
++ NULL_RTX, 1, OPTAB_DIRECT);
++ /* sel = ..AB..CD..EF..GH */
++ t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
++ NULL_RTX, 1, OPTAB_DIRECT);
++ /* t_1 = ....AB..CD..EF.. */
++ sel = expand_simple_binop (DImode, IOR, sel, t_1,
++ NULL_RTX, 1, OPTAB_DIRECT);
++ /* sel = ..ABABCDCDEFEFGH */
++ sel = expand_simple_binop (DImode, AND, sel,
++ GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
++ NULL_RTX, 1, OPTAB_DIRECT);
++ /* sel = ....ABCD....EFGH */
++ t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
++ NULL_RTX, 1, OPTAB_DIRECT);
++ /* t_1 = ........ABCD.... */
++ sel = gen_lowpart (SImode, sel);
++ t_1 = gen_lowpart (SImode, t_1);
++ break;
++
++ default:
++ gcc_unreachable ();
++ }
++
++ /* Always perform the final addition/merge within the bmask insn. */
++ emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
++}
++
++/* Implement TARGET_VEC_PERM_CONST. */
++
++static bool
++sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
++ rtx op1, const vec_perm_indices &sel)
++{
++ if (!TARGET_VIS2)
++ return false;
++
++ /* All permutes are supported. */
++ if (!target)
++ return true;
++
++ /* Force target-independent code to convert constant permutations on other
++ modes down to V8QI. Rely on this to avoid the complexity of the byte
++ order of the permutation. */
++ if (vmode != V8QImode)
++ return false;
++
++ unsigned int i, mask;
++ for (i = mask = 0; i < 8; ++i)
++ mask |= (sel[i] & 0xf) << (28 - i*4);
++ rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode));
++
++ emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx));
++ emit_insn (gen_bshufflev8qi_vis (target, op0, op1));
++ return true;
++}
++
++/* Implement TARGET_FRAME_POINTER_REQUIRED. */
++
++static bool
++sparc_frame_pointer_required (void)
++{
++ /* If the stack pointer is dynamically modified in the function, it cannot
++ serve as the frame pointer. */
++ if (cfun->calls_alloca)
++ return true;
++
++ /* If the function receives nonlocal gotos, it needs to save the frame
++ pointer in the nonlocal_goto_save_area object. */
++ if (cfun->has_nonlocal_label)
++ return true;
++
++ /* In flat mode, that's it. */
++ if (TARGET_FLAT)
++ return false;
++
++ /* Otherwise, the frame pointer is required if the function isn't leaf, but
++ we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
++ return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
++}
++
++/* The way this is structured, we can't eliminate SFP in favor of SP
++ if the frame pointer is required: we want to use the SFP->HFP elimination
++ in that case. But the test in update_eliminables doesn't know we are
++ assuming below that we only do the former elimination. */
++
++static bool
++sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
++{
++ return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
++}
++
++/* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
++ they won't be allocated. */
++
++static void
++sparc_conditional_register_usage (void)
++{
++ if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
++ fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
++ /* If the user has passed -f{fixed,call-{used,saved}}-g5 */
++ /* then honor it. */
++ if (TARGET_ARCH32 && fixed_regs[5])
++ fixed_regs[5] = 1;
++ else if (TARGET_ARCH64 && fixed_regs[5] == 2)
++ fixed_regs[5] = 0;
++ if (! TARGET_V9)
++ {
++ int regno;
++ for (regno = SPARC_FIRST_V9_FP_REG;
++ regno <= SPARC_LAST_V9_FP_REG;
++ regno++)
++ fixed_regs[regno] = 1;
++ /* %fcc0 is used by v8 and v9. */
++ for (regno = SPARC_FIRST_V9_FCC_REG + 1;
++ regno <= SPARC_LAST_V9_FCC_REG;
++ regno++)
++ fixed_regs[regno] = 1;
++ }
++ if (! TARGET_FPU)
++ {
++ int regno;
++ for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
++ fixed_regs[regno] = 1;
++ }
++ /* If the user has passed -f{fixed,call-{used,saved}}-g2 */
++ /* then honor it. Likewise with g3 and g4. */
++ if (fixed_regs[2] == 2)
++ fixed_regs[2] = ! TARGET_APP_REGS;
++ if (fixed_regs[3] == 2)
++ fixed_regs[3] = ! TARGET_APP_REGS;
++ if (TARGET_ARCH32 && fixed_regs[4] == 2)
++ fixed_regs[4] = ! TARGET_APP_REGS;
++ else if (TARGET_CM_EMBMEDANY)
++ fixed_regs[4] = 1;
++ else if (fixed_regs[4] == 2)
++ fixed_regs[4] = 0;
++ if (TARGET_FLAT)
++ {
++ int regno;
++ /* Disable leaf functions. */
++ memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
++ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
++ leaf_reg_remap [regno] = regno;
++ }
++ if (TARGET_VIS)
++ global_regs[SPARC_GSR_REG] = 1;
++}
++
++/* Implement TARGET_USE_PSEUDO_PIC_REG. */
++
++static bool
++sparc_use_pseudo_pic_reg (void)
++{
++ return !TARGET_VXWORKS_RTP && flag_pic;
++}
++
++/* Implement TARGET_INIT_PIC_REG. */
++
++static void
++sparc_init_pic_reg (void)
++{
++ edge entry_edge;
++ rtx_insn *seq;
++
++ /* In PIC mode, we need to always initialize the PIC register if optimization
++ is enabled, because we are called from IRA and LRA may later force things
++ to the constant pool for optimization purposes. */
++ if (!flag_pic || (!crtl->uses_pic_offset_table && !optimize))
++ return;
++
++ start_sequence ();
++ load_got_register ();
++ if (!TARGET_VXWORKS_RTP)
++ emit_move_insn (pic_offset_table_rtx, got_register_rtx);
++ seq = get_insns ();
++ end_sequence ();
++
++ entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
++ insert_insn_on_edge (seq, entry_edge);
++ commit_one_edge_insertion (entry_edge);
++}
++
++/* Implement TARGET_PREFERRED_RELOAD_CLASS:
++
++ - We can't load constants into FP registers.
++ - We can't load FP constants into integer registers when soft-float,
++ because there is no soft-float pattern with a r/F constraint.
++ - We can't load FP constants into integer registers for TFmode unless
++ it is 0.0L, because there is no movtf pattern with a r/F constraint.
++ - Try and reload integer constants (symbolic or otherwise) back into
++ registers directly, rather than having them dumped to memory. */
++
++static reg_class_t
++sparc_preferred_reload_class (rtx x, reg_class_t rclass)
++{
++ machine_mode mode = GET_MODE (x);
++ if (CONSTANT_P (x))
++ {
++ if (FP_REG_CLASS_P (rclass)
++ || rclass == GENERAL_OR_FP_REGS
++ || rclass == GENERAL_OR_EXTRA_FP_REGS
++ || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
++ || (mode == TFmode && ! const_zero_operand (x, mode)))
++ return NO_REGS;
++
++ if (GET_MODE_CLASS (mode) == MODE_INT)
++ return GENERAL_REGS;
++
++ if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
++ {
++ if (! FP_REG_CLASS_P (rclass)
++ || !(const_zero_operand (x, mode)
++ || const_all_ones_operand (x, mode)))
++ return NO_REGS;
++ }
++ }
++
++ if (TARGET_VIS3
++ && ! TARGET_ARCH64
++ && (rclass == EXTRA_FP_REGS
++ || rclass == GENERAL_OR_EXTRA_FP_REGS))
++ {
++ int regno = true_regnum (x);
++
++ if (SPARC_INT_REG_P (regno))
++ return (rclass == EXTRA_FP_REGS
++ ? FP_REGS : GENERAL_OR_FP_REGS);
++ }
++
++ return rclass;
++}
++
++/* Return true if we use LRA instead of reload pass. */
++
++static bool
++sparc_lra_p (void)
++{
++ return TARGET_LRA;
++}
++
++/* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
++ OPERANDS are its operands and OPCODE is the mnemonic to be used. */
++
++const char *
++output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
++{
++ char mulstr[32];
++
++ gcc_assert (! TARGET_ARCH64);
++
++ if (sparc_check_64 (operands[1], insn) <= 0)
++ output_asm_insn ("srl\t%L1, 0, %L1", operands);
++ if (which_alternative == 1)
++ output_asm_insn ("sllx\t%H1, 32, %H1", operands);
++ if (GET_CODE (operands[2]) == CONST_INT)
++ {
++ if (which_alternative == 1)
++ {
++ output_asm_insn ("or\t%L1, %H1, %H1", operands);
++ sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
++ output_asm_insn (mulstr, operands);
++ return "srlx\t%L0, 32, %H0";
++ }
++ else
++ {
++ output_asm_insn ("sllx\t%H1, 32, %3", operands);
++ output_asm_insn ("or\t%L1, %3, %3", operands);
++ sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
++ output_asm_insn (mulstr, operands);
++ output_asm_insn ("srlx\t%3, 32, %H0", operands);
++ return "mov\t%3, %L0";
++ }
++ }
++ else if (rtx_equal_p (operands[1], operands[2]))
++ {
++ if (which_alternative == 1)
++ {
++ output_asm_insn ("or\t%L1, %H1, %H1", operands);
++ sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
++ output_asm_insn (mulstr, operands);
++ return "srlx\t%L0, 32, %H0";
++ }
++ else
++ {
++ output_asm_insn ("sllx\t%H1, 32, %3", operands);
++ output_asm_insn ("or\t%L1, %3, %3", operands);
++ sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
++ output_asm_insn (mulstr, operands);
++ output_asm_insn ("srlx\t%3, 32, %H0", operands);
++ return "mov\t%3, %L0";
++ }
++ }
++ if (sparc_check_64 (operands[2], insn) <= 0)
++ output_asm_insn ("srl\t%L2, 0, %L2", operands);
++ if (which_alternative == 1)
++ {
++ output_asm_insn ("or\t%L1, %H1, %H1", operands);
++ output_asm_insn ("sllx\t%H2, 32, %L1", operands);
++ output_asm_insn ("or\t%L2, %L1, %L1", operands);
++ sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
++ output_asm_insn (mulstr, operands);
++ return "srlx\t%L0, 32, %H0";
++ }
++ else
++ {
++ output_asm_insn ("sllx\t%H1, 32, %3", operands);
++ output_asm_insn ("sllx\t%H2, 32, %4", operands);
++ output_asm_insn ("or\t%L1, %3, %3", operands);
++ output_asm_insn ("or\t%L2, %4, %4", operands);
++ sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
++ output_asm_insn (mulstr, operands);
++ output_asm_insn ("srlx\t%3, 32, %H0", operands);
++ return "mov\t%3, %L0";
++ }
++}
++
++/* Subroutine of sparc_expand_vector_init. Emit code to initialize
++ all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
++ and INNER_MODE are the modes describing TARGET. */
++
++static void
++vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
++ machine_mode inner_mode)
++{
++ rtx t1, final_insn, sel;
++ int bmask;
++
++ t1 = gen_reg_rtx (mode);
++
++ elt = convert_modes (SImode, inner_mode, elt, true);
++ emit_move_insn (gen_lowpart(SImode, t1), elt);
++
++ switch (mode)
++ {
++ case E_V2SImode:
++ final_insn = gen_bshufflev2si_vis (target, t1, t1);
++ bmask = 0x45674567;
++ break;
++ case E_V4HImode:
++ final_insn = gen_bshufflev4hi_vis (target, t1, t1);
++ bmask = 0x67676767;
++ break;
++ case E_V8QImode:
++ final_insn = gen_bshufflev8qi_vis (target, t1, t1);
++ bmask = 0x77777777;
++ break;
++ default:
++ gcc_unreachable ();
++ }
++
++ sel = force_reg (SImode, GEN_INT (bmask));
++ emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
++ emit_insn (final_insn);
++}
++
++/* Subroutine of sparc_expand_vector_init. Emit code to initialize
++ all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
++
++static void
++vector_init_fpmerge (rtx target, rtx elt)
++{
++ rtx t1, t2, t2_low, t3, t3_low;
++
++ t1 = gen_reg_rtx (V4QImode);
++ elt = convert_modes (SImode, QImode, elt, true);
++ emit_move_insn (gen_lowpart (SImode, t1), elt);
++
++ t2 = gen_reg_rtx (V8QImode);
++ t2_low = gen_lowpart (V4QImode, t2);
++ emit_insn (gen_fpmerge_vis (t2, t1, t1));
++
++ t3 = gen_reg_rtx (V8QImode);
++ t3_low = gen_lowpart (V4QImode, t3);
++ emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
++
++ emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
++}
++
++/* Subroutine of sparc_expand_vector_init. Emit code to initialize
++ all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
++
++static void
++vector_init_faligndata (rtx target, rtx elt)
++{
++ rtx t1 = gen_reg_rtx (V4HImode);
++ int i;
++
++ elt = convert_modes (SImode, HImode, elt, true);
++ emit_move_insn (gen_lowpart (SImode, t1), elt);
++
++ emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
++ force_reg (SImode, GEN_INT (6)),
++ const0_rtx));
++
++ for (i = 0; i < 4; i++)
++ emit_insn (gen_faligndatav4hi_vis (target, t1, target));
++}
++
++/* Emit code to initialize TARGET to values for individual fields VALS. */
++
++void
++sparc_expand_vector_init (rtx target, rtx vals)
++{
++ const machine_mode mode = GET_MODE (target);
++ const machine_mode inner_mode = GET_MODE_INNER (mode);
++ const int n_elts = GET_MODE_NUNITS (mode);
++ int i, n_var = 0;
++ bool all_same = true;
++ rtx mem;
++
++ for (i = 0; i < n_elts; i++)
++ {
++ rtx x = XVECEXP (vals, 0, i);
++ if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
++ n_var++;
++
++ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
++ all_same = false;
++ }
++
++ if (n_var == 0)
++ {
++ emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
++ return;
++ }
++
++ if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
++ {
++ if (GET_MODE_SIZE (inner_mode) == 4)
++ {
++ emit_move_insn (gen_lowpart (SImode, target),
++ gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
++ return;
++ }
++ else if (GET_MODE_SIZE (inner_mode) == 8)
++ {
++ emit_move_insn (gen_lowpart (DImode, target),
++ gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
++ return;
++ }
++ }
++ else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
++ && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
++ {
++ emit_move_insn (gen_highpart (word_mode, target),
++ gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
++ emit_move_insn (gen_lowpart (word_mode, target),
++ gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
++ return;
++ }
++
++ if (all_same && GET_MODE_SIZE (mode) == 8)
++ {
++ if (TARGET_VIS2)
++ {
++ vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
++ return;
++ }
++ if (mode == V8QImode)
++ {
++ vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
++ return;
++ }
++ if (mode == V4HImode)
++ {
++ vector_init_faligndata (target, XVECEXP (vals, 0, 0));
++ return;
++ }
++ }
++
++ mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
++ for (i = 0; i < n_elts; i++)
++ emit_move_insn (adjust_address_nv (mem, inner_mode,
++ i * GET_MODE_SIZE (inner_mode)),
++ XVECEXP (vals, 0, i));
++ emit_move_insn (target, mem);
++}
++
++/* Implement TARGET_SECONDARY_RELOAD. */
++
++static reg_class_t
++sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
++ machine_mode mode, secondary_reload_info *sri)
++{
++ enum reg_class rclass = (enum reg_class) rclass_i;
++
++ sri->icode = CODE_FOR_nothing;
++ sri->extra_cost = 0;
++
++ /* We need a temporary when loading/storing a HImode/QImode value
++ between memory and the FPU registers. This can happen when combine puts
++ a paradoxical subreg in a float/fix conversion insn. */
++ if (FP_REG_CLASS_P (rclass)
++ && (mode == HImode || mode == QImode)
++ && (GET_CODE (x) == MEM
++ || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
++ && true_regnum (x) == -1)))
++ return GENERAL_REGS;
++
++ /* On 32-bit we need a temporary when loading/storing a DFmode value
++ between unaligned memory and the upper FPU registers. */
++ if (TARGET_ARCH32
++ && rclass == EXTRA_FP_REGS
++ && mode == DFmode
++ && GET_CODE (x) == MEM
++ && ! mem_min_alignment (x, 8))
++ return FP_REGS;
++
++ if (((TARGET_CM_MEDANY
++ && symbolic_operand (x, mode))
++ || (TARGET_CM_EMBMEDANY
++ && text_segment_operand (x, mode)))
++ && ! flag_pic)
++ {
++ if (in_p)
++ sri->icode = direct_optab_handler (reload_in_optab, mode);
++ else
++ sri->icode = direct_optab_handler (reload_out_optab, mode);
++ return NO_REGS;
++ }
++
++ if (TARGET_VIS3 && TARGET_ARCH32)
++ {
++ int regno = true_regnum (x);
++
++ /* When using VIS3 fp<-->int register moves, on 32-bit we have
++ to move 8-byte values in 4-byte pieces. This only works via
++ FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
++ move between EXTRA_FP_REGS and GENERAL_REGS, we will need
++ an FP_REGS intermediate move. */
++ if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
++ || ((general_or_i64_p (rclass)
++ || rclass == GENERAL_OR_FP_REGS)
++ && SPARC_FP_REG_P (regno)))
++ {
++ sri->extra_cost = 2;
++ return FP_REGS;
++ }
++ }
++
++ return NO_REGS;
++}
++
++/* Implement TARGET_SECONDARY_MEMORY_NEEDED.
++
++ On SPARC when not VIS3 it is not possible to directly move data
++ between GENERAL_REGS and FP_REGS. */
++
++static bool
++sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
++ reg_class_t class2)
++{
++ return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
++ && (! TARGET_VIS3
++ || GET_MODE_SIZE (mode) > 8
++ || GET_MODE_SIZE (mode) < 4));
++}
++
++/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
++
++ get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
++ because the movsi and movsf patterns don't handle r/f moves.
++ For v8 we copy the default definition. */
++
++static machine_mode
++sparc_secondary_memory_needed_mode (machine_mode mode)
++{
++ if (TARGET_ARCH64)
++ {
++ if (GET_MODE_BITSIZE (mode) < 32)
++ return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
++ return mode;
++ }
++ else
++ {
++ if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
++ return mode_for_size (BITS_PER_WORD,
++ GET_MODE_CLASS (mode), 0).require ();
++ return mode;
++ }
++}
++
++/* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
++ OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
++
++bool
++sparc_expand_conditional_move (machine_mode mode, rtx *operands)
++{
++ enum rtx_code rc = GET_CODE (operands[1]);
++ machine_mode cmp_mode;
++ rtx cc_reg, dst, cmp;
++
++ cmp = operands[1];
++ if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
++ return false;
++
++ if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
++ cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
++
++ cmp_mode = GET_MODE (XEXP (cmp, 0));
++ rc = GET_CODE (cmp);
++
++ dst = operands[0];
++ if (! rtx_equal_p (operands[2], dst)
++ && ! rtx_equal_p (operands[3], dst))
++ {
++ if (reg_overlap_mentioned_p (dst, cmp))
++ dst = gen_reg_rtx (mode);
++
++ emit_move_insn (dst, operands[3]);
++ }
++ else if (operands[2] == dst)
++ {
++ operands[2] = operands[3];
++
++ if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
++ rc = reverse_condition_maybe_unordered (rc);
++ else
++ rc = reverse_condition (rc);
++ }
++
++ if (XEXP (cmp, 1) == const0_rtx
++ && GET_CODE (XEXP (cmp, 0)) == REG
++ && cmp_mode == DImode
++ && v9_regcmp_p (rc))
++ cc_reg = XEXP (cmp, 0);
++ else
++ cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
++
++ cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
++
++ emit_insn (gen_rtx_SET (dst,
++ gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
++
++ if (dst != operands[0])
++ emit_move_insn (operands[0], dst);
++
++ return true;
++}
++
++/* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
++ into OPERANDS[0] in MODE, depending on the outcome of the comparison of
++ OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
++ FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
++ code to be used for the condition mask. */
++
++void
++sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
++{
++ rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
++ enum rtx_code code = GET_CODE (operands[3]);
++
++ mask = gen_reg_rtx (Pmode);
++ cop0 = operands[4];
++ cop1 = operands[5];
++ if (code == LT || code == GE)
++ {
++ rtx t;
++
++ code = swap_condition (code);
++ t = cop0; cop0 = cop1; cop1 = t;
++ }
++
++ gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
++
++ fcmp = gen_rtx_UNSPEC (Pmode,
++ gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
++ fcode);
++
++ cmask = gen_rtx_UNSPEC (DImode,
++ gen_rtvec (2, mask, gsr),
++ ccode);
++
++ bshuf = gen_rtx_UNSPEC (mode,
++ gen_rtvec (3, operands[1], operands[2], gsr),
++ UNSPEC_BSHUFFLE);
++
++ emit_insn (gen_rtx_SET (mask, fcmp));
++ emit_insn (gen_rtx_SET (gsr, cmask));
++
++ emit_insn (gen_rtx_SET (operands[0], bshuf));
++}
++
++/* On sparc, any mode which naturally allocates into the float
++ registers should return 4 here. */
++
++unsigned int
++sparc_regmode_natural_size (machine_mode mode)
++{
++ int size = UNITS_PER_WORD;
++
++ if (TARGET_ARCH64)
++ {
++ enum mode_class mclass = GET_MODE_CLASS (mode);
++
++ if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
++ size = 4;
++ }
++
++ return size;
++}
++
++/* Implement TARGET_HARD_REGNO_NREGS.
++
++ On SPARC, ordinary registers hold 32 bits worth; this means both
++ integer and floating point registers. On v9, integer regs hold 64
++ bits worth; floating point regs hold 32 bits worth (this includes the
++ new fp regs as even the odd ones are included in the hard register
++ count). */
++
++static unsigned int
++sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
++{
++ if (regno == SPARC_GSR_REG)
++ return 1;
++ if (TARGET_ARCH64)
++ {
++ if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
++ return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
++ return CEIL (GET_MODE_SIZE (mode), 4);
++ }
++ return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
++}
++
++/* Implement TARGET_HARD_REGNO_MODE_OK.
++
++ ??? Because of the funny way we pass parameters we should allow certain
++ ??? types of float/complex values to be in integer registers during
++ ??? RTL generation. This only matters on arch32. */
++
++static bool
++sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
++{
++ return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
++}
++
++/* Implement TARGET_MODES_TIEABLE_P.
++
++ For V9 we have to deal with the fact that only the lower 32 floating
++ point registers are 32-bit addressable. */
++
++static bool
++sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
++{
++ enum mode_class mclass1, mclass2;
++ unsigned short size1, size2;
++
++ if (mode1 == mode2)
++ return true;
++
++ mclass1 = GET_MODE_CLASS (mode1);
++ mclass2 = GET_MODE_CLASS (mode2);
++ if (mclass1 != mclass2)
++ return false;
++
++ if (! TARGET_V9)
++ return true;
++
++ /* Classes are the same and we are V9 so we have to deal with upper
++ vs. lower floating point registers. If one of the modes is a
++ 4-byte mode, and the other is not, we have to mark them as not
++ tieable because only the lower 32 floating point register are
++ addressable 32-bits at a time.
++
++ We can't just test explicitly for SFmode, otherwise we won't
++ cover the vector mode cases properly. */
++
++ if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
++ return true;
++
++ size1 = GET_MODE_SIZE (mode1);
++ size2 = GET_MODE_SIZE (mode2);
++ if ((size1 > 4 && size2 == 4)
++ || (size2 > 4 && size1 == 4))
++ return false;
++
++ return true;
++}
++
++/* Implement TARGET_CSTORE_MODE. */
++
++static scalar_int_mode
++sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
++{
++ return (TARGET_ARCH64 ? DImode : SImode);
++}
++
++/* Return the compound expression made of T1 and T2. */
++
++static inline tree
++compound_expr (tree t1, tree t2)
++{
++ return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
++}
++
++/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
++
++static void
++sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
++{
++ if (!TARGET_FPU)
++ return;
++
++ const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
++ const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
++
++ /* We generate the equivalent of feholdexcept (&fenv_var):
++
++ unsigned int fenv_var;
++ __builtin_store_fsr (&fenv_var);
++
++ unsigned int tmp1_var;
++ tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
++
++ __builtin_load_fsr (&tmp1_var); */
++
++ tree fenv_var = create_tmp_var_raw (unsigned_type_node);
++ TREE_ADDRESSABLE (fenv_var) = 1;
++ tree fenv_addr = build_fold_addr_expr (fenv_var);
++ tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
++ tree hold_stfsr
++ = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
++ build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
++
++ tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
++ TREE_ADDRESSABLE (tmp1_var) = 1;
++ tree masked_fenv_var
++ = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
++ build_int_cst (unsigned_type_node,
++ ~(accrued_exception_mask | trap_enable_mask)));
++ tree hold_mask
++ = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
++ NULL_TREE, NULL_TREE);
++
++ tree tmp1_addr = build_fold_addr_expr (tmp1_var);
++ tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
++ tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
++
++ *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
++
++ /* We reload the value of tmp1_var to clear the exceptions:
++
++ __builtin_load_fsr (&tmp1_var); */
++
++ *clear = build_call_expr (ldfsr, 1, tmp1_addr);
++
++ /* We generate the equivalent of feupdateenv (&fenv_var):
++
++ unsigned int tmp2_var;
++ __builtin_store_fsr (&tmp2_var);
++
++ __builtin_load_fsr (&fenv_var);
++
++ if (SPARC_LOW_FE_EXCEPT_VALUES)
++ tmp2_var >>= 5;
++ __atomic_feraiseexcept ((int) tmp2_var); */
++
++ tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
++ TREE_ADDRESSABLE (tmp2_var) = 1;
++ tree tmp2_addr = build_fold_addr_expr (tmp2_var);
++ tree update_stfsr
++ = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
++ build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
++
++ tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
++
++ tree atomic_feraiseexcept
++ = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
++ tree update_call
++ = build_call_expr (atomic_feraiseexcept, 1,
++ fold_convert (integer_type_node, tmp2_var));
++
++ if (SPARC_LOW_FE_EXCEPT_VALUES)
++ {
++ tree shifted_tmp2_var
++ = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
++ build_int_cst (unsigned_type_node, 5));
++ tree update_shift
++ = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
++ update_call = compound_expr (update_shift, update_call);
++ }
++
++ *update
++ = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
++}
++
++/* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port.
++
++ SImode loads to floating-point registers are not zero-extended.
++ The definition for LOAD_EXTEND_OP specifies that integer loads
++ narrower than BITS_PER_WORD will be zero-extended. As a result,
++ we inhibit changes from SImode unless they are to a mode that is
++ identical in size.
++
++ Likewise for SFmode, since word-mode paradoxical subregs are
++ problematic on big-endian architectures. */
++
++static bool
++sparc_can_change_mode_class (machine_mode from, machine_mode to,
++ reg_class_t rclass)
++{
++ if (TARGET_ARCH64
++ && GET_MODE_SIZE (from) == 4
++ && GET_MODE_SIZE (to) != 4)
++ return !reg_classes_intersect_p (rclass, FP_REGS);
++ return true;
++}
++
++/* Implement TARGET_CONSTANT_ALIGNMENT. */
++
++static HOST_WIDE_INT
++sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
++{
++ if (TREE_CODE (exp) == STRING_CST)
++ return MAX (align, FASTEST_ALIGNMENT);
++ return align;
++}
++
++#include "gt-sparc.h"
+diff -Nur gcc-10.3.0.orig/gcc/config/sparc/sparc.md gcc-10.3.0/gcc/config/sparc/sparc.md
+--- gcc-10.3.0.orig/gcc/config/sparc/sparc.md 2021-04-08 13:56:28.205742322 +0200
++++ gcc-10.3.0/gcc/config/sparc/sparc.md 2021-04-09 07:51:37.936504607 +0200
+@@ -1601,7 +1601,10 @@
+ (clobber (reg:P O7_REG))]
+ "REGNO (operands[0]) == INTVAL (operands[3])"
+ {
+- return output_load_pcrel_sym (operands);
++ if (flag_delayed_branch)
++ return "sethi\t%%hi(%a1-4), %0\n\tcall\t%a2\n\t add\t%0, %%lo(%a1+4), %0";
++ else
++ return "sethi\t%%hi(%a1-8), %0\n\tadd\t%0, %%lo(%a1-4), %0\n\tcall\t%a2\n\t nop";
+ }
+ [(set (attr "type") (const_string "multi"))
+ (set (attr "length")
+diff -Nur gcc-10.3.0.orig/gcc/config/sparc/sparc.md.orig gcc-10.3.0/gcc/config/sparc/sparc.md.orig
+--- gcc-10.3.0.orig/gcc/config/sparc/sparc.md.orig 1970-01-01 01:00:00.000000000 +0100
++++ gcc-10.3.0/gcc/config/sparc/sparc.md.orig 2021-04-08 13:56:28.205742322 +0200
+@@ -0,0 +1,9524 @@
++;; Machine description for SPARC.
++;; Copyright (C) 1987-2020 Free Software Foundation, Inc.
++;; Contributed by Michael Tiemann (tiemann@cygnus.com)
++;; 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
++;; at Cygnus Support.
++
++;; This file is part of GCC.
++
++;; GCC is free software; you can redistribute it and/or modify
++;; it under the terms of the GNU General Public License as published by
++;; the Free Software Foundation; either version 3, or (at your option)
++;; any later version.
++
++;; GCC is distributed in the hope that it will be useful,
++;; but WITHOUT ANY WARRANTY; without even the implied warranty of
++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++;; GNU General Public License for more details.
++
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING3. If not see
++;; <http://www.gnu.org/licenses/>.
++
++(define_c_enum "unspec" [
++ UNSPEC_MOVE_PIC
++ UNSPEC_UPDATE_RETURN
++ UNSPEC_LOAD_PCREL_SYM
++ UNSPEC_FRAME_BLOCKAGE
++ UNSPEC_MOVE_PIC_LABEL
++ UNSPEC_SETH44
++ UNSPEC_SETM44
++ UNSPEC_SETHH
++ UNSPEC_SETLM
++ UNSPEC_EMB_HISUM
++ UNSPEC_EMB_TEXTUHI
++ UNSPEC_EMB_TEXTHI
++ UNSPEC_EMB_TEXTULO
++ UNSPEC_EMB_SETHM
++ UNSPEC_MOVE_GOTDATA
++
++ UNSPEC_MEMBAR
++ UNSPEC_ATOMIC
++
++ UNSPEC_TLSGD
++ UNSPEC_TLSLDM
++ UNSPEC_TLSLDO
++ UNSPEC_TLSIE
++ UNSPEC_TLSLE
++ UNSPEC_TLSLD_BASE
++
++ UNSPEC_FPACK16
++ UNSPEC_FPACK32
++ UNSPEC_FPACKFIX
++ UNSPEC_FEXPAND
++ UNSPEC_MUL16AU
++ UNSPEC_MUL16AL
++ UNSPEC_MUL8UL
++ UNSPEC_MULDUL
++ UNSPEC_ALIGNDATA
++ UNSPEC_FCMP
++ UNSPEC_PDIST
++ UNSPEC_EDGE8
++ UNSPEC_EDGE8L
++ UNSPEC_EDGE16
++ UNSPEC_EDGE16L
++ UNSPEC_EDGE32
++ UNSPEC_EDGE32L
++ UNSPEC_ARRAY8
++ UNSPEC_ARRAY16
++ UNSPEC_ARRAY32
++
++ UNSPEC_SP_SET
++ UNSPEC_SP_TEST
++
++ UNSPEC_EDGE8N
++ UNSPEC_EDGE8LN
++ UNSPEC_EDGE16N
++ UNSPEC_EDGE16LN
++ UNSPEC_EDGE32N
++ UNSPEC_EDGE32LN
++ UNSPEC_BSHUFFLE
++ UNSPEC_CMASK8
++ UNSPEC_CMASK16
++ UNSPEC_CMASK32
++ UNSPEC_FCHKSM16
++ UNSPEC_PDISTN
++ UNSPEC_FUCMP
++ UNSPEC_FHADD
++ UNSPEC_FHSUB
++ UNSPEC_XMUL
++ UNSPEC_MUL8
++ UNSPEC_MUL8SU
++ UNSPEC_MULDSU
++
++ UNSPEC_ADDV
++ UNSPEC_SUBV
++ UNSPEC_NEGV
++
++ UNSPEC_DICTUNPACK
++ UNSPEC_FPCMPSHL
++ UNSPEC_FPUCMPSHL
++ UNSPEC_FPCMPDESHL
++ UNSPEC_FPCMPURSHL
++])
++
++(define_c_enum "unspecv" [
++ UNSPECV_BLOCKAGE
++
++ UNSPECV_SPECULATION_BARRIER
++
++ UNSPECV_PROBE_STACK_RANGE
++
++ UNSPECV_FLUSHW
++ UNSPECV_SAVEW
++
++ UNSPECV_FLUSH
++
++ UNSPECV_LDSTUB
++ UNSPECV_SWAP
++ UNSPECV_CAS
++
++ UNSPECV_LDFSR
++ UNSPECV_STFSR
++])
++
++(define_constants
++ [(G0_REG 0)
++ (G1_REG 1)
++ (G2_REG 2)
++ (G3_REG 3)
++ (G4_REG 4)
++ (G5_REG 5)
++ (G6_REG 6)
++ (G7_REG 7)
++ (O0_REG 8)
++ (O1_REG 9)
++ (O2_REG 10)
++ (O3_REG 11)
++ (O4_REG 12)
++ (O5_REG 13)
++ (O6_REG 14)
++ (O7_REG 15)
++ (L0_REG 16)
++ (L1_REG 17)
++ (L2_REG 18)
++ (L3_REG 19)
++ (L4_REG 20)
++ (L5_REG 21)
++ (L6_REG 22)
++ (L7_REG 23)
++ (I0_REG 24)
++ (I1_REG 25)
++ (I2_REG 26)
++ (I3_REG 27)
++ (I4_REG 28)
++ (I5_REG 29)
++ (I6_REG 30)
++ (I7_REG 31)
++ (F0_REG 32)
++ (F1_REG 33)
++ (F2_REG 34)
++ (F3_REG 35)
++ (F4_REG 36)
++ (F5_REG 37)
++ (F6_REG 38)
++ (F7_REG 39)
++ (F8_REG 40)
++ (F9_REG 41)
++ (F10_REG 42)
++ (F11_REG 43)
++ (F12_REG 44)
++ (F13_REG 45)
++ (F14_REG 46)
++ (F15_REG 47)
++ (F16_REG 48)
++ (F17_REG 49)
++ (F18_REG 50)
++ (F19_REG 51)
++ (F20_REG 52)
++ (F21_REG 53)
++ (F22_REG 54)
++ (F23_REG 55)
++ (F24_REG 56)
++ (F25_REG 57)
++ (F26_REG 58)
++ (F27_REG 59)
++ (F28_REG 60)
++ (F29_REG 61)
++ (F30_REG 62)
++ (F31_REG 63)
++ (F32_REG 64)
++ (F34_REG 66)
++ (F36_REG 68)
++ (F38_REG 70)
++ (F40_REG 72)
++ (F42_REG 74)
++ (F44_REG 76)
++ (F46_REG 78)
++ (F48_REG 80)
++ (F50_REG 82)
++ (F52_REG 84)
++ (F54_REG 86)
++ (F56_REG 88)
++ (F58_REG 90)
++ (F60_REG 92)
++ (F62_REG 94)
++ (FCC0_REG 96)
++ (FCC1_REG 97)
++ (FCC2_REG 98)
++ (FCC3_REG 99)
++ (CC_REG 100)
++ (SFP_REG 101)
++ (GSR_REG 102)
++ ])
++
++(define_mode_iterator I [QI HI SI DI])
++(define_mode_iterator P [(SI "TARGET_ARCH32") (DI "TARGET_ARCH64")])
++(define_mode_iterator W [SI (DI "TARGET_ARCH64")])
++(define_mode_iterator F [SF DF TF])
++
++;; The upper 32 fp regs on the v9 can't hold SFmode values. To deal with this
++;; a second register class, EXTRA_FP_REGS, exists for the v9 chip. The name
++;; is a bit of a misnomer as it covers all 64 fp regs. The corresponding
++;; constraint letter is 'e'. To avoid any confusion, 'e' is used instead of
++;; 'f' for all DF/TFmode values, including those that are specific to the v8.
++
++;; Attribute for cpu type.
++;; These must match the values of enum sparc_processor_type in sparc-opts.h.
++(define_attr "cpu"
++ "v7,
++ cypress,
++ v8,
++ supersparc,
++ hypersparc,
++ leon,
++ leon3,
++ leon3v7,
++ sparclite,
++ f930,
++ f934,
++ sparclite86x,
++ sparclet,
++ tsc701,
++ v9,
++ ultrasparc,
++ ultrasparc3,
++ niagara,
++ niagara2,
++ niagara3,
++ niagara4,
++ niagara7,
++ m8"
++ (const (symbol_ref "sparc_cpu_attr")))
++
++;; Attribute for the instruction set.
++;; At present we only need to distinguish v9/!v9, but for clarity we
++;; test TARGET_V8 too.
++(define_attr "isa" "v7,v8,v9,sparclet"
++ (const
++ (cond [(symbol_ref "TARGET_V9") (const_string "v9")
++ (symbol_ref "TARGET_V8") (const_string "v8")
++ (symbol_ref "TARGET_SPARCLET") (const_string "sparclet")]
++ (const_string "v7"))))
++
++(define_attr "cpu_feature" "none,fpu,fpunotv9,v9,vis,vis3,vis4,vis4b"
++ (const_string "none"))
++
++(define_attr "lra" "disabled,enabled"
++ (const_string "enabled"))
++
++(define_attr "enabled" ""
++ (cond [(eq_attr "cpu_feature" "none")
++ (cond [(eq_attr "lra" "disabled") (symbol_ref "!TARGET_LRA")] (const_int 1))
++ (eq_attr "cpu_feature" "fpu") (symbol_ref "TARGET_FPU")
++ (eq_attr "cpu_feature" "fpunotv9") (symbol_ref "TARGET_FPU && !TARGET_V9")
++ (eq_attr "cpu_feature" "v9") (symbol_ref "TARGET_V9")
++ (eq_attr "cpu_feature" "vis") (symbol_ref "TARGET_VIS")
++ (eq_attr "cpu_feature" "vis3") (symbol_ref "TARGET_VIS3")
++ (eq_attr "cpu_feature" "vis4") (symbol_ref "TARGET_VIS4")
++ (eq_attr "cpu_feature" "vis4b") (symbol_ref "TARGET_VIS4B")]
++ (const_int 0)))
++
++;; The SPARC instructions used by the backend are organized into a
++;; hierarchy using the insn attributes "type" and "subtype".
++;;
++;; The mnemonics used in the list below are the architectural names
++;; used in the Oracle SPARC Architecture specs. A / character
++;; separates the type from the subtype where appropriate. For
++;; brevity, text enclosed in {} denotes alternatives, while text
++;; enclosed in [] is optional.
++;;
++;; Please keep this list updated. It is of great help for keeping the
++;; correctness and coherence of the DFA schedulers.
++;;
++;; ialu: <empty>
++;; ialuX: ADD[X]C SUB[X]C
++;; shift: SLL[X] SRL[X] SRA[X]
++;; cmove: MOV{A,N,NE,E,G,LE,GE,L,GU,LEU,CC,CS,POS,NEG,VC,VS}
++;; MOVF{A,N,U,G,UG,L,UL,LG,NE,E,UE,GE,UGE,LE,ULE,O}
++;; MOVR{Z,LEZ,LZ,NZ,GZ,GEZ}
++;; compare: ADDcc ADDCcc ANDcc ORcc SUBcc SUBCcc XORcc XNORcc
++;; imul: MULX SMUL[cc] UMUL UMULXHI XMULX XMULXHI
++;; idiv: UDIVX SDIVX
++;; flush: FLUSH
++;; load/regular: LD{UB,UH,UW} LDFSR
++;; load/prefetch: PREFETCH
++;; fpload: LDF LDDF LDQF
++;; sload: LD{SB,SH,SW}
++;; store: ST{B,H,W,X} STFSR
++;; fpstore: STF STDF STQF
++;; cbcond: CWB{NE,E,G,LE,GE,L,GU,LEU,CC,CS,POS,NEG,VC,VS}
++;; CXB{NE,E,G,LE,GE,L,GU,LEU,CC,CS,POS,NEG,VC,VS}
++;; uncond_branch: BA BPA JMPL
++;; branch: B{NE,E,G,LE,GE,L,GU,LEU,CC,CS,POS,NEG,VC,VS}
++;; BP{NE,E,G,LE,GE,L,GU,LEU,CC,CS,POS,NEG,VC,VS}
++;; FB{U,G,UG,L,UL,LG,NE,BE,UE,GE,UGE,LE,ULE,O}
++;; call: CALL
++;; return: RESTORE RETURN
++;; fpmove: FABS{s,d,q} FMOV{s,d,q} FNEG{s,d,q}
++;; fpcmove: FMOV{S,D,Q}{icc,xcc,fcc}
++;; fpcrmove: FMOVR{s,d,q}{Z,LEZ,LZ,NZ,GZ,GEZ}
++;; fp: FADD{s,d,q} FSUB{s,d,q} FHSUB{s,d} FNHADD{s,d} FNADD{s,d}
++;; FiTO{s,d,q} FsTO{i,x,d,q} FdTO{i,x,s,q} FxTO{d,s,q} FqTO{i,x,s,d}
++;; fpcmp: FCMP{s,d,q} FCMPE{s,d,q}
++;; fpmul: FMADD{s,d} FMSUB{s,d} FMUL{s,d,q} FNMADD{s,d}
++;; FNMSUB{s,d} FNMUL{s,d} FNsMULd FsMULd
++;; FdMULq
++;; array: ARRAY{8,16,32}
++;; bmask: BMASK
++;; edge: EDGE{8,16,32}[L]cc
++;; edgen: EDGE{8,16,32}[L]n
++;; fpdivs: FDIV{s,q}
++;; fpsqrts: FSQRT{s,q}
++;; fpdivd: FDIVd
++;; fpsqrtd: FSQRTd
++;; lzd: LZCNT
++;; fga/addsub64: FP{ADD,SUB}64
++;; fga/fpu: FCHKSM16 FEXPANd FMEAN16 FPMERGE
++;; FS{LL,RA,RL}{16,32}
++;; fga/maxmin: FP{MAX,MIN}[U]{8,16,32}
++;; fga/cmask: CMASK{8,16,32}
++;; fga/other: BSHUFFLE FALIGNDATAg FP{ADD,SUB}[S]{8,16,32}
++;; FP{ADD,SUB}US{8,16} DICTUNPACK
++;; gsr/reg: RDGSR WRGSR
++;; gsr/alignaddr: ALIGNADDRESS[_LITTLE]
++;; vismv/double: FSRC2d
++;; vismv/single: MOVwTOs FSRC2s
++;; vismv/movstouw: MOVsTOuw
++;; vismv/movxtod: MOVxTOd
++;; vismv/movdtox: MOVdTOx
++;; visl/single: F{AND,NAND,NOR,OR,NOT1}s
++;; F{AND,OR}NOT{1,2}s
++;; FONEs F{ZERO,XNOR,XOR}s FNOT2s
++;; visl/double: FONEd FZEROd FNOT1d F{OR,AND,XOR}d F{NOR,NAND,XNOR}d
++;; F{OR,AND}NOT1d F{OR,AND}NOT2d
++;; viscmp: FPCMP{LE,GT,NE,EQ}{8,16,32} FPCMPU{LE,GT,NE,EQ}{8,16,32}
++;; FPCMP{LE,GT,EQ,NE}{8,16,32}SHL FPCMPU{LE,GT,EQ,NE}{8,16,32}SHL
++;; FPCMPDE{8,16,32}SHL FPCMPUR{8,16,32}SHL
++;; fgm_pack: FPACKFIX FPACK{8,16,32}
++;; fgm_mul: FMUL8SUx16 FMUL8ULx16 FMUL8x16 FMUL8x16AL
++;; FMUL8x16AU FMULD8SUx16 FMULD8ULx16
++;; pdist: PDIST
++;; pdistn: PDISTN
++
++(define_attr "type"
++ "ialu,compare,shift,
++ load,sload,store,
++ uncond_branch,branch,call,sibcall,call_no_delay_slot,return,
++ cbcond,uncond_cbcond,
++ imul,idiv,
++ fpload,fpstore,
++ fp,fpmove,
++ fpcmove,fpcrmove,
++ fpcmp,
++ fpmul,fpdivs,fpdivd,
++ fpsqrts,fpsqrtd,
++ fga,visl,vismv,viscmp,
++ fgm_pack,fgm_mul,pdist,pdistn,edge,edgen,gsr,array,bmask,
++ cmove,
++ ialuX,
++ multi,savew,flushw,iflush,trap,lzd"
++ (const_string "ialu"))
++
++(define_attr "subtype"
++ "single,double,movstouw,movxtod,movdtox,
++ addsub64,cmask,fpu,maxmin,other,
++ reg,alignaddr,
++ prefetch,regular"
++ (const_string "single"))
++
++;; True if branch/call has empty delay slot and will emit a nop in it
++(define_attr "empty_delay_slot" "false,true"
++ (symbol_ref "(empty_delay_slot (insn)
++ ? EMPTY_DELAY_SLOT_TRUE : EMPTY_DELAY_SLOT_FALSE)"))
++
++;; True if we are making use of compare-and-branch instructions.
++;; True if we should emit a nop after a cbcond instruction
++(define_attr "emit_cbcond_nop" "false,true"
++ (symbol_ref "(emit_cbcond_nop (insn)
++ ? EMIT_CBCOND_NOP_TRUE : EMIT_CBCOND_NOP_FALSE)"))
++
++(define_attr "branch_type" "none,icc,fcc,reg"
++ (const_string "none"))
++
++(define_attr "pic" "false,true"
++ (symbol_ref "(flag_pic != 0
++ ? PIC_TRUE : PIC_FALSE)"))
++
++(define_attr "calls_alloca" "false,true"
++ (symbol_ref "(cfun->calls_alloca != 0
++ ? CALLS_ALLOCA_TRUE : CALLS_ALLOCA_FALSE)"))
++
++(define_attr "calls_eh_return" "false,true"
++ (symbol_ref "(crtl->calls_eh_return != 0
++ ? CALLS_EH_RETURN_TRUE : CALLS_EH_RETURN_FALSE)"))
++
++(define_attr "leaf_function" "false,true"
++ (symbol_ref "(crtl->uses_only_leaf_regs != 0
++ ? LEAF_FUNCTION_TRUE : LEAF_FUNCTION_FALSE)"))
++
++(define_attr "delayed_branch" "false,true"
++ (symbol_ref "(flag_delayed_branch != 0
++ ? DELAYED_BRANCH_TRUE : DELAYED_BRANCH_FALSE)"))
++
++(define_attr "flat" "false,true"
++ (symbol_ref "(TARGET_FLAT != 0
++ ? FLAT_TRUE : FLAT_FALSE)"))
++
++(define_attr "fix_ut699" "false,true"
++ (symbol_ref "(sparc_fix_ut699 != 0
++ ? FIX_UT699_TRUE : FIX_UT699_FALSE)"))
++
++(define_attr "fix_b2bst" "false,true"
++ (symbol_ref "(sparc_fix_b2bst != 0
++ ? FIX_B2BST_TRUE : FIX_B2BST_FALSE)"))
++
++(define_attr "fix_lost_divsqrt" "false,true"
++ (symbol_ref "(sparc_fix_lost_divsqrt != 0
++ ? FIX_LOST_DIVSQRT_TRUE : FIX_LOST_DIVSQRT_FALSE)"))
++
++(define_attr "fix_gr712rc" "false,true"
++ (symbol_ref "(sparc_fix_gr712rc != 0
++ ? FIX_GR712RC_TRUE : FIX_GR712RC_FALSE)"))
++
++;; Length (in # of insns).
++;; Beware that setting a length greater or equal to 3 for conditional branches
++;; has a side-effect (see output_cbranch and output_v9branch).
++(define_attr "length" ""
++ (cond [(eq_attr "type" "uncond_branch,call")
++ (if_then_else (eq_attr "empty_delay_slot" "true")
++ (const_int 2)
++ (const_int 1))
++ (eq_attr "type" "sibcall")
++ (if_then_else (ior (eq_attr "leaf_function" "true")
++ (eq_attr "flat" "true"))
++ (if_then_else (eq_attr "empty_delay_slot" "true")
++ (const_int 3)
++ (const_int 2))
++ (if_then_else (eq_attr "empty_delay_slot" "true")
++ (const_int 2)
++ (const_int 1)))
++ (eq_attr "branch_type" "icc")
++ (if_then_else (match_operand 0 "v9_comparison_operator" "")
++ (if_then_else (lt (pc) (match_dup 1))
++ (if_then_else (lt (minus (match_dup 1) (pc)) (const_int 260000))
++ (if_then_else (eq_attr "empty_delay_slot" "true")
++ (const_int 2)
++ (const_int 1))
++ (if_then_else (eq_attr "empty_delay_slot" "true")
++ (const_int 4)
++ (const_int 3)))
++ (if_then_else (lt (minus (pc) (match_dup 1)) (const_int 260000))
++ (if_then_else (eq_attr "empty_delay_slot" "true")
++ (const_int 2)
++ (const_int 1))
++ (if_then_else (eq_attr "empty_delay_slot" "true")
++ (const_int 4)
++ (const_int 3))))
++ (if_then_else (eq_attr "empty_delay_slot" "true")
++ (const_int 2)
++ (const_int 1)))
++ (eq_attr "branch_type" "fcc")
++ (if_then_else (match_operand 0 "fcc0_register_operand" "")
++ (if_then_else (eq_attr "empty_delay_slot" "true")
++ (if_then_else (not (match_test "TARGET_V9"))
++ (const_int 3)
++ (const_int 2))
++ (if_then_else (not (match_test "TARGET_V9"))
++ (const_int 2)
++ (const_int 1)))
++ (if_then_else (lt (pc) (match_dup 2))
++ (if_then_else (lt (minus (match_dup 2) (pc)) (const_int 260000))
++ (if_then_else (eq_attr "empty_delay_slot" "true")
++ (const_int 2)
++ (const_int 1))
++ (if_then_else (eq_attr "empty_delay_slot" "true")
++ (const_int 4)
++ (const_int 3)))
++ (if_then_else (lt (minus (pc) (match_dup 2)) (const_int 260000))
++ (if_then_else (eq_attr "empty_delay_slot" "true")
++ (const_int 2)
++ (const_int 1))
++ (if_then_else (eq_attr "empty_delay_slot" "true")
++ (const_int 4)
++ (const_int 3)))))
++ (eq_attr "branch_type" "reg")
++ (if_then_else (lt (pc) (match_dup 2))
++ (if_then_else (lt (minus (match_dup 2) (pc)) (const_int 32000))
++ (if_then_else (eq_attr "empty_delay_slot" "true")
++ (const_int 2)
++ (const_int 1))
++ (if_then_else (eq_attr "empty_delay_slot" "true")
++ (const_int 4)
++ (const_int 3)))
++ (if_then_else (lt (minus (pc) (match_dup 2)) (const_int 32000))
++ (if_then_else (eq_attr "empty_delay_slot" "true")
++ (const_int 2)
++ (const_int 1))
++ (if_then_else (eq_attr "empty_delay_slot" "true")
++ (const_int 4)
++ (const_int 3))))
++ (eq_attr "type" "cbcond")
++ (if_then_else (lt (pc) (match_dup 3))
++ (if_then_else (lt (minus (match_dup 3) (pc)) (const_int 500))
++ (if_then_else (eq_attr "emit_cbcond_nop" "true")
++ (const_int 2)
++ (const_int 1))
++ (const_int 4))
++ (if_then_else (lt (minus (pc) (match_dup 3)) (const_int 500))
++ (if_then_else (eq_attr "emit_cbcond_nop" "true")
++ (const_int 2)
++ (const_int 1))
++ (const_int 4)))
++ (eq_attr "type" "uncond_cbcond")
++ (if_then_else (lt (pc) (match_dup 0))
++ (if_then_else (lt (minus (match_dup 0) (pc)) (const_int 500))
++ (if_then_else (eq_attr "emit_cbcond_nop" "true")
++ (const_int 2)
++ (const_int 1))
++ (const_int 1))
++ (if_then_else (lt (minus (pc) (match_dup 0)) (const_int 500))
++ (if_then_else (eq_attr "emit_cbcond_nop" "true")
++ (const_int 2)
++ (const_int 1))
++ (const_int 1)))
++ ] (const_int 1)))
++
++;; FP precision.
++(define_attr "fptype" "single,double"
++ (const_string "single"))
++
++;; FP precision specific to the UT699.
++(define_attr "fptype_ut699" "none,single"
++ (const_string "none"))
++
++;; UltraSPARC-III integer load type.
++(define_attr "us3load_type" "2cycle,3cycle"
++ (const_string "2cycle"))
++
++(define_asm_attributes
++ [(set_attr "length" "2")
++ (set_attr "type" "multi")])
++
++;; Attributes for branch scheduling
++(define_attr "tls_delay_slot" "false,true"
++ (symbol_ref "((TARGET_GNU_TLS && HAVE_GNU_LD) != 0
++ ? TLS_DELAY_SLOT_TRUE : TLS_DELAY_SLOT_FALSE)"))
++
++(define_attr "in_sibcall_delay" "false,true"
++ (symbol_ref "(eligible_for_sibcall_delay (insn)
++ ? IN_SIBCALL_DELAY_TRUE : IN_SIBCALL_DELAY_FALSE)"))
++
++(define_attr "in_return_delay" "false,true"
++ (symbol_ref "(eligible_for_return_delay (insn)
++ ? IN_RETURN_DELAY_TRUE : IN_RETURN_DELAY_FALSE)"))
++
++;; ??? !v9: Should implement the notion of predelay slots for floating-point
++;; branches. This would allow us to remove the nop always inserted before
++;; a floating point branch.
++
++;; ??? It is OK for fill_simple_delay_slots to put load/store instructions
++;; in a delay slot, but it is not OK for fill_eager_delay_slots to do so.
++;; This is because doing so will add several pipeline stalls to the path
++;; that the load/store did not come from. Unfortunately, there is no way
++;; to prevent fill_eager_delay_slots from using load/store without completely
++;; disabling them. For the SPEC benchmark set, this is a serious lose,
++;; because it prevents us from moving back the final store of inner loops.
++
++(define_attr "in_branch_delay" "false,true"
++ (cond [(eq_attr "type" "uncond_branch,branch,cbcond,uncond_cbcond,call,sibcall,call_no_delay_slot,multi")
++ (const_string "false")
++ (and (eq_attr "fix_lost_divsqrt" "true")
++ (eq_attr "type" "fpdivs,fpsqrts,fpdivd,fpsqrtd"))
++ (const_string "false")
++ (and (eq_attr "fix_b2bst" "true") (eq_attr "type" "store,fpstore"))
++ (const_string "false")
++ (and (eq_attr "fix_ut699" "true") (eq_attr "type" "load,sload"))
++ (const_string "false")
++ (and (eq_attr "fix_ut699" "true")
++ (and (eq_attr "type" "fpload,fp,fpmove,fpmul,fpdivs,fpsqrts")
++ (ior (eq_attr "fptype" "single")
++ (eq_attr "fptype_ut699" "single"))))
++ (const_string "false")
++ (eq_attr "length" "1")
++ (const_string "true")
++ ] (const_string "false")))
++
++(define_attr "in_integer_branch_annul_delay" "false,true"
++ (cond [(and (eq_attr "fix_gr712rc" "true")
++ (eq_attr "type" "fp,fpcmp,fpmove,fpcmove,fpmul,
++ fpdivs,fpsqrts,fpdivd,fpsqrtd"))
++ (const_string "false")
++ (eq_attr "in_branch_delay" "true")
++ (const_string "true")
++ ] (const_string "false")))
++
++(define_delay (eq_attr "type" "sibcall")
++ [(eq_attr "in_sibcall_delay" "true") (nil) (nil)])
++
++(define_delay (eq_attr "type" "return")
++ [(eq_attr "in_return_delay" "true") (nil) (nil)])
++
++(define_delay (ior (eq_attr "type" "call") (eq_attr "type" "uncond_branch"))
++ [(eq_attr "in_branch_delay" "true") (nil) (nil)])
++
++(define_delay (and (eq_attr "type" "branch") (not (eq_attr "branch_type" "icc")))
++ [(eq_attr "in_branch_delay" "true")
++ (nil)
++ (eq_attr "in_branch_delay" "true")])
++
++(define_delay (and (eq_attr "type" "branch") (eq_attr "branch_type" "icc"))
++ [(eq_attr "in_branch_delay" "true")
++ (nil)
++ (eq_attr "in_integer_branch_annul_delay" "true")])
++
++;; Include SPARC DFA schedulers
++
++(include "cypress.md")
++(include "supersparc.md")
++(include "hypersparc.md")
++(include "leon.md")
++(include "sparclet.md")
++(include "ultra1_2.md")
++(include "ultra3.md")
++(include "niagara.md")
++(include "niagara2.md")
++(include "niagara4.md")
++(include "niagara7.md")
++(include "m8.md")
++
++
++;; Operand and operator predicates and constraints
++
++(include "predicates.md")
++(include "constraints.md")
++
++
++;; Compare instructions.
++
++;; These are just the DEFINE_INSNs to match the patterns and the
++;; DEFINE_SPLITs for some of the scc insns that actually require
++;; more than one machine instruction. DEFINE_EXPANDs are further down.
++
++(define_insn "*cmpsi_insn"
++ [(set (reg:CC CC_REG)
++ (compare:CC (match_operand:SI 0 "register_operand" "r")
++ (match_operand:SI 1 "arith_operand" "rI")))]
++ ""
++ "cmp\t%0, %1"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmpdi_sp64"
++ [(set (reg:CCX CC_REG)
++ (compare:CCX (match_operand:DI 0 "register_operand" "r")
++ (match_operand:DI 1 "arith_operand" "rI")))]
++ "TARGET_ARCH64"
++ "cmp\t%0, %1"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmpsi_sne"
++ [(set (reg:CCC CC_REG)
++ (compare:CCC (not:SI (match_operand:SI 0 "arith_operand" "rI"))
++ (const_int -1)))]
++ ""
++ "cmp\t%%g0, %0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmpdi_sne"
++ [(set (reg:CCXC CC_REG)
++ (compare:CCXC (not:DI (match_operand:DI 0 "arith_operand" "rI"))
++ (const_int -1)))]
++ "TARGET_ARCH64"
++ "cmp\t%%g0, %0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmpsf_fpe"
++ [(set (match_operand:CCFPE 0 "fcc_register_operand" "=c")
++ (compare:CCFPE (match_operand:SF 1 "register_operand" "f")
++ (match_operand:SF 2 "register_operand" "f")))]
++ "TARGET_FPU"
++{
++ if (TARGET_V9)
++ return "fcmpes\t%0, %1, %2";
++ return "fcmpes\t%1, %2";
++}
++ [(set_attr "type" "fpcmp")])
++
++(define_insn "*cmpdf_fpe"
++ [(set (match_operand:CCFPE 0 "fcc_register_operand" "=c")
++ (compare:CCFPE (match_operand:DF 1 "register_operand" "e")
++ (match_operand:DF 2 "register_operand" "e")))]
++ "TARGET_FPU"
++{
++ if (TARGET_V9)
++ return "fcmped\t%0, %1, %2";
++ return "fcmped\t%1, %2";
++}
++ [(set_attr "type" "fpcmp")
++ (set_attr "fptype" "double")])
++
++(define_insn "*cmptf_fpe"
++ [(set (match_operand:CCFPE 0 "fcc_register_operand" "=c")
++ (compare:CCFPE (match_operand:TF 1 "register_operand" "e")
++ (match_operand:TF 2 "register_operand" "e")))]
++ "TARGET_FPU && TARGET_HARD_QUAD"
++{
++ if (TARGET_V9)
++ return "fcmpeq\t%0, %1, %2";
++ return "fcmpeq\t%1, %2";
++}
++ [(set_attr "type" "fpcmp")])
++
++(define_insn "*cmpsf_fp"
++ [(set (match_operand:CCFP 0 "fcc_register_operand" "=c")
++ (compare:CCFP (match_operand:SF 1 "register_operand" "f")
++ (match_operand:SF 2 "register_operand" "f")))]
++ "TARGET_FPU"
++{
++ if (TARGET_V9)
++ return "fcmps\t%0, %1, %2";
++ return "fcmps\t%1, %2";
++}
++ [(set_attr "type" "fpcmp")])
++
++(define_insn "*cmpdf_fp"
++ [(set (match_operand:CCFP 0 "fcc_register_operand" "=c")
++ (compare:CCFP (match_operand:DF 1 "register_operand" "e")
++ (match_operand:DF 2 "register_operand" "e")))]
++ "TARGET_FPU"
++{
++ if (TARGET_V9)
++ return "fcmpd\t%0, %1, %2";
++ return "fcmpd\t%1, %2";
++}
++ [(set_attr "type" "fpcmp")
++ (set_attr "fptype" "double")])
++
++(define_insn "*cmptf_fp"
++ [(set (match_operand:CCFP 0 "fcc_register_operand" "=c")
++ (compare:CCFP (match_operand:TF 1 "register_operand" "e")
++ (match_operand:TF 2 "register_operand" "e")))]
++ "TARGET_FPU && TARGET_HARD_QUAD"
++{
++ if (TARGET_V9)
++ return "fcmpq\t%0, %1, %2";
++ return "fcmpq\t%1, %2";
++}
++ [(set_attr "type" "fpcmp")])
++
++;; Next come the scc insns.
++
++;; Note that the boolean result (operand 0) takes on DImode
++;; (not SImode) when TARGET_ARCH64.
++
++(define_expand "cstoresi4"
++ [(use (match_operator 1 "comparison_operator"
++ [(match_operand:SI 2 "compare_operand" "")
++ (match_operand:SI 3 "arith_operand" "")]))
++ (clobber (match_operand:SI 0 "cstore_result_operand"))]
++ ""
++{
++ if (GET_CODE (operands[2]) == ZERO_EXTRACT && operands[3] != const0_rtx)
++ operands[2] = force_reg (SImode, operands[2]);
++ if (emit_scc_insn (operands)) DONE; else FAIL;
++})
++
++(define_expand "cstoredi4"
++ [(use (match_operator 1 "comparison_operator"
++ [(match_operand:DI 2 "compare_operand" "")
++ (match_operand:DI 3 "arith_operand" "")]))
++ (clobber (match_operand:SI 0 "cstore_result_operand"))]
++ "TARGET_ARCH64"
++{
++ if (GET_CODE (operands[2]) == ZERO_EXTRACT && operands[3] != const0_rtx)
++ operands[2] = force_reg (DImode, operands[2]);
++ if (emit_scc_insn (operands)) DONE; else FAIL;
++})
++
++(define_expand "cstore<F:mode>4"
++ [(use (match_operator 1 "comparison_operator"
++ [(match_operand:F 2 "register_operand" "")
++ (match_operand:F 3 "register_operand" "")]))
++ (clobber (match_operand:SI 0 "cstore_result_operand"))]
++ "TARGET_FPU"
++{
++ if (emit_scc_insn (operands)) DONE; else FAIL;
++})
++
++;; The SNE and SEQ patterns are special because they can be done
++;; without any branching and do not involve a COMPARE.
++
++(define_insn_and_split "*snesi<W:mode>_zero"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (ne:W (match_operand:SI 1 "register_operand" "r")
++ (const_int 0)))
++ (clobber (reg:CC CC_REG))]
++ ""
++ "#"
++ ""
++ [(set (reg:CCC CC_REG) (compare:CCC (not:SI (match_dup 1)) (const_int -1)))
++ (set (match_dup 0) (ltu:W (reg:CCC CC_REG) (const_int 0)))]
++ ""
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*neg_snesi<W:mode>_zero"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (neg:W (ne:W (match_operand:SI 1 "register_operand" "r")
++ (const_int 0))))
++ (clobber (reg:CC CC_REG))]
++ ""
++ "#"
++ ""
++ [(set (reg:CCC CC_REG) (compare:CCC (not:SI (match_dup 1)) (const_int -1)))
++ (set (match_dup 0) (neg:W (ltu:W (reg:CCC CC_REG) (const_int 0))))]
++ ""
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*snedi<W:mode>_zero"
++ [(set (match_operand:W 0 "register_operand" "=&r")
++ (ne:W (match_operand:DI 1 "register_operand" "r")
++ (const_int 0)))]
++ "TARGET_ARCH64 && !TARGET_VIS3"
++ "#"
++ "&& !reg_overlap_mentioned_p (operands[1], operands[0])"
++ [(set (match_dup 0) (const_int 0))
++ (set (match_dup 0) (if_then_else:W (ne:DI (match_dup 1) (const_int 0))
++ (const_int 1)
++ (match_dup 0)))]
++ ""
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*snedi<W:mode>_zero_vis3"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (ne:W (match_operand:DI 1 "register_operand" "r")
++ (const_int 0)))
++ (clobber (reg:CCX CC_REG))]
++ "TARGET_ARCH64 && TARGET_VIS3"
++ "#"
++ ""
++ [(set (reg:CCXC CC_REG) (compare:CCXC (not:DI (match_dup 1)) (const_int -1)))
++ (set (match_dup 0) (ltu:W (reg:CCXC CC_REG) (const_int 0)))]
++ ""
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*neg_snedi<W:mode>_zero"
++ [(set (match_operand:W 0 "register_operand" "=&r")
++ (neg:W (ne:W (match_operand:DI 1 "register_operand" "r")
++ (const_int 0))))]
++ "TARGET_ARCH64 && !TARGET_SUBXC"
++ "#"
++ "&& !reg_overlap_mentioned_p (operands[1], operands[0])"
++ [(set (match_dup 0) (const_int 0))
++ (set (match_dup 0) (if_then_else:W (ne:DI (match_dup 1) (const_int 0))
++ (const_int -1)
++ (match_dup 0)))]
++ ""
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*neg_snedi<W:mode>_zero_subxc"
++ [(set (match_operand:W 0 "register_operand" "=&r")
++ (neg:W (ne:W (match_operand:DI 1 "register_operand" "r")
++ (const_int 0))))
++ (clobber (reg:CCX CC_REG))]
++ "TARGET_ARCH64 && TARGET_SUBXC"
++ "#"
++ ""
++ [(set (reg:CCXC CC_REG) (compare:CCXC (not:DI (match_dup 1)) (const_int -1)))
++ (set (match_dup 0) (neg:W (ltu:W (reg:CCXC CC_REG) (const_int 0))))]
++ ""
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*seqsi<W:mode>_zero"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (eq:W (match_operand:SI 1 "register_operand" "r")
++ (const_int 0)))
++ (clobber (reg:CC CC_REG))]
++ ""
++ "#"
++ ""
++ [(set (reg:CCC CC_REG) (compare:CCC (not:SI (match_dup 1)) (const_int -1)))
++ (set (match_dup 0) (geu:W (reg:CCC CC_REG) (const_int 0)))]
++ ""
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*neg_seqsi<W:mode>_zero"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (neg:W (eq:W (match_operand:SI 1 "register_operand" "r")
++ (const_int 0))))
++ (clobber (reg:CC CC_REG))]
++ ""
++ "#"
++ ""
++ [(set (reg:CCC CC_REG) (compare:CCC (not:SI (match_dup 1)) (const_int -1)))
++ (set (match_dup 0) (neg:W (geu:W (reg:CCC CC_REG) (const_int 0))))]
++ ""
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*seqdi<W:mode>_zero"
++ [(set (match_operand:W 0 "register_operand" "=&r")
++ (eq:W (match_operand:DI 1 "register_operand" "r")
++ (const_int 0)))]
++ "TARGET_ARCH64"
++ "#"
++ "&& !reg_overlap_mentioned_p (operands[1], operands[0])"
++ [(set (match_dup 0) (const_int 0))
++ (set (match_dup 0) (if_then_else:W (eq:DI (match_dup 1) (const_int 0))
++ (const_int 1)
++ (match_dup 0)))]
++ ""
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*neg_seqdi<W:mode>_zero"
++ [(set (match_operand:W 0 "register_operand" "=&r")
++ (neg:W (eq:W (match_operand:DI 1 "register_operand" "r")
++ (const_int 0))))]
++ "TARGET_ARCH64"
++ "#"
++ "&& !reg_overlap_mentioned_p (operands[1], operands[0])"
++ [(set (match_dup 0) (const_int 0))
++ (set (match_dup 0) (if_then_else:W (eq:DI (match_dup 1) (const_int 0))
++ (const_int -1)
++ (match_dup 0)))]
++ ""
++ [(set_attr "length" "2")])
++
++;; We can also do (x + (i == 0)) and related, so put them in.
++
++(define_insn_and_split "*plus_snesi<W:mode>_zero"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (plus:W (ne:W (match_operand:SI 1 "register_operand" "r")
++ (const_int 0))
++ (match_operand:W 2 "register_operand" "r")))
++ (clobber (reg:CC CC_REG))]
++ ""
++ "#"
++ ""
++ [(set (reg:CCC CC_REG) (compare:CCC (not:SI (match_dup 1)) (const_int -1)))
++ (set (match_dup 0) (plus:W (ltu:W (reg:CCC CC_REG) (const_int 0))
++ (match_dup 2)))]
++ ""
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*plus_plus_snesi<W:mode>_zero"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (plus:W (plus:W (ne:W (match_operand:SI 1 "register_operand" "r")
++ (const_int 0))
++ (match_operand:W 2 "register_operand" "r"))
++ (match_operand:W 3 "register_operand" "r")))
++ (clobber (reg:CC CC_REG))]
++ ""
++ "#"
++ ""
++ [(set (reg:CCC CC_REG) (compare:CCC (not:SI (match_dup 1)) (const_int -1)))
++ (set (match_dup 0) (plus:W (plus:W (ltu:W (reg:CCC CC_REG) (const_int 0))
++ (match_dup 2))
++ (match_dup 3)))]
++ ""
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*plus_snedi<W:mode>_zero"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (plus:W (ne:W (match_operand:DI 1 "register_operand" "r")
++ (const_int 0))
++ (match_operand:W 2 "register_operand" "r")))
++ (clobber (reg:CCX CC_REG))]
++ "TARGET_ARCH64 && TARGET_VIS3"
++ "#"
++ ""
++ [(set (reg:CCXC CC_REG) (compare:CCXC (not:DI (match_dup 1)) (const_int -1)))
++ (set (match_dup 0) (plus:W (ltu:W (reg:CCXC CC_REG) (const_int 0))
++ (match_dup 2)))]
++ ""
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*plus_plus_snedi<W:mode>_zero"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (plus:W (plus:W (ne:W (match_operand:DI 1 "register_operand" "r")
++ (const_int 0))
++ (match_operand:W 2 "register_operand" "r"))
++ (match_operand:W 3 "register_operand" "r")))
++ (clobber (reg:CCX CC_REG))]
++ "TARGET_ARCH64 && TARGET_VIS3"
++ "#"
++ ""
++ [(set (reg:CCXC CC_REG) (compare:CCXC (not:DI (match_dup 1)) (const_int -1)))
++ (set (match_dup 0) (plus:W (plus:W (ltu:W (reg:CCXC CC_REG) (const_int 0))
++ (match_dup 2))
++ (match_dup 3)))]
++ ""
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*minus_snesi<W:mode>_zero"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (minus:W (match_operand:W 2 "register_operand" "r")
++ (ne:W (match_operand:SI 1 "register_operand" "r")
++ (const_int 0))))
++ (clobber (reg:CC CC_REG))]
++ ""
++ "#"
++ ""
++ [(set (reg:CCC CC_REG) (compare:CCC (not:SI (match_dup 1)) (const_int -1)))
++ (set (match_dup 0) (minus:W (match_dup 2)
++ (ltu:W (reg:CCC CC_REG) (const_int 0))))]
++ ""
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*minus_minus_snesi<W:mode>_zero"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (minus:W (minus:W (match_operand:W 2 "register_operand" "r")
++ (ne:W (match_operand:SI 1 "register_operand" "r")
++ (const_int 0)))
++ (match_operand:W 3 "register_operand" "r")))
++ (clobber (reg:CC CC_REG))]
++ ""
++ "#"
++ ""
++ [(set (reg:CCC CC_REG) (compare:CCC (not:SI (match_dup 1)) (const_int -1)))
++ (set (match_dup 0) (minus:W (minus:W (match_dup 2)
++ (ltu:W (reg:CCC CC_REG) (const_int 0)))
++ (match_dup 3)))]
++ ""
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*minus_snedi<W:mode>_zero"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (minus:W (match_operand:W 2 "register_operand" "r")
++ (ne:W (match_operand:DI 1 "register_operand" "r")
++ (const_int 0))))
++ (clobber (reg:CCX CC_REG))]
++ "TARGET_ARCH64 && TARGET_SUBXC"
++ "#"
++ ""
++ [(set (reg:CCXC CC_REG) (compare:CCXC (not:DI (match_dup 1)) (const_int -1)))
++ (set (match_dup 0) (minus:W (match_dup 2)
++ (ltu:W (reg:CCXC CC_REG) (const_int 0))))]
++ ""
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*minus_minus_snedi<W:mode>_zero"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (minus:W (minus:W (match_operand:W 2 "register_operand" "r")
++ (ne:W (match_operand:DI 1 "register_operand" "r")
++ (const_int 0)))
++ (match_operand:W 3 "register_operand" "r")))
++ (clobber (reg:CCX CC_REG))]
++ "TARGET_ARCH64 && TARGET_SUBXC"
++ "#"
++ ""
++ [(set (reg:CCXC CC_REG) (compare:CCXC (not:DI (match_dup 1)) (const_int -1)))
++ (set (match_dup 0) (minus:W (minus:W (match_dup 2)
++ (ltu:W (reg:CCXC CC_REG) (const_int 0)))
++ (match_dup 3)))]
++ ""
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*plus_seqsi<W:mode>_zero"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (plus:W (eq:W (match_operand:SI 1 "register_operand" "r")
++ (const_int 0))
++ (match_operand:W 2 "register_operand" "r")))
++ (clobber (reg:CC CC_REG))]
++ ""
++ "#"
++ ""
++ [(set (reg:CCC CC_REG) (compare:CCC (not:SI (match_dup 1)) (const_int -1)))
++ (set (match_dup 0) (plus:W (geu:W (reg:CCC CC_REG) (const_int 0))
++ (match_dup 2)))]
++ ""
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*minus_seqsi<W:mode>_zero"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (minus:W (match_operand:W 2 "register_operand" "r")
++ (eq:W (match_operand:SI 1 "register_operand" "r")
++ (const_int 0))))
++ (clobber (reg:CC CC_REG))]
++ ""
++ "#"
++ ""
++ [(set (reg:CCC CC_REG) (compare:CCC (not:SI (match_dup 1)) (const_int -1)))
++ (set (match_dup 0) (minus:W (match_dup 2)
++ (geu:W (reg:CCC CC_REG) (const_int 0))))]
++ ""
++ [(set_attr "length" "2")])
++
++;; We can also do GEU and LTU directly, but these operate after a compare.
++
++(define_insn "*sltu<W:mode>_insn"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (ltu:W (match_operand 1 "icc_register_operand" "X") (const_int 0)))]
++ "GET_MODE (operands[1]) == CCmode || GET_MODE (operands[1]) == CCCmode"
++ "addx\t%%g0, 0, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*plus_sltu<W:mode>"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (plus:W (ltu:W (match_operand 2 "icc_register_operand" "X")
++ (const_int 0))
++ (match_operand:W 1 "arith_operand" "rI")))]
++ "GET_MODE (operands[2]) == CCmode || GET_MODE (operands[2]) == CCCmode"
++ "addx\t%%g0, %1, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*plus_plus_sltu<W:mode>"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (plus:W (plus:W (ltu:W (match_operand 3 "icc_register_operand" "X")
++ (const_int 0))
++ (match_operand:W 1 "register_operand" "%r"))
++ (match_operand:W 2 "arith_operand" "rI")))]
++ "GET_MODE (operands[3]) == CCmode || GET_MODE (operands[3]) == CCCmode"
++ "addx\t%1, %2, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*neg_sgeu<W:mode>"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (neg:W (geu:W (match_operand 1 "icc_register_operand" "X")
++ (const_int 0))))]
++ "GET_MODE (operands[1]) == CCmode || GET_MODE (operands[1]) == CCCmode"
++ "addx\t%%g0, -1, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*neg_sgeusidi"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (sign_extend:DI (neg:SI (geu:SI (match_operand 1 "icc_register_operand" "X")
++ (const_int 0)))))]
++ "TARGET_ARCH64
++ && (GET_MODE (operands[1]) == CCmode || GET_MODE (operands[1]) == CCCmode)"
++ "addx\t%%g0, -1, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*minus_sgeu<W:mode>"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (minus:W (match_operand:W 1 "register_operand" "r")
++ (geu:W (match_operand 2 "icc_register_operand" "X")
++ (const_int 0))))]
++ "GET_MODE (operands[2]) == CCmode || GET_MODE (operands[2]) == CCCmode"
++ "addx\t%1, -1, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*addx<W:mode>"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (plus:W (plus:W (match_operand:W 1 "register_operand" "%r")
++ (match_operand:W 2 "arith_operand" "rI"))
++ (ltu:W (match_operand 3 "icc_register_operand" "X")
++ (const_int 0))))]
++ "GET_MODE (operands[3]) == CCmode || GET_MODE (operands[3]) == CCCmode"
++ "addx\t%1, %2, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*sltu<W:mode>_insn_vis3"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (ltu:W (match_operand 1 "icc_register_operand" "X") (const_int 0)))]
++ "TARGET_ARCH64 && TARGET_VIS3
++ && (GET_MODE (operands[1]) == CCXmode || GET_MODE (operands[1]) == CCXCmode)"
++ "addxc\t%%g0, %%g0, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*plus_sltu<W:mode>_vis3"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (plus:W (ltu:W (match_operand 2 "icc_register_operand" "X")
++ (const_int 0))
++ (match_operand:W 1 "register_operand" "r")))]
++ "TARGET_ARCH64 && TARGET_VIS3
++ && (GET_MODE (operands[2]) == CCXmode || GET_MODE (operands[2]) == CCXCmode)"
++ "addxc\t%%g0, %1, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*plus_plus_sltu<W:mode>_vis3"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (plus:W (plus:W (ltu:W (match_operand 3 "icc_register_operand" "X")
++ (const_int 0))
++ (match_operand:W 1 "register_operand" "%r"))
++ (match_operand:W 2 "register_operand" "r")))]
++ "TARGET_ARCH64 && TARGET_VIS3
++ && (GET_MODE (operands[3]) == CCXmode || GET_MODE (operands[3]) == CCXCmode)"
++ "addxc\t%1, %2, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*addxc<W:mode>"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (plus:W (plus:W (match_operand:W 1 "register_operand" "%r")
++ (match_operand:W 2 "register_operand" "r"))
++ (ltu:W (match_operand 3 "icc_register_operand" "X")
++ (const_int 0))))]
++ "TARGET_ARCH64 && TARGET_VIS3
++ && (GET_MODE (operands[3]) == CCXmode || GET_MODE (operands[3]) == CCXCmode)"
++ "addxc\t%1, %2, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*neg_sltu<W:mode>"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (neg:W (ltu:W (match_operand 1 "icc_register_operand" "X")
++ (const_int 0))))]
++ "GET_MODE (operands[1]) == CCmode || GET_MODE (operands[1]) == CCCmode"
++ "subx\t%%g0, 0, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*neg_sltusidi"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (sign_extend:DI (neg:SI (ltu:SI (match_operand 1 "icc_register_operand" "X")
++ (const_int 0)))))]
++ "TARGET_ARCH64
++ && (GET_MODE (operands[1]) == CCmode || GET_MODE (operands[1]) == CCCmode)"
++ "subx\t%%g0, 0, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*minus_neg_sltu<W:mode>"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (minus:W (neg:W (ltu:W (match_operand 2 "icc_register_operand" "X")
++ (const_int 0)))
++ (match_operand:W 1 "arith_operand" "rI")))]
++ "GET_MODE (operands[2]) == CCmode || GET_MODE (operands[2]) == CCCmode"
++ "subx\t%%g0, %1, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*neg_plus_sltu<W:mode>"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (neg:W (plus:W (ltu:W (match_operand 2 "icc_register_operand" "X")
++ (const_int 0))
++ (match_operand:W 1 "arith_operand" "rI"))))]
++ "GET_MODE (operands[2]) == CCmode || GET_MODE (operands[2]) == CCCmode"
++ "subx\t%%g0, %1, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*minus_sltu<W:mode>"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (minus:W (match_operand:W 1 "register_operand" "r")
++ (ltu:W (match_operand 2 "icc_register_operand" "X")
++ (const_int 0))))]
++ "GET_MODE (operands[2]) == CCmode || GET_MODE (operands[2]) == CCCmode"
++ "subx\t%1, 0, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*minus_minus_sltu<W:mode>"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (minus:W (minus:W (match_operand:W 1 "register_or_zero_operand" "rJ")
++ (ltu:W (match_operand 3 "icc_register_operand" "X")
++ (const_int 0)))
++ (match_operand:W 2 "arith_operand" "rI")))]
++ "GET_MODE (operands[3]) == CCmode || GET_MODE (operands[3]) == CCCmode"
++ "subx\t%r1, %2, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*sgeu<W:mode>_insn"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (geu:W (match_operand 1 "icc_register_operand" "X") (const_int 0)))]
++ "GET_MODE (operands[1]) == CCmode || GET_MODE (operands[1]) == CCCmode"
++ "subx\t%%g0, -1, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*plus_sgeu<W:mode>"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (plus:W (geu:W (match_operand 2 "icc_register_operand" "X")
++ (const_int 0))
++ (match_operand:W 1 "register_operand" "r")))]
++ "GET_MODE (operands[2]) == CCmode || GET_MODE (operands[2]) == CCCmode"
++ "subx\t%1, -1, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*subx<W:mode>"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (minus:W (minus:W (match_operand:W 1 "register_or_zero_operand" "rJ")
++ (match_operand:W 2 "arith_operand" "rI"))
++ (ltu:W (match_operand 3 "icc_register_operand" "X")
++ (const_int 0))))]
++ "GET_MODE (operands[3]) == CCmode || GET_MODE (operands[3]) == CCCmode"
++ "subx\t%r1, %2, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*neg_sltu<W:mode>_subxc"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (neg:W (ltu:W (match_operand 1 "icc_register_operand" "X")
++ (const_int 0))))]
++ "TARGET_ARCH64 && TARGET_SUBXC
++ && (GET_MODE (operands[1]) == CCXmode || GET_MODE (operands[1]) == CCXCmode)"
++ "subxc\t%%g0, %%g0, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*minus_neg_sltu<W:mode>_subxc"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (minus:W (neg:W (ltu:W (match_operand 2 "icc_register_operand" "X")
++ (const_int 0)))
++ (match_operand:W 1 "register_operand" "r")))]
++ "TARGET_ARCH64 && TARGET_SUBXC
++ && (GET_MODE (operands[2]) == CCXmode || GET_MODE (operands[2]) == CCXCmode)"
++ "subxc\t%%g0, %1, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*neg_plus_sltu<W:mode>_subxc"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (neg:W (plus:W (ltu:W (match_operand 2 "icc_register_operand" "X")
++ (const_int 0))
++ (match_operand:W 1 "register_operand" "r"))))]
++ "TARGET_ARCH64 && TARGET_SUBXC
++ && (GET_MODE (operands[2]) == CCXmode || GET_MODE (operands[2]) == CCXCmode)"
++ "subxc\t%%g0, %1, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*minus_sltu<W:mode>_subxc"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (minus:W (match_operand:W 1 "register_operand" "r")
++ (ltu:W (match_operand 2 "icc_register_operand" "X")
++ (const_int 0))))]
++ "TARGET_ARCH64 && TARGET_SUBXC
++ && (GET_MODE (operands[2]) == CCXmode || GET_MODE (operands[2]) == CCXCmode)"
++ "subxc\t%1, %%g0, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*minus_minus_sltu<W:mode>_subxc"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (minus:W (minus:W (match_operand:W 1 "register_or_zero_operand" "rJ")
++ (ltu:W (match_operand 3 "icc_register_operand" "X")
++ (const_int 0)))
++ (match_operand:W 2 "register_operand" "r")))]
++ "TARGET_ARCH64 && TARGET_SUBXC
++ && (GET_MODE (operands[3]) == CCXmode || GET_MODE (operands[3]) == CCXCmode)"
++ "subxc\t%r1, %2, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_insn "*subxc<W:mode>"
++ [(set (match_operand:W 0 "register_operand" "=r")
++ (minus:W (minus:W (match_operand:W 1 "register_or_zero_operand" "rJ")
++ (match_operand:W 2 "register_operand" "r"))
++ (ltu:W (match_operand 3 "icc_register_operand" "X")
++ (const_int 0))))]
++ "TARGET_ARCH64 && TARGET_SUBXC
++ && (GET_MODE (operands[3]) == CCXmode || GET_MODE (operands[3]) == CCXCmode)"
++ "subxc\t%r1, %2, %0"
++ [(set_attr "type" "ialuX")])
++
++(define_split
++ [(set (match_operand:W 0 "register_operand" "")
++ (match_operator:W 1 "icc_comparison_operator"
++ [(match_operand 2 "icc_register_operand" "") (const_int 0)]))]
++ "TARGET_V9
++ /* 64-bit LTU is better implemented using addxc with VIS3. */
++ && !(GET_CODE (operands[1]) == LTU
++ && (GET_MODE (operands[2]) == CCXmode
++ || GET_MODE (operands[2]) == CCXCmode)
++ && TARGET_VIS3)
++ /* 32-bit LTU/GEU are better implemented using addx/subx. */
++ && !((GET_CODE (operands[1]) == LTU || GET_CODE (operands[1]) == GEU)
++ && (GET_MODE (operands[2]) == CCmode
++ || GET_MODE (operands[2]) == CCCmode))"
++ [(set (match_dup 0) (const_int 0))
++ (set (match_dup 0)
++ (if_then_else:SI (match_op_dup:W 1 [(match_dup 2) (const_int 0)])
++ (const_int 1)
++ (match_dup 0)))]
++ "")
++
++;; These control RTL generation for conditional jump insns
++
++(define_expand "cbranchcc4"
++ [(set (pc)
++ (if_then_else (match_operator 0 "comparison_operator"
++ [(match_operand 1 "compare_operand" "")
++ (match_operand 2 "const_zero_operand" "")])
++ (label_ref (match_operand 3 "" ""))
++ (pc)))]
++ ""
++ "")
++
++(define_expand "cbranchsi4"
++ [(use (match_operator 0 "comparison_operator"
++ [(match_operand:SI 1 "compare_operand" "")
++ (match_operand:SI 2 "arith_operand" "")]))
++ (use (match_operand 3 ""))]
++ ""
++{
++ if (GET_CODE (operands[1]) == ZERO_EXTRACT && operands[2] != const0_rtx)
++ operands[1] = force_reg (SImode, operands[1]);
++ emit_conditional_branch_insn (operands);
++ DONE;
++})
++
++(define_expand "cbranchdi4"
++ [(use (match_operator 0 "comparison_operator"
++ [(match_operand:DI 1 "compare_operand" "")
++ (match_operand:DI 2 "arith_operand" "")]))
++ (use (match_operand 3 ""))]
++ "TARGET_ARCH64"
++{
++ if (GET_CODE (operands[1]) == ZERO_EXTRACT && operands[2] != const0_rtx)
++ operands[1] = force_reg (DImode, operands[1]);
++ emit_conditional_branch_insn (operands);
++ DONE;
++})
++
++(define_expand "cbranch<F:mode>4"
++ [(use (match_operator 0 "comparison_operator"
++ [(match_operand:F 1 "register_operand" "")
++ (match_operand:F 2 "register_operand" "")]))
++ (use (match_operand 3 ""))]
++ "TARGET_FPU"
++{
++ emit_conditional_branch_insn (operands);
++ DONE;
++})
++
++
++;; Now match both normal and inverted jump.
++
++;; XXX fpcmp nop braindamage
++(define_insn "*normal_branch"
++ [(set (pc)
++ (if_then_else (match_operator 0 "icc_comparison_operator"
++ [(reg CC_REG) (const_int 0)])
++ (label_ref (match_operand 1 "" ""))
++ (pc)))]
++ ""
++{
++ return output_cbranch (operands[0], operands[1], 1, 0,
++ final_sequence && INSN_ANNULLED_BRANCH_P (insn),
++ insn);
++}
++ [(set_attr "type" "branch")
++ (set_attr "branch_type" "icc")])
++
++;; XXX fpcmp nop braindamage
++(define_insn "*inverted_branch"
++ [(set (pc)
++ (if_then_else (match_operator 0 "icc_comparison_operator"
++ [(reg CC_REG) (const_int 0)])
++ (pc)
++ (label_ref (match_operand 1 "" ""))))]
++ ""
++{
++ return output_cbranch (operands[0], operands[1], 1, 1,
++ final_sequence && INSN_ANNULLED_BRANCH_P (insn),
++ insn);
++}
++ [(set_attr "type" "branch")
++ (set_attr "branch_type" "icc")])
++
++;; XXX fpcmp nop braindamage
++(define_insn "*normal_fp_branch"
++ [(set (pc)
++ (if_then_else (match_operator 1 "comparison_operator"
++ [(match_operand:CCFP 0 "fcc_register_operand" "c")
++ (const_int 0)])
++ (label_ref (match_operand 2 "" ""))
++ (pc)))]
++ ""
++{
++ return output_cbranch (operands[1], operands[2], 2, 0,
++ final_sequence && INSN_ANNULLED_BRANCH_P (insn),
++ insn);
++}
++ [(set_attr "type" "branch")
++ (set_attr "branch_type" "fcc")])
++
++;; XXX fpcmp nop braindamage
++(define_insn "*inverted_fp_branch"
++ [(set (pc)
++ (if_then_else (match_operator 1 "comparison_operator"
++ [(match_operand:CCFP 0 "fcc_register_operand" "c")
++ (const_int 0)])
++ (pc)
++ (label_ref (match_operand 2 "" ""))))]
++ ""
++{
++ return output_cbranch (operands[1], operands[2], 2, 1,
++ final_sequence && INSN_ANNULLED_BRANCH_P (insn),
++ insn);
++}
++ [(set_attr "type" "branch")
++ (set_attr "branch_type" "fcc")])
++
++;; XXX fpcmp nop braindamage
++(define_insn "*normal_fpe_branch"
++ [(set (pc)
++ (if_then_else (match_operator 1 "comparison_operator"
++ [(match_operand:CCFPE 0 "fcc_register_operand" "c")
++ (const_int 0)])
++ (label_ref (match_operand 2 "" ""))
++ (pc)))]
++ ""
++{
++ return output_cbranch (operands[1], operands[2], 2, 0,
++ final_sequence && INSN_ANNULLED_BRANCH_P (insn),
++ insn);
++}
++ [(set_attr "type" "branch")
++ (set_attr "branch_type" "fcc")])
++
++;; XXX fpcmp nop braindamage
++(define_insn "*inverted_fpe_branch"
++ [(set (pc)
++ (if_then_else (match_operator 1 "comparison_operator"
++ [(match_operand:CCFPE 0 "fcc_register_operand" "c")
++ (const_int 0)])
++ (pc)
++ (label_ref (match_operand 2 "" ""))))]
++ ""
++{
++ return output_cbranch (operands[1], operands[2], 2, 1,
++ final_sequence && INSN_ANNULLED_BRANCH_P (insn),
++ insn);
++}
++ [(set_attr "type" "branch")
++ (set_attr "branch_type" "fcc")])
++
++;; SPARC V9-specific jump insns. None of these are guaranteed to be
++;; in the architecture.
++
++(define_insn "*cbcond_sp32"
++ [(set (pc)
++ (if_then_else (match_operator 0 "comparison_operator"
++ [(match_operand:SI 1 "register_operand" "r")
++ (match_operand:SI 2 "arith5_operand" "rA")])
++ (label_ref (match_operand 3 "" ""))
++ (pc)))]
++ "TARGET_CBCOND"
++{
++ return output_cbcond (operands[0], operands[3], insn);
++}
++ [(set_attr "type" "cbcond")])
++
++(define_insn "*cbcond_sp64"
++ [(set (pc)
++ (if_then_else (match_operator 0 "comparison_operator"
++ [(match_operand:DI 1 "register_operand" "r")
++ (match_operand:DI 2 "arith5_operand" "rA")])
++ (label_ref (match_operand 3 "" ""))
++ (pc)))]
++ "TARGET_ARCH64 && TARGET_CBCOND"
++{
++ return output_cbcond (operands[0], operands[3], insn);
++}
++ [(set_attr "type" "cbcond")])
++
++;; There are no 32-bit brreg insns.
++
++(define_insn "*normal_int_branch_sp64"
++ [(set (pc)
++ (if_then_else (match_operator 0 "v9_register_comparison_operator"
++ [(match_operand:DI 1 "register_operand" "r")
++ (const_int 0)])
++ (label_ref (match_operand 2 "" ""))
++ (pc)))]
++ "TARGET_ARCH64"
++{
++ return output_v9branch (operands[0], operands[2], 1, 2, 0,
++ final_sequence && INSN_ANNULLED_BRANCH_P (insn),
++ insn);
++}
++ [(set_attr "type" "branch")
++ (set_attr "branch_type" "reg")])
++
++(define_insn "*inverted_int_branch_sp64"
++ [(set (pc)
++ (if_then_else (match_operator 0 "v9_register_comparison_operator"
++ [(match_operand:DI 1 "register_operand" "r")
++ (const_int 0)])
++ (pc)
++ (label_ref (match_operand 2 "" ""))))]
++ "TARGET_ARCH64"
++{
++ return output_v9branch (operands[0], operands[2], 1, 2, 1,
++ final_sequence && INSN_ANNULLED_BRANCH_P (insn),
++ insn);
++}
++ [(set_attr "type" "branch")
++ (set_attr "branch_type" "reg")])
++
++
++;; Load in operand 0 the (absolute) address of operand 1, which is a symbolic
++;; value subject to a PC-relative relocation. Operand 2 is a helper function
++;; that adds the PC value at the call point to register #(operand 3).
++;;
++;; Even on V9 we use this call sequence with a stub, instead of "rd %pc, ..."
++;; because the RDPC instruction is extremely expensive and incurs a complete
++;; instruction pipeline flush.
++
++(define_insn "load_pcrel_sym<P:mode>"
++ [(set (match_operand:P 0 "register_operand" "=r")
++ (unspec:P [(match_operand:P 1 "symbolic_operand" "")
++ (match_operand:P 2 "call_address_operand" "")
++ (match_operand:P 3 "const_int_operand" "")]
++ UNSPEC_LOAD_PCREL_SYM))
++ (clobber (reg:P O7_REG))]
++ "REGNO (operands[0]) == INTVAL (operands[3])"
++{
++ return output_load_pcrel_sym (operands);
++}
++ [(set (attr "type") (const_string "multi"))
++ (set (attr "length")
++ (if_then_else (eq_attr "delayed_branch" "true")
++ (const_int 3)
++ (const_int 4)))])
++
++
++;; Integer move instructions
++
++(define_expand "movqi"
++ [(set (match_operand:QI 0 "nonimmediate_operand" "")
++ (match_operand:QI 1 "general_operand" ""))]
++ ""
++{
++ if (sparc_expand_move (QImode, operands))
++ DONE;
++})
++
++(define_insn "*movqi_insn"
++ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,m")
++ (match_operand:QI 1 "input_operand" "rI,m,rJ"))]
++ "(register_operand (operands[0], QImode)
++ || register_or_zero_operand (operands[1], QImode))"
++ "@
++ mov\t%1, %0
++ ldub\t%1, %0
++ stb\t%r1, %0"
++ [(set_attr "type" "*,load,store")
++ (set_attr "subtype" "*,regular,*")
++ (set_attr "us3load_type" "*,3cycle,*")])
++
++(define_expand "movhi"
++ [(set (match_operand:HI 0 "nonimmediate_operand" "")
++ (match_operand:HI 1 "general_operand" ""))]
++ ""
++{
++ if (sparc_expand_move (HImode, operands))
++ DONE;
++})
++
++(define_insn "*movhi_insn"
++ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
++ (match_operand:HI 1 "input_operand" "rI,K,m,rJ"))]
++ "(register_operand (operands[0], HImode)
++ || register_or_zero_operand (operands[1], HImode))"
++ "@
++ mov\t%1, %0
++ sethi\t%%hi(%a1), %0
++ lduh\t%1, %0
++ sth\t%r1, %0"
++ [(set_attr "type" "*,*,load,store")
++ (set_attr "subtype" "*,*,regular,*")
++ (set_attr "us3load_type" "*,*,3cycle,*")])
++
++;; We always work with constants here.
++(define_insn "*movhi_lo_sum"
++ [(set (match_operand:HI 0 "register_operand" "=r")
++ (ior:HI (match_operand:HI 1 "register_operand" "%r")
++ (match_operand:HI 2 "small_int_operand" "I")))]
++ ""
++ "or\t%1, %2, %0")
++
++(define_expand "movsi"
++ [(set (match_operand:SI 0 "nonimmediate_operand" "")
++ (match_operand:SI 1 "general_operand" ""))]
++ ""
++{
++ if (sparc_expand_move (SImode, operands))
++ DONE;
++})
++
++(define_insn "*movsi_insn"
++ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r, m, r,*f,?*f,?*f, m,d,d")
++ (match_operand:SI 1 "input_operand" "rI,K,m,rJ,*f, r, f, m,?*f,J,P"))]
++ "register_operand (operands[0], SImode)
++ || register_or_zero_or_all_ones_operand (operands[1], SImode)"
++ "@
++ mov\t%1, %0
++ sethi\t%%hi(%a1), %0
++ ld\t%1, %0
++ st\t%r1, %0
++ movstouw\t%1, %0
++ movwtos\t%1, %0
++ fmovs\t%1, %0
++ ld\t%1, %0
++ st\t%1, %0
++ fzeros\t%0
++ fones\t%0"
++ [(set_attr "type" "*,*,load,store,vismv,vismv,fpmove,fpload,fpstore,visl,visl")
++ (set_attr "subtype" "*,*,regular,*,movstouw,single,*,*,*,single,single")
++ (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")])
++
++(define_insn "*movsi_lo_sum"
++ [(set (match_operand:SI 0 "register_operand" "=r")
++ (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
++ (match_operand:SI 2 "immediate_operand" "in")))]
++ "!flag_pic"
++ "or\t%1, %%lo(%a2), %0")
++
++(define_insn "*movsi_high"
++ [(set (match_operand:SI 0 "register_operand" "=r")
++ (high:SI (match_operand:SI 1 "immediate_operand" "in")))]
++ "!flag_pic"
++ "sethi\t%%hi(%a1), %0")
++
++;; The next two patterns must wrap the SYMBOL_REF in an UNSPEC
++;; so that CSE won't optimize the address computation away.
++(define_insn "movsi_lo_sum_pic"
++ [(set (match_operand:SI 0 "register_operand" "=r")
++ (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
++ (unspec:SI [(match_operand:SI 2 "immediate_operand" "in")]
++ UNSPEC_MOVE_PIC)))]
++ "flag_pic"
++{
++#ifdef HAVE_AS_SPARC_GOTDATA_OP
++ return "xor\t%1, %%gdop_lox10(%a2), %0";
++#else
++ return "or\t%1, %%lo(%a2), %0";
++#endif
++})
++
++(define_insn "movsi_high_pic"
++ [(set (match_operand:SI 0 "register_operand" "=r")
++ (high:SI (unspec:SI [(match_operand 1 "" "")] UNSPEC_MOVE_PIC)))]
++ "flag_pic && check_pic (1)"
++{
++#ifdef HAVE_AS_SPARC_GOTDATA_OP
++ return "sethi\t%%gdop_hix22(%a1), %0";
++#else
++ return "sethi\t%%hi(%a1), %0";
++#endif
++})
++
++(define_insn "movsi_pic_gotdata_op"
++ [(set (match_operand:SI 0 "register_operand" "=r")
++ (unspec:SI [(match_operand:SI 1 "register_operand" "r")
++ (match_operand:SI 2 "register_operand" "r")
++ (match_operand 3 "symbolic_operand" "")]
++ UNSPEC_MOVE_GOTDATA))]
++ "flag_pic && check_pic (1)"
++{
++#ifdef HAVE_AS_SPARC_GOTDATA_OP
++ return "ld\t[%1 + %2], %0, %%gdop(%a3)";
++#else
++ return "ld\t[%1 + %2], %0";
++#endif
++}
++ [(set_attr "type" "load")
++ (set_attr "subtype" "regular")])
++
++(define_expand "movsi_pic_label_ref"
++ [(set (match_dup 3) (high:SI
++ (unspec:SI [(match_operand:SI 1 "symbolic_operand" "")
++ (match_dup 2)] UNSPEC_MOVE_PIC_LABEL)))
++ (set (match_dup 4) (lo_sum:SI (match_dup 3)
++ (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_MOVE_PIC_LABEL)))
++ (set (match_operand:SI 0 "register_operand" "=r")
++ (minus:SI (match_dup 5) (match_dup 4)))]
++ "flag_pic"
++{
++ crtl->uses_pic_offset_table = 1;
++ operands[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
++ if (!can_create_pseudo_p ())
++ {
++ operands[3] = operands[0];
++ operands[4] = operands[0];
++ }
++ else
++ {
++ operands[3] = gen_reg_rtx (SImode);
++ operands[4] = gen_reg_rtx (SImode);
++ }
++ operands[5] = pic_offset_table_rtx;
++})
++
++(define_insn "*movsi_high_pic_label_ref"
++ [(set (match_operand:SI 0 "register_operand" "=r")
++ (high:SI
++ (unspec:SI [(match_operand:SI 1 "symbolic_operand" "")
++ (match_operand:SI 2 "" "")] UNSPEC_MOVE_PIC_LABEL)))]
++ "flag_pic"
++ "sethi\t%%hi(%a2-(%a1-.)), %0")
++
++(define_insn "*movsi_lo_sum_pic_label_ref"
++ [(set (match_operand:SI 0 "register_operand" "=r")
++ (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
++ (unspec:SI [(match_operand:SI 2 "symbolic_operand" "")
++ (match_operand:SI 3 "" "")] UNSPEC_MOVE_PIC_LABEL)))]
++ "flag_pic"
++ "or\t%1, %%lo(%a3-(%a2-.)), %0")
++
++;; Set up the PIC register for VxWorks.
++
++(define_expand "vxworks_load_got"
++ [(set (match_dup 0)
++ (high:SI (match_dup 1)))
++ (set (match_dup 0)
++ (mem:SI (lo_sum:SI (match_dup 0) (match_dup 1))))
++ (set (match_dup 0)
++ (mem:SI (lo_sum:SI (match_dup 0) (match_dup 2))))]
++ "TARGET_VXWORKS_RTP"
++{
++ operands[0] = pic_offset_table_rtx;
++ operands[1] = gen_rtx_SYMBOL_REF (SImode, VXWORKS_GOTT_BASE);
++ operands[2] = gen_rtx_SYMBOL_REF (SImode, VXWORKS_GOTT_INDEX);
++})
++
++(define_expand "movdi"
++ [(set (match_operand:DI 0 "nonimmediate_operand" "")
++ (match_operand:DI 1 "general_operand" ""))]
++ ""
++{
++ if (sparc_expand_move (DImode, operands))
++ DONE;
++})
++
++;; Be careful, fmovd does not exist when !v9.
++;; We match MEM moves directly when we have correct even
++;; numbered registers, but fall into splits otherwise.
++;; The constraint ordering here is really important to
++;; avoid insane problems in reload, especially for patterns
++;; of the form:
++;;
++;; (set (mem:DI (plus:SI (reg:SI 30 %fp)
++;; (const_int -5016)))
++;; (reg:DI 2 %g2))
++;;
++
++(define_insn "*movdi_insn_sp32"
++ [(set (match_operand:DI 0 "nonimmediate_operand"
++ "=T,o,U,T,r,o,r,r,?*f, T,?*f, o,?*e,?*e, r,?*f,?*e, T,*b,*b")
++ (match_operand:DI 1 "input_operand"
++ " J,J,T,U,o,r,i,r, T,?*f, o,?*f, *e, *e,?*f, r, T,?*e, J, P"))]
++ "TARGET_ARCH32
++ && (register_operand (operands[0], DImode)
++ || register_or_zero_operand (operands[1], DImode))"
++ "@
++ stx\t%r1, %0
++ #
++ ldd\t%1, %0
++ std\t%1, %0
++ ldd\t%1, %0
++ std\t%1, %0
++ #
++ #
++ ldd\t%1, %0
++ std\t%1, %0
++ #
++ #
++ fmovd\t%1, %0
++ #
++ #
++ #
++ ldd\t%1, %0
++ std\t%1, %0
++ fzero\t%0
++ fone\t%0"
++ [(set_attr "type" "store,*,load,store,load,store,*,*,fpload,fpstore,*,*,fpmove,*,*,*,fpload,fpstore,visl,
++visl")
++ (set_attr "subtype" "*,*,regular,*,regular,*,*,*,*,*,*,*,*,*,*,*,*,*,double,double")
++ (set_attr "length" "*,2,*,*,*,*,2,2,*,*,2,2,*,2,2,2,*,*,*,*")
++ (set_attr "fptype" "*,*,*,*,*,*,*,*,*,*,*,*,double,*,*,*,*,*,double,double")
++ (set_attr "cpu_feature" "v9,*,*,*,*,*,*,*,fpu,fpu,fpu,fpu,v9,fpunotv9,vis3,vis3,fpu,fpu,vis,vis")
++ (set_attr "lra" "*,*,disabled,disabled,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")])
++
++(define_insn "*movdi_insn_sp64"
++ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r, m, r,*e,?*e,?*e, W,b,b")
++ (match_operand:DI 1 "input_operand" "rI,N,m,rJ,*e, r, *e, W,?*e,J,P"))]
++ "TARGET_ARCH64
++ && (register_operand (operands[0], DImode)
++ || register_or_zero_or_all_ones_operand (operands[1], DImode))"
++ "@
++ mov\t%1, %0
++ sethi\t%%hi(%a1), %0
++ ldx\t%1, %0
++ stx\t%r1, %0
++ movdtox\t%1, %0
++ movxtod\t%1, %0
++ fmovd\t%1, %0
++ ldd\t%1, %0
++ std\t%1, %0
++ fzero\t%0
++ fone\t%0"
++ [(set_attr "type" "*,*,load,store,vismv,vismv,fpmove,fpload,fpstore,visl,visl")
++ (set_attr "subtype" "*,*,regular,*,movdtox,movxtod,*,*,*,double,double")
++ (set_attr "fptype" "*,*,*,*,*,*,double,*,*,double,double")
++ (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")])
++
++(define_expand "movdi_pic_label_ref"
++ [(set (match_dup 3) (high:DI
++ (unspec:DI [(match_operand:DI 1 "symbolic_operand" "")
++ (match_dup 2)] UNSPEC_MOVE_PIC_LABEL)))
++ (set (match_dup 4) (lo_sum:DI (match_dup 3)
++ (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_MOVE_PIC_LABEL)))
++ (set (match_operand:DI 0 "register_operand" "=r")
++ (minus:DI (match_dup 5) (match_dup 4)))]
++ "TARGET_ARCH64 && flag_pic"
++{
++ crtl->uses_pic_offset_table = 1;
++ operands[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
++ if (!can_create_pseudo_p ())
++ {
++ operands[3] = operands[0];
++ operands[4] = operands[0];
++ }
++ else
++ {
++ operands[3] = gen_reg_rtx (DImode);
++ operands[4] = gen_reg_rtx (DImode);
++ }
++ operands[5] = pic_offset_table_rtx;
++})
++
++(define_insn "*movdi_high_pic_label_ref"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (high:DI
++ (unspec:DI [(match_operand:DI 1 "symbolic_operand" "")
++ (match_operand:DI 2 "" "")] UNSPEC_MOVE_PIC_LABEL)))]
++ "TARGET_ARCH64 && flag_pic"
++ "sethi\t%%hi(%a2-(%a1-.)), %0")
++
++(define_insn "*movdi_lo_sum_pic_label_ref"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
++ (unspec:DI [(match_operand:DI 2 "symbolic_operand" "")
++ (match_operand:DI 3 "" "")] UNSPEC_MOVE_PIC_LABEL)))]
++ "TARGET_ARCH64 && flag_pic"
++ "or\t%1, %%lo(%a3-(%a2-.)), %0")
++
++;; SPARC-v9 code model support insns. See sparc_emit_set_symbolic_const64
++;; in sparc.c to see what is going on here... PIC stuff comes first.
++
++(define_insn "movdi_lo_sum_pic"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
++ (unspec:DI [(match_operand:DI 2 "immediate_operand" "in")]
++ UNSPEC_MOVE_PIC)))]
++ "TARGET_ARCH64 && flag_pic"
++{
++#ifdef HAVE_AS_SPARC_GOTDATA_OP
++ return "xor\t%1, %%gdop_lox10(%a2), %0";
++#else
++ return "or\t%1, %%lo(%a2), %0";
++#endif
++})
++
++(define_insn "movdi_high_pic"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (high:DI (unspec:DI [(match_operand 1 "" "")] UNSPEC_MOVE_PIC)))]
++ "TARGET_ARCH64 && flag_pic && check_pic (1)"
++{
++#ifdef HAVE_AS_SPARC_GOTDATA_OP
++ return "sethi\t%%gdop_hix22(%a1), %0";
++#else
++ return "sethi\t%%hi(%a1), %0";
++#endif
++})
++
++(define_insn "movdi_pic_gotdata_op"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (unspec:DI [(match_operand:DI 1 "register_operand" "r")
++ (match_operand:DI 2 "register_operand" "r")
++ (match_operand 3 "symbolic_operand" "")]
++ UNSPEC_MOVE_GOTDATA))]
++ "TARGET_ARCH64 && flag_pic && check_pic (1)"
++{
++#ifdef HAVE_AS_SPARC_GOTDATA_OP
++ return "ldx\t[%1 + %2], %0, %%gdop(%a3)";
++#else
++ return "ldx\t[%1 + %2], %0";
++#endif
++}
++ [(set_attr "type" "load")
++ (set_attr "subtype" "regular")])
++
++(define_insn "*sethi_di_medlow_embmedany_pic"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (high:DI (match_operand:DI 1 "medium_pic_operand" "")))]
++ "(TARGET_CM_MEDLOW || TARGET_CM_EMBMEDANY) && flag_pic && check_pic (1)"
++ "sethi\t%%hi(%a1), %0")
++
++(define_insn "*sethi_di_medlow"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (high:DI (match_operand:DI 1 "symbolic_operand" "")))]
++ "TARGET_CM_MEDLOW && !flag_pic"
++ "sethi\t%%hi(%a1), %0")
++
++(define_insn "*losum_di_medlow"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
++ (match_operand:DI 2 "symbolic_operand" "")))]
++ "TARGET_CM_MEDLOW && !flag_pic"
++ "or\t%1, %%lo(%a2), %0")
++
++(define_insn "seth44"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (high:DI (unspec:DI [(match_operand:DI 1 "symbolic_operand" "")]
++ UNSPEC_SETH44)))]
++ "TARGET_CM_MEDMID && !flag_pic"
++ "sethi\t%%h44(%a1), %0")
++
++(define_insn "setm44"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
++ (unspec:DI [(match_operand:DI 2 "symbolic_operand" "")]
++ UNSPEC_SETM44)))]
++ "TARGET_CM_MEDMID && !flag_pic"
++ "or\t%1, %%m44(%a2), %0")
++
++(define_insn "setl44"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
++ (match_operand:DI 2 "symbolic_operand" "")))]
++ "TARGET_CM_MEDMID && !flag_pic"
++ "or\t%1, %%l44(%a2), %0")
++
++(define_insn "sethh"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (high:DI (unspec:DI [(match_operand:DI 1 "symbolic_operand" "")]
++ UNSPEC_SETHH)))]
++ "TARGET_CM_MEDANY && !flag_pic"
++ "sethi\t%%hh(%a1), %0")
++
++(define_insn "setlm"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (high:DI (unspec:DI [(match_operand:DI 1 "symbolic_operand" "")]
++ UNSPEC_SETLM)))]
++ "TARGET_CM_MEDANY && !flag_pic"
++ "sethi\t%%lm(%a1), %0")
++
++(define_insn "sethm"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
++ (unspec:DI [(match_operand:DI 2 "symbolic_operand" "")]
++ UNSPEC_EMB_SETHM)))]
++ "TARGET_CM_MEDANY && !flag_pic"
++ "or\t%1, %%hm(%a2), %0")
++
++(define_insn "setlo"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
++ (match_operand:DI 2 "symbolic_operand" "")))]
++ "TARGET_CM_MEDANY && !flag_pic"
++ "or\t%1, %%lo(%a2), %0")
++
++(define_insn "embmedany_sethi"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (high:DI (unspec:DI [(match_operand:DI 1 "data_segment_operand" "")]
++ UNSPEC_EMB_HISUM)))]
++ "TARGET_CM_EMBMEDANY && !flag_pic"
++ "sethi\t%%hi(%a1), %0")
++
++(define_insn "embmedany_losum"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
++ (match_operand:DI 2 "data_segment_operand" "")))]
++ "TARGET_CM_EMBMEDANY && !flag_pic"
++ "add\t%1, %%lo(%a2), %0")
++
++(define_insn "embmedany_brsum"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (unspec:DI [(match_operand:DI 1 "register_operand" "r")]
++ UNSPEC_EMB_HISUM))]
++ "TARGET_CM_EMBMEDANY && !flag_pic"
++ "add\t%1, %_, %0")
++
++(define_insn "embmedany_textuhi"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (high:DI (unspec:DI [(match_operand:DI 1 "text_segment_operand" "")]
++ UNSPEC_EMB_TEXTUHI)))]
++ "TARGET_CM_EMBMEDANY && !flag_pic"
++ "sethi\t%%uhi(%a1), %0")
++
++(define_insn "embmedany_texthi"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (high:DI (unspec:DI [(match_operand:DI 1 "text_segment_operand" "")]
++ UNSPEC_EMB_TEXTHI)))]
++ "TARGET_CM_EMBMEDANY && !flag_pic"
++ "sethi\t%%hi(%a1), %0")
++
++(define_insn "embmedany_textulo"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
++ (unspec:DI [(match_operand:DI 2 "text_segment_operand" "")]
++ UNSPEC_EMB_TEXTULO)))]
++ "TARGET_CM_EMBMEDANY && !flag_pic"
++ "or\t%1, %%ulo(%a2), %0")
++
++(define_insn "embmedany_textlo"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
++ (match_operand:DI 2 "text_segment_operand" "")))]
++ "TARGET_CM_EMBMEDANY && !flag_pic"
++ "or\t%1, %%lo(%a2), %0")
++
++;; Now some patterns to help reload out a bit.
++(define_expand "reload_indi"
++ [(parallel [(match_operand:DI 0 "register_operand" "=r")
++ (match_operand:DI 1 "immediate_operand" "")
++ (match_operand:TI 2 "register_operand" "=&r")])]
++ "(TARGET_CM_MEDANY || TARGET_CM_EMBMEDANY) && !flag_pic"
++{
++ sparc_emit_set_symbolic_const64 (operands[0], operands[1], operands[2]);
++ DONE;
++})
++
++(define_expand "reload_outdi"
++ [(parallel [(match_operand:DI 0 "register_operand" "=r")
++ (match_operand:DI 1 "immediate_operand" "")
++ (match_operand:TI 2 "register_operand" "=&r")])]
++ "(TARGET_CM_MEDANY || TARGET_CM_EMBMEDANY) && !flag_pic"
++{
++ sparc_emit_set_symbolic_const64 (operands[0], operands[1], operands[2]);
++ DONE;
++})
++
++;; Split up putting CONSTs and REGs into DI regs when !arch64
++(define_split
++ [(set (match_operand:DI 0 "register_operand" "")
++ (match_operand:DI 1 "const_int_operand" ""))]
++ "reload_completed
++ && TARGET_ARCH32
++ && ((GET_CODE (operands[0]) == REG
++ && SPARC_INT_REG_P (REGNO (operands[0])))
++ || (GET_CODE (operands[0]) == SUBREG
++ && GET_CODE (SUBREG_REG (operands[0])) == REG
++ && SPARC_INT_REG_P (REGNO (SUBREG_REG (operands[0])))))"
++ [(clobber (const_int 0))]
++{
++ HOST_WIDE_INT low = trunc_int_for_mode (INTVAL (operands[1]), SImode);
++ HOST_WIDE_INT high = trunc_int_for_mode (INTVAL (operands[1]) >> 32, SImode);
++ rtx high_part = gen_highpart (SImode, operands[0]);
++ rtx low_part = gen_lowpart (SImode, operands[0]);
++
++ emit_move_insn_1 (high_part, GEN_INT (high));
++
++ /* Slick... but this loses if the constant can be done in one insn. */
++ if (low == high && !SPARC_SETHI32_P (high) && !SPARC_SIMM13_P (high))
++ emit_move_insn_1 (low_part, high_part);
++ else
++ emit_move_insn_1 (low_part, GEN_INT (low));
++
++ DONE;
++})
++
++(define_split
++ [(set (match_operand:DI 0 "register_operand" "")
++ (match_operand:DI 1 "register_operand" ""))]
++ "reload_completed
++ && (!TARGET_V9
++ || (TARGET_ARCH32
++ && sparc_split_reg_reg_legitimate (operands[0], operands[1])))"
++ [(clobber (const_int 0))]
++{
++ sparc_split_reg_reg (operands[0], operands[1], SImode);
++ DONE;
++})
++
++;; Now handle the cases of memory moves from/to non-even
++;; DI mode register pairs.
++(define_split
++ [(set (match_operand:DI 0 "register_operand" "")
++ (match_operand:DI 1 "memory_operand" ""))]
++ "reload_completed
++ && TARGET_ARCH32
++ && sparc_split_reg_mem_legitimate (operands[0], operands[1])"
++ [(clobber (const_int 0))]
++{
++ sparc_split_reg_mem (operands[0], operands[1], SImode);
++ DONE;
++})
++
++(define_split
++ [(set (match_operand:DI 0 "memory_operand" "")
++ (match_operand:DI 1 "register_operand" ""))]
++ "reload_completed
++ && TARGET_ARCH32
++ && sparc_split_reg_mem_legitimate (operands[1], operands[0])"
++ [(clobber (const_int 0))]
++{
++ sparc_split_mem_reg (operands[0], operands[1], SImode);
++ DONE;
++})
++
++(define_split
++ [(set (match_operand:DI 0 "memory_operand" "")
++ (match_operand:DI 1 "const_zero_operand" ""))]
++ "reload_completed
++ && (!TARGET_V9
++ || (TARGET_ARCH32
++ && !mem_min_alignment (operands[0], 8)))
++ && offsettable_memref_p (operands[0])"
++ [(clobber (const_int 0))]
++{
++ emit_move_insn_1 (adjust_address (operands[0], SImode, 0), const0_rtx);
++ emit_move_insn_1 (adjust_address (operands[0], SImode, 4), const0_rtx);
++ DONE;
++})
++
++(define_expand "movti"
++ [(set (match_operand:TI 0 "nonimmediate_operand" "")
++ (match_operand:TI 1 "general_operand" ""))]
++ "TARGET_ARCH64"
++{
++ if (sparc_expand_move (TImode, operands))
++ DONE;
++})
++
++;; We need to prevent reload from splitting TImode moves, because it
++;; might decide to overwrite a pointer with the value it points to.
++;; In that case we have to do the loads in the appropriate order so
++;; that the pointer is not destroyed too early.
++
++(define_insn "*movti_insn_sp64"
++ [(set (match_operand:TI 0 "nonimmediate_operand" "=r , o,?*e,?o,b")
++ (match_operand:TI 1 "input_operand" "roJ,rJ, eo, e,J"))]
++ "TARGET_ARCH64
++ && !TARGET_HARD_QUAD
++ && (register_operand (operands[0], TImode)
++ || register_or_zero_operand (operands[1], TImode))"
++ "#"
++ [(set_attr "length" "2,2,2,2,2")
++ (set_attr "cpu_feature" "*,*,fpu,fpu,vis")])
++
++(define_insn "*movti_insn_sp64_hq"
++ [(set (match_operand:TI 0 "nonimmediate_operand" "=r , o,?*e,?*e,?m,b")
++ (match_operand:TI 1 "input_operand" "roJ,rJ, e, m, e,J"))]
++ "TARGET_ARCH64
++ && TARGET_HARD_QUAD
++ && (register_operand (operands[0], TImode)
++ || register_or_zero_operand (operands[1], TImode))"
++ "@
++ #
++ #
++ fmovq\t%1, %0
++ ldq\t%1, %0
++ stq\t%1, %0
++ #"
++ [(set_attr "type" "*,*,fpmove,fpload,fpstore,*")
++ (set_attr "length" "2,2,*,*,*,2")])
++
++;; Now all the splits to handle multi-insn TI mode moves.
++(define_split
++ [(set (match_operand:TI 0 "register_operand" "")
++ (match_operand:TI 1 "register_operand" ""))]
++ "reload_completed
++ && ((TARGET_FPU
++ && !TARGET_HARD_QUAD)
++ || (!fp_register_operand (operands[0], TImode)
++ && !fp_register_operand (operands[1], TImode)))"
++ [(clobber (const_int 0))]
++{
++ rtx set_dest = operands[0];
++ rtx set_src = operands[1];
++ rtx dest1, dest2;
++ rtx src1, src2;
++
++ dest1 = gen_highpart (DImode, set_dest);
++ dest2 = gen_lowpart (DImode, set_dest);
++ src1 = gen_highpart (DImode, set_src);
++ src2 = gen_lowpart (DImode, set_src);
++
++ /* Now emit using the real source and destination we found, swapping
++ the order if we detect overlap. */
++ if (reg_overlap_mentioned_p (dest1, src2))
++ {
++ emit_insn (gen_movdi (dest2, src2));
++ emit_insn (gen_movdi (dest1, src1));
++ }
++ else
++ {
++ emit_insn (gen_movdi (dest1, src1));
++ emit_insn (gen_movdi (dest2, src2));
++ }
++ DONE;
++})
++
++(define_split
++ [(set (match_operand:TI 0 "nonimmediate_operand" "")
++ (match_operand:TI 1 "const_zero_operand" ""))]
++ "reload_completed"
++ [(clobber (const_int 0))]
++{
++ rtx set_dest = operands[0];
++ rtx dest1, dest2;
++
++ switch (GET_CODE (set_dest))
++ {
++ case REG:
++ dest1 = gen_highpart (DImode, set_dest);
++ dest2 = gen_lowpart (DImode, set_dest);
++ break;
++ case MEM:
++ dest1 = adjust_address (set_dest, DImode, 0);
++ dest2 = adjust_address (set_dest, DImode, 8);
++ break;
++ default:
++ gcc_unreachable ();
++ }
++
++ emit_insn (gen_movdi (dest1, const0_rtx));
++ emit_insn (gen_movdi (dest2, const0_rtx));
++ DONE;
++})
++
++(define_split
++ [(set (match_operand:TI 0 "register_operand" "")
++ (match_operand:TI 1 "memory_operand" ""))]
++ "reload_completed
++ && offsettable_memref_p (operands[1])
++ && (!TARGET_HARD_QUAD
++ || !fp_register_operand (operands[0], TImode))"
++ [(clobber (const_int 0))]
++{
++ rtx word0 = adjust_address (operands[1], DImode, 0);
++ rtx word1 = adjust_address (operands[1], DImode, 8);
++ rtx set_dest, dest1, dest2;
++
++ set_dest = operands[0];
++
++ dest1 = gen_highpart (DImode, set_dest);
++ dest2 = gen_lowpart (DImode, set_dest);
++
++ /* Now output, ordering such that we don't clobber any registers
++ mentioned in the address. */
++ if (reg_overlap_mentioned_p (dest1, word1))
++
++ {
++ emit_insn (gen_movdi (dest2, word1));
++ emit_insn (gen_movdi (dest1, word0));
++ }
++ else
++ {
++ emit_insn (gen_movdi (dest1, word0));
++ emit_insn (gen_movdi (dest2, word1));
++ }
++ DONE;
++})
++
++(define_split
++ [(set (match_operand:TI 0 "memory_operand" "")
++ (match_operand:TI 1 "register_operand" ""))]
++ "reload_completed
++ && offsettable_memref_p (operands[0])
++ && (!TARGET_HARD_QUAD
++ || !fp_register_operand (operands[1], TImode))"
++ [(clobber (const_int 0))]
++{
++ rtx set_src = operands[1];
++
++ emit_insn (gen_movdi (adjust_address (operands[0], DImode, 0),
++ gen_highpart (DImode, set_src)));
++ emit_insn (gen_movdi (adjust_address (operands[0], DImode, 8),
++ gen_lowpart (DImode, set_src)));
++ DONE;
++})
++
++
++;; Floating point move instructions
++
++(define_expand "movsf"
++ [(set (match_operand:SF 0 "nonimmediate_operand" "")
++ (match_operand:SF 1 "general_operand" ""))]
++ ""
++{
++ if (sparc_expand_move (SFmode, operands))
++ DONE;
++})
++
++(define_insn "*movsf_insn"
++ [(set (match_operand:SF 0 "nonimmediate_operand" "=d,d,f, *r,*r,*r,*r, f,f,*r,m, m")
++ (match_operand:SF 1 "input_operand" "G,C,f,*rR, Q, S, f,*r,m, m,f,*rG"))]
++ "(register_operand (operands[0], SFmode)
++ || register_or_zero_or_all_ones_operand (operands[1], SFmode))"
++{
++ if (GET_CODE (operands[1]) == CONST_DOUBLE
++ && (which_alternative == 3
++ || which_alternative == 4
++ || which_alternative == 5))
++ {
++ long i;
++
++ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
++ operands[1] = GEN_INT (i);
++ }
++
++ switch (which_alternative)
++ {
++ case 0:
++ return "fzeros\t%0";
++ case 1:
++ return "fones\t%0";
++ case 2:
++ return "fmovs\t%1, %0";
++ case 3:
++ return "mov\t%1, %0";
++ case 4:
++ return "sethi\t%%hi(%a1), %0";
++ case 5:
++ return "#";
++ case 6:
++ return "movstouw\t%1, %0";
++ case 7:
++ return "movwtos\t%1, %0";
++ case 8:
++ case 9:
++ return "ld\t%1, %0";
++ case 10:
++ case 11:
++ return "st\t%r1, %0";
++ default:
++ gcc_unreachable ();
++ }
++}
++ [(set_attr "type" "visl,visl,fpmove,*,*,*,vismv,vismv,fpload,load,fpstore,store")
++ (set_attr "subtype" "single,single,*,*,*,*,movstouw,single,*,regular,*,*")
++ (set_attr "cpu_feature" "vis,vis,fpu,*,*,*,vis3,vis3,fpu,*,fpu,*")])
++
++;; The following 3 patterns build SFmode constants in integer registers.
++
++(define_insn "*movsf_lo_sum"
++ [(set (match_operand:SF 0 "register_operand" "=r")
++ (lo_sum:SF (match_operand:SF 1 "register_operand" "r")
++ (match_operand:SF 2 "fp_const_high_losum_operand" "S")))]
++ ""
++{
++ long i;
++
++ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[2]), i);
++ operands[2] = GEN_INT (i);
++ return "or\t%1, %%lo(%a2), %0";
++})
++
++(define_insn "*movsf_high"
++ [(set (match_operand:SF 0 "register_operand" "=r")
++ (high:SF (match_operand:SF 1 "fp_const_high_losum_operand" "S")))]
++ ""
++{
++ long i;
++
++ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
++ operands[1] = GEN_INT (i);
++ return "sethi\t%%hi(%1), %0";
++})
++
++(define_split
++ [(set (match_operand:SF 0 "register_operand" "")
++ (match_operand:SF 1 "fp_const_high_losum_operand" ""))]
++ "REG_P (operands[0]) && SPARC_INT_REG_P (REGNO (operands[0]))"
++ [(set (match_dup 0) (high:SF (match_dup 1)))
++ (set (match_dup 0) (lo_sum:SF (match_dup 0) (match_dup 1)))])
++
++(define_expand "movdf"
++ [(set (match_operand:DF 0 "nonimmediate_operand" "")
++ (match_operand:DF 1 "general_operand" ""))]
++ ""
++{
++ if (sparc_expand_move (DFmode, operands))
++ DONE;
++})
++
++(define_insn "*movdf_insn_sp32"
++ [(set (match_operand:DF 0 "nonimmediate_operand"
++ "=T,o,b,b,e,e,*r, f, e,T,U,T, f,o, *r,*r, o")
++ (match_operand:DF 1 "input_operand"
++ " G,G,G,C,e,e, f,*r,T#F,e,T,U,o#F,f,*rF, o,*r"))]
++ "TARGET_ARCH32
++ && (register_operand (operands[0], DFmode)
++ || register_or_zero_or_all_ones_operand (operands[1], DFmode))"
++ "@
++ stx\t%r1, %0
++ #
++ fzero\t%0
++ fone\t%0
++ fmovd\t%1, %0
++ #
++ #
++ #
++ ldd\t%1, %0
++ std\t%1, %0
++ ldd\t%1, %0
++ std\t%1, %0
++ #
++ #
++ #
++ ldd\t%1, %0
++ std\t%1, %0"
++ [(set_attr "type" "store,*,visl,visl,fpmove,*,*,*,fpload,fpstore,load,store,*,*,*,load,store")
++ (set_attr "subtype" "*,*,double,double,*,*,*,*,*,*,regular,*,*,*,*,regular,*")
++ (set_attr "length" "*,2,*,*,*,2,2,2,*,*,*,*,2,2,2,*,*")
++ (set_attr "fptype" "*,*,double,double,double,*,*,*,*,*,*,*,*,*,*,*,*")
++ (set_attr "cpu_feature" "v9,*,vis,vis,v9,fpunotv9,vis3,vis3,fpu,fpu,*,*,fpu,fpu,*,*,*")
++ (set_attr "lra" "*,*,*,*,*,*,*,*,*,*,disabled,disabled,*,*,*,*,*")])
++
++(define_insn "*movdf_insn_sp64"
++ [(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,*r, e, e,W, *r,*r, m,*r")
++ (match_operand:DF 1 "input_operand" "G,C,e, e,*r,W#F,e,*rG, m,*rG, F"))]
++ "TARGET_ARCH64
++ && (register_operand (operands[0], DFmode)
++ || register_or_zero_or_all_ones_operand (operands[1], DFmode))"
++ "@
++ fzero\t%0
++ fone\t%0
++ fmovd\t%1, %0
++ movdtox\t%1, %0
++ movxtod\t%1, %0
++ ldd\t%1, %0
++ std\t%1, %0
++ mov\t%r1, %0
++ ldx\t%1, %0
++ stx\t%r1, %0
++ #"
++ [(set_attr "type" "visl,visl,fpmove,vismv,vismv,load,store,*,load,store,*")
++ (set_attr "subtype" "double,double,*,movdtox,movxtod,regular,*,*,regular,*,*")
++ (set_attr "length" "*,*,*,*,*,*,*,*,*,*,2")
++ (set_attr "fptype" "double,double,double,double,double,*,*,*,*,*,*")
++ (set_attr "cpu_feature" "vis,vis,fpu,vis3,vis3,fpu,fpu,*,*,*,*")])
++
++;; This pattern builds DFmode constants in integer registers.
++(define_split
++ [(set (match_operand:DF 0 "register_operand" "")
++ (match_operand:DF 1 "const_double_operand" ""))]
++ "reload_completed
++ && REG_P (operands[0])
++ && SPARC_INT_REG_P (REGNO (operands[0]))
++ && !const_zero_operand (operands[1], GET_MODE (operands[0]))"
++ [(clobber (const_int 0))]
++{
++ operands[0] = gen_raw_REG (DImode, REGNO (operands[0]));
++
++ if (TARGET_ARCH64)
++ {
++ rtx tem = simplify_subreg (DImode, operands[1], DFmode, 0);
++ emit_insn (gen_movdi (operands[0], tem));
++ }
++ else
++ {
++ rtx hi = simplify_subreg (SImode, operands[1], DFmode, 0);
++ rtx lo = simplify_subreg (SImode, operands[1], DFmode, 4);
++ rtx high_part = gen_highpart (SImode, operands[0]);
++ rtx low_part = gen_lowpart (SImode, operands[0]);
++
++ gcc_assert (GET_CODE (hi) == CONST_INT);
++ gcc_assert (GET_CODE (lo) == CONST_INT);
++
++ emit_move_insn_1 (high_part, hi);
++
++ /* Slick... but this loses if the constant can be done in one insn. */
++ if (lo == hi
++ && !SPARC_SETHI32_P (INTVAL (hi))
++ && !SPARC_SIMM13_P (INTVAL (hi)))
++ emit_move_insn_1 (low_part, high_part);
++ else
++ emit_move_insn_1 (low_part, lo);
++ }
++ DONE;
++})
++
++;; Ok, now the splits to handle all the multi insn and
++;; mis-aligned memory address cases.
++;; In these splits please take note that we must be
++;; careful when V9 but not ARCH64 because the integer
++;; register DFmode cases must be handled.
++(define_split
++ [(set (match_operand:DF 0 "register_operand" "")
++ (match_operand:DF 1 "const_zero_operand" ""))]
++ "reload_completed
++ && TARGET_ARCH32
++ && ((GET_CODE (operands[0]) == REG
++ && SPARC_INT_REG_P (REGNO (operands[0])))
++ || (GET_CODE (operands[0]) == SUBREG
++ && GET_CODE (SUBREG_REG (operands[0])) == REG
++ && SPARC_INT_REG_P (REGNO (SUBREG_REG (operands[0])))))"
++ [(clobber (const_int 0))]
++{
++ emit_move_insn_1 (gen_highpart (SFmode, operands[0]), CONST0_RTX (SFmode));
++ emit_move_insn_1 (gen_lowpart (SFmode, operands[0]), CONST0_RTX (SFmode));
++ DONE;
++})
++
++(define_split
++ [(set (match_operand:DF 0 "register_operand" "")
++ (match_operand:DF 1 "register_operand" ""))]
++ "reload_completed
++ && (!TARGET_V9
++ || (TARGET_ARCH32
++ && sparc_split_reg_reg_legitimate (operands[0], operands[1])))"
++ [(clobber (const_int 0))]
++{
++ sparc_split_reg_reg (operands[0], operands[1], SFmode);
++ DONE;
++})
++
++(define_split
++ [(set (match_operand:DF 0 "register_operand" "")
++ (match_operand:DF 1 "memory_operand" ""))]
++ "reload_completed
++ && TARGET_ARCH32
++ && sparc_split_reg_mem_legitimate (operands[0], operands[1])"
++ [(clobber (const_int 0))]
++{
++ sparc_split_reg_mem (operands[0], operands[1], SFmode);
++ DONE;
++})
++
++(define_split
++ [(set (match_operand:DF 0 "memory_operand" "")
++ (match_operand:DF 1 "register_operand" ""))]
++ "reload_completed
++ && TARGET_ARCH32
++ && sparc_split_reg_mem_legitimate (operands[1], operands[0])"
++ [(clobber (const_int 0))]
++{
++ sparc_split_mem_reg (operands[0], operands[1], SFmode);
++ DONE;
++})
++
++(define_split
++ [(set (match_operand:DF 0 "memory_operand" "")
++ (match_operand:DF 1 "const_zero_operand" ""))]
++ "reload_completed
++ && (!TARGET_V9
++ || (TARGET_ARCH32
++ && !mem_min_alignment (operands[0], 8)))
++ && offsettable_memref_p (operands[0])"
++ [(clobber (const_int 0))]
++{
++ emit_move_insn_1 (adjust_address (operands[0], SFmode, 0), CONST0_RTX (SFmode));
++ emit_move_insn_1 (adjust_address (operands[0], SFmode, 4), CONST0_RTX (SFmode));
++ DONE;
++})
++
++(define_expand "movtf"
++ [(set (match_operand:TF 0 "nonimmediate_operand" "")
++ (match_operand:TF 1 "general_operand" ""))]
++ ""
++{
++ if (sparc_expand_move (TFmode, operands))
++ DONE;
++})
++
++(define_insn "*movtf_insn_sp32"
++ [(set (match_operand:TF 0 "nonimmediate_operand" "=b, e,o, o, r")
++ (match_operand:TF 1 "input_operand" " G,oe,e,rG,roG"))]
++ "TARGET_ARCH32
++ && (register_operand (operands[0], TFmode)
++ || register_or_zero_operand (operands[1], TFmode))"
++ "#"
++ [(set_attr "length" "4,4,4,4,4")
++ (set_attr "cpu_feature" "fpu,fpu,fpu,*,*")])
++
++(define_insn "*movtf_insn_sp64"
++ [(set (match_operand:TF 0 "nonimmediate_operand" "=b, e,o, o, r")
++ (match_operand:TF 1 "input_operand" "G,oe,e,rG,roG"))]
++ "TARGET_ARCH64
++ && !TARGET_HARD_QUAD
++ && (register_operand (operands[0], TFmode)
++ || register_or_zero_operand (operands[1], TFmode))"
++ "#"
++ [(set_attr "length" "2,2,2,2,2")
++ (set_attr "cpu_feature" "fpu,fpu,fpu,*,*")])
++
++(define_insn "*movtf_insn_sp64_hq"
++ [(set (match_operand:TF 0 "nonimmediate_operand" "=b,e,e,m, o, r")
++ (match_operand:TF 1 "input_operand" "G,e,m,e,rG,roG"))]
++ "TARGET_ARCH64
++ && TARGET_HARD_QUAD
++ && (register_operand (operands[0], TFmode)
++ || register_or_zero_operand (operands[1], TFmode))"
++ "@
++ #
++ fmovq\t%1, %0
++ ldq\t%1, %0
++ stq\t%1, %0
++ #
++ #"
++ [(set_attr "type" "*,fpmove,fpload,fpstore,*,*")
++ (set_attr "length" "2,*,*,*,2,2")])
++
++;; Now all the splits to handle multi-insn TF mode moves.
++(define_split
++ [(set (match_operand:TF 0 "register_operand" "")
++ (match_operand:TF 1 "register_operand" ""))]
++ "reload_completed
++ && (TARGET_ARCH32
++ || (TARGET_FPU
++ && !TARGET_HARD_QUAD)
++ || (!fp_register_operand (operands[0], TFmode)
++ && !fp_register_operand (operands[1], TFmode)))"
++ [(clobber (const_int 0))]
++{
++ rtx set_dest = operands[0];
++ rtx set_src = operands[1];
++ rtx dest1, dest2;
++ rtx src1, src2;
++
++ dest1 = gen_df_reg (set_dest, 0);
++ dest2 = gen_df_reg (set_dest, 1);
++ src1 = gen_df_reg (set_src, 0);
++ src2 = gen_df_reg (set_src, 1);
++
++ /* Now emit using the real source and destination we found, swapping
++ the order if we detect overlap. */
++ if (reg_overlap_mentioned_p (dest1, src2))
++ {
++ emit_insn (gen_movdf (dest2, src2));
++ emit_insn (gen_movdf (dest1, src1));
++ }
++ else
++ {
++ emit_insn (gen_movdf (dest1, src1));
++ emit_insn (gen_movdf (dest2, src2));
++ }
++ DONE;
++})
++
++(define_split
++ [(set (match_operand:TF 0 "nonimmediate_operand" "")
++ (match_operand:TF 1 "const_zero_operand" ""))]
++ "reload_completed"
++ [(clobber (const_int 0))]
++{
++ rtx set_dest = operands[0];
++ rtx dest1, dest2;
++
++ switch (GET_CODE (set_dest))
++ {
++ case REG:
++ dest1 = gen_df_reg (set_dest, 0);
++ dest2 = gen_df_reg (set_dest, 1);
++ break;
++ case MEM:
++ dest1 = adjust_address (set_dest, DFmode, 0);
++ dest2 = adjust_address (set_dest, DFmode, 8);
++ break;
++ default:
++ gcc_unreachable ();
++ }
++
++ emit_insn (gen_movdf (dest1, CONST0_RTX (DFmode)));
++ emit_insn (gen_movdf (dest2, CONST0_RTX (DFmode)));
++ DONE;
++})
++
++(define_split
++ [(set (match_operand:TF 0 "register_operand" "")
++ (match_operand:TF 1 "memory_operand" ""))]
++ "(reload_completed
++ && offsettable_memref_p (operands[1])
++ && (TARGET_ARCH32
++ || !TARGET_HARD_QUAD
++ || !fp_register_operand (operands[0], TFmode)))"
++ [(clobber (const_int 0))]
++{
++ rtx word0 = adjust_address (operands[1], DFmode, 0);
++ rtx word1 = adjust_address (operands[1], DFmode, 8);
++ rtx set_dest, dest1, dest2;
++
++ set_dest = operands[0];
++
++ dest1 = gen_df_reg (set_dest, 0);
++ dest2 = gen_df_reg (set_dest, 1);
++
++ /* Now output, ordering such that we don't clobber any registers
++ mentioned in the address. */
++ if (reg_overlap_mentioned_p (dest1, word1))
++
++ {
++ emit_insn (gen_movdf (dest2, word1));
++ emit_insn (gen_movdf (dest1, word0));
++ }
++ else
++ {
++ emit_insn (gen_movdf (dest1, word0));
++ emit_insn (gen_movdf (dest2, word1));
++ }
++ DONE;
++})
++
++(define_split
++ [(set (match_operand:TF 0 "memory_operand" "")
++ (match_operand:TF 1 "register_operand" ""))]
++ "(reload_completed
++ && offsettable_memref_p (operands[0])
++ && (TARGET_ARCH32
++ || !TARGET_HARD_QUAD
++ || !fp_register_operand (operands[1], TFmode)))"
++ [(clobber (const_int 0))]
++{
++ rtx set_src = operands[1];
++
++ emit_insn (gen_movdf (adjust_address (operands[0], DFmode, 0),
++ gen_df_reg (set_src, 0)));
++ emit_insn (gen_movdf (adjust_address (operands[0], DFmode, 8),
++ gen_df_reg (set_src, 1)));
++ DONE;
++})
++
++
++;; SPARC-V9 conditional move instructions
++
++;; We can handle larger constants here for some flavors, but for now we keep
++;; it simple and only allow those constants supported by all flavors.
++;; Note that emit_conditional_move canonicalizes operands 2,3 so that operand
++;; 3 contains the constant if one is present, but we handle either for
++;; generality (sparc.c puts a constant in operand 2).
++;;
++;; Our instruction patterns, on the other hand, canonicalize such that
++;; operand 3 must be the set destination.
++
++(define_expand "mov<I:mode>cc"
++ [(set (match_operand:I 0 "register_operand" "")
++ (if_then_else:I (match_operand 1 "comparison_operator" "")
++ (match_operand:I 2 "arith10_operand" "")
++ (match_operand:I 3 "arith10_operand" "")))]
++ "TARGET_V9 && !(<I:MODE>mode == DImode && TARGET_ARCH32)"
++{
++ if (!sparc_expand_conditional_move (<I:MODE>mode, operands))
++ FAIL;
++ DONE;
++})
++
++(define_expand "mov<F:mode>cc"
++ [(set (match_operand:F 0 "register_operand" "")
++ (if_then_else:F (match_operand 1 "comparison_operator" "")
++ (match_operand:F 2 "register_operand" "")
++ (match_operand:F 3 "register_operand" "")))]
++ "TARGET_V9 && TARGET_FPU"
++{
++ if (!sparc_expand_conditional_move (<F:MODE>mode, operands))
++ FAIL;
++ DONE;
++})
++
++(define_insn "*mov<I:mode>_cc_v9"
++ [(set (match_operand:I 0 "register_operand" "=r")
++ (if_then_else:I (match_operator 1 "icc_or_fcc_comparison_operator"
++ [(match_operand 2 "icc_or_fcc_register_operand" "X")
++ (const_int 0)])
++ (match_operand:I 3 "arith11_operand" "rL")
++ (match_operand:I 4 "register_operand" "0")))]
++ "TARGET_V9 && !(<I:MODE>mode == DImode && TARGET_ARCH32)"
++ "mov%C1\t%x2, %3, %0"
++ [(set_attr "type" "cmove")])
++
++(define_insn "*mov<I:mode>_cc_reg_sp64"
++ [(set (match_operand:I 0 "register_operand" "=r")
++ (if_then_else:I (match_operator 1 "v9_register_comparison_operator"
++ [(match_operand:DI 2 "register_operand" "r")
++ (const_int 0)])
++ (match_operand:I 3 "arith10_operand" "rM")
++ (match_operand:I 4 "register_operand" "0")))]
++ "TARGET_ARCH64"
++ "movr%D1\t%2, %r3, %0"
++ [(set_attr "type" "cmove")])
++
++(define_insn "*movsf_cc_v9"
++ [(set (match_operand:SF 0 "register_operand" "=f")
++ (if_then_else:SF (match_operator 1 "icc_or_fcc_comparison_operator"
++ [(match_operand 2 "icc_or_fcc_register_operand" "X")
++ (const_int 0)])
++ (match_operand:SF 3 "register_operand" "f")
++ (match_operand:SF 4 "register_operand" "0")))]
++ "TARGET_V9 && TARGET_FPU"
++ "fmovs%C1\t%x2, %3, %0"
++ [(set_attr "type" "fpcmove")])
++
++(define_insn "*movsf_cc_reg_sp64"
++ [(set (match_operand:SF 0 "register_operand" "=f")
++ (if_then_else:SF (match_operator 1 "v9_register_comparison_operator"
++ [(match_operand:DI 2 "register_operand" "r")
++ (const_int 0)])
++ (match_operand:SF 3 "register_operand" "f")
++ (match_operand:SF 4 "register_operand" "0")))]
++ "TARGET_ARCH64 && TARGET_FPU"
++ "fmovrs%D1\t%2, %3, %0"
++ [(set_attr "type" "fpcrmove")])
++
++;; Named because invoked by movtf_cc_v9
++(define_insn "movdf_cc_v9"
++ [(set (match_operand:DF 0 "register_operand" "=e")
++ (if_then_else:DF (match_operator 1 "icc_or_fcc_comparison_operator"
++ [(match_operand 2 "icc_or_fcc_register_operand" "X")
++ (const_int 0)])
++ (match_operand:DF 3 "register_operand" "e")
++ (match_operand:DF 4 "register_operand" "0")))]
++ "TARGET_V9 && TARGET_FPU"
++ "fmovd%C1\t%x2, %3, %0"
++ [(set_attr "type" "fpcmove")
++ (set_attr "fptype" "double")])
++
++;; Named because invoked by movtf_cc_reg_sp64
++(define_insn "movdf_cc_reg_sp64"
++ [(set (match_operand:DF 0 "register_operand" "=e")
++ (if_then_else:DF (match_operator 1 "v9_register_comparison_operator"
++ [(match_operand:DI 2 "register_operand" "r")
++ (const_int 0)])
++ (match_operand:DF 3 "register_operand" "e")
++ (match_operand:DF 4 "register_operand" "0")))]
++ "TARGET_ARCH64 && TARGET_FPU"
++ "fmovrd%D1\t%2, %3, %0"
++ [(set_attr "type" "fpcrmove")
++ (set_attr "fptype" "double")])
++
++(define_insn "*movtf_cc_hq_v9"
++ [(set (match_operand:TF 0 "register_operand" "=e")
++ (if_then_else:TF (match_operator 1 "icc_or_fcc_comparison_operator"
++ [(match_operand 2 "icc_or_fcc_register_operand" "X")
++ (const_int 0)])
++ (match_operand:TF 3 "register_operand" "e")
++ (match_operand:TF 4 "register_operand" "0")))]
++ "TARGET_V9 && TARGET_FPU && TARGET_HARD_QUAD"
++ "fmovq%C1\t%x2, %3, %0"
++ [(set_attr "type" "fpcmove")])
++
++(define_insn "*movtf_cc_reg_hq_sp64"
++ [(set (match_operand:TF 0 "register_operand" "=e")
++ (if_then_else:TF (match_operator 1 "v9_register_comparison_operator"
++ [(match_operand:DI 2 "register_operand" "r")
++ (const_int 0)])
++ (match_operand:TF 3 "register_operand" "e")
++ (match_operand:TF 4 "register_operand" "0")))]
++ "TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD"
++ "fmovrq%D1\t%2, %3, %0"
++ [(set_attr "type" "fpcrmove")])
++
++(define_insn_and_split "*movtf_cc_v9"
++ [(set (match_operand:TF 0 "register_operand" "=e")
++ (if_then_else:TF (match_operator 1 "icc_or_fcc_comparison_operator"
++ [(match_operand 2 "icc_or_fcc_register_operand" "X")
++ (const_int 0)])
++ (match_operand:TF 3 "register_operand" "e")
++ (match_operand:TF 4 "register_operand" "0")))]
++ "TARGET_V9 && TARGET_FPU && !TARGET_HARD_QUAD"
++ "#"
++ "&& reload_completed"
++ [(clobber (const_int 0))]
++{
++ rtx set_dest = operands[0];
++ rtx set_srca = operands[3];
++ rtx dest1, dest2;
++ rtx srca1, srca2;
++
++ dest1 = gen_df_reg (set_dest, 0);
++ dest2 = gen_df_reg (set_dest, 1);
++ srca1 = gen_df_reg (set_srca, 0);
++ srca2 = gen_df_reg (set_srca, 1);
++
++ if (reg_overlap_mentioned_p (dest1, srca2))
++ {
++ emit_insn (gen_movdf_cc_v9 (dest2, operands[1], operands[2],
++ srca2, dest2));
++ emit_insn (gen_movdf_cc_v9 (dest1, operands[1], operands[2],
++ srca1, dest1));
++ }
++ else
++ {
++ emit_insn (gen_movdf_cc_v9 (dest1, operands[1], operands[2],
++ srca1, dest1));
++ emit_insn (gen_movdf_cc_v9 (dest2, operands[1], operands[2],
++ srca2, dest2));
++ }
++ DONE;
++}
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*movtf_cc_reg_sp64"
++ [(set (match_operand:TF 0 "register_operand" "=e")
++ (if_then_else:TF (match_operator 1 "v9_register_comparison_operator"
++ [(match_operand:DI 2 "register_operand" "r")
++ (const_int 0)])
++ (match_operand:TF 3 "register_operand" "e")
++ (match_operand:TF 4 "register_operand" "0")))]
++ "TARGET_ARCH64 && TARGET_FPU && !TARGET_HARD_QUAD"
++ "#"
++ "&& reload_completed"
++ [(clobber (const_int 0))]
++{
++ rtx set_dest = operands[0];
++ rtx set_srca = operands[3];
++ rtx dest1, dest2;
++ rtx srca1, srca2;
++
++ dest1 = gen_df_reg (set_dest, 0);
++ dest2 = gen_df_reg (set_dest, 1);
++ srca1 = gen_df_reg (set_srca, 0);
++ srca2 = gen_df_reg (set_srca, 1);
++
++ if (reg_overlap_mentioned_p (dest1, srca2))
++ {
++ emit_insn (gen_movdf_cc_reg_sp64 (dest2, operands[1], operands[2],
++ srca2, dest2));
++ emit_insn (gen_movdf_cc_reg_sp64 (dest1, operands[1], operands[2],
++ srca1, dest1));
++ }
++ else
++ {
++ emit_insn (gen_movdf_cc_reg_sp64 (dest1, operands[1], operands[2],
++ srca1, dest1));
++ emit_insn (gen_movdf_cc_reg_sp64 (dest2, operands[1], operands[2],
++ srca2, dest2));
++ }
++ DONE;
++}
++ [(set_attr "length" "2")])
++
++
++;; Zero-extension instructions
++
++;; These patterns originally accepted general_operands, however, slightly
++;; better code is generated by only accepting register_operands, and then
++;; letting combine generate the ldu[hb] insns.
++
++(define_expand "zero_extendhisi2"
++ [(set (match_operand:SI 0 "register_operand" "")
++ (zero_extend:SI (match_operand:HI 1 "register_operand" "")))]
++ ""
++{
++ rtx temp = gen_reg_rtx (SImode);
++ rtx shift_16 = GEN_INT (16);
++ int op1_subbyte = 0;
++
++ if (GET_CODE (operand1) == SUBREG)
++ {
++ op1_subbyte = SUBREG_BYTE (operand1);
++ op1_subbyte /= GET_MODE_SIZE (SImode);
++ op1_subbyte *= GET_MODE_SIZE (SImode);
++ operand1 = XEXP (operand1, 0);
++ }
++
++ emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte),
++ shift_16));
++ emit_insn (gen_lshrsi3 (operand0, temp, shift_16));
++ DONE;
++})
++
++(define_insn "*zero_extendhisi2_insn"
++ [(set (match_operand:SI 0 "register_operand" "=r")
++ (zero_extend:SI (match_operand:HI 1 "memory_operand" "m")))]
++ ""
++ "lduh\t%1, %0"
++ [(set_attr "type" "load")
++ (set_attr "subtype" "regular")
++ (set_attr "us3load_type" "3cycle")])
++
++(define_expand "zero_extendqihi2"
++ [(set (match_operand:HI 0 "register_operand" "")
++ (zero_extend:HI (match_operand:QI 1 "register_operand" "")))]
++ ""
++ "")
++
++(define_insn "*zero_extendqihi2_insn"
++ [(set (match_operand:HI 0 "register_operand" "=r,r")
++ (zero_extend:HI (match_operand:QI 1 "input_operand" "r,m")))]
++ "GET_CODE (operands[1]) != CONST_INT"
++ "@
++ and\t%1, 0xff, %0
++ ldub\t%1, %0"
++ [(set_attr "type" "*,load")
++ (set_attr "subtype" "*,regular")
++ (set_attr "us3load_type" "*,3cycle")])
++
++(define_expand "zero_extendqisi2"
++ [(set (match_operand:SI 0 "register_operand" "")
++ (zero_extend:SI (match_operand:QI 1 "register_operand" "")))]
++ ""
++ "")
++
++(define_insn "*zero_extendqisi2_insn"
++ [(set (match_operand:SI 0 "register_operand" "=r,r")
++ (zero_extend:SI (match_operand:QI 1 "input_operand" "r,m")))]
++ "GET_CODE (operands[1]) != CONST_INT"
++ "@
++ and\t%1, 0xff, %0
++ ldub\t%1, %0"
++ [(set_attr "type" "*,load")
++ (set_attr "subtype" "*,regular")
++ (set_attr "us3load_type" "*,3cycle")])
++
++(define_expand "zero_extendqidi2"
++ [(set (match_operand:DI 0 "register_operand" "")
++ (zero_extend:DI (match_operand:QI 1 "register_operand" "")))]
++ "TARGET_ARCH64"
++ "")
++
++(define_insn "*zero_extendqidi2_insn"
++ [(set (match_operand:DI 0 "register_operand" "=r,r")
++ (zero_extend:DI (match_operand:QI 1 "input_operand" "r,m")))]
++ "TARGET_ARCH64 && GET_CODE (operands[1]) != CONST_INT"
++ "@
++ and\t%1, 0xff, %0
++ ldub\t%1, %0"
++ [(set_attr "type" "*,load")
++ (set_attr "subtype" "*,regular")
++ (set_attr "us3load_type" "*,3cycle")])
++
++(define_expand "zero_extendhidi2"
++ [(set (match_operand:DI 0 "register_operand" "")
++ (zero_extend:DI (match_operand:HI 1 "register_operand" "")))]
++ "TARGET_ARCH64"
++{
++ rtx temp = gen_reg_rtx (DImode);
++ rtx shift_48 = GEN_INT (48);
++ int op1_subbyte = 0;
++
++ if (GET_CODE (operand1) == SUBREG)
++ {
++ op1_subbyte = SUBREG_BYTE (operand1);
++ op1_subbyte /= GET_MODE_SIZE (DImode);
++ op1_subbyte *= GET_MODE_SIZE (DImode);
++ operand1 = XEXP (operand1, 0);
++ }
++
++ emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operand1, op1_subbyte),
++ shift_48));
++ emit_insn (gen_lshrdi3 (operand0, temp, shift_48));
++ DONE;
++})
++
++(define_insn "*zero_extendhidi2_insn"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (zero_extend:DI (match_operand:HI 1 "memory_operand" "m")))]
++ "TARGET_ARCH64"
++ "lduh\t%1, %0"
++ [(set_attr "type" "load")
++ (set_attr "subtype" "regular")
++ (set_attr "us3load_type" "3cycle")])
++
++;; ??? Write truncdisi pattern using sra?
++
++(define_expand "zero_extendsidi2"
++ [(set (match_operand:DI 0 "register_operand" "")
++ (zero_extend:DI (match_operand:SI 1 "register_operand" "")))]
++ ""
++ "")
++
++(define_insn "*zero_extendsidi2_insn_sp64"
++ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
++ (zero_extend:DI (match_operand:SI 1 "input_operand" "r,m,*f")))]
++ "TARGET_ARCH64
++ && GET_CODE (operands[1]) != CONST_INT"
++ "@
++ srl\t%1, 0, %0
++ lduw\t%1, %0
++ movstouw\t%1, %0"
++ [(set_attr "type" "shift,load,vismv")
++ (set_attr "subtype" "*,regular,movstouw")
++ (set_attr "cpu_feature" "*,*,vis3")])
++
++(define_insn_and_split "*zero_extendsidi2_insn_sp32"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (zero_extend:DI (match_operand:SI 1 "register_operand" "r")))]
++ "TARGET_ARCH32"
++ "#"
++ "&& reload_completed"
++ [(set (match_dup 2) (match_dup 1))
++ (set (match_dup 3) (const_int 0))]
++ "operands[2] = gen_lowpart (SImode, operands[0]);
++ operands[3] = gen_highpart (SImode, operands[0]);"
++ [(set_attr "length" "2")])
++
++;; Simplify comparisons of extended values.
++
++(define_insn "*cmp_zero_extendqisi2"
++ [(set (reg:CC CC_REG)
++ (compare:CC (zero_extend:SI (match_operand:QI 0 "register_operand" "r"))
++ (const_int 0)))]
++ ""
++ "andcc\t%0, 0xff, %%g0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_zero_qi"
++ [(set (reg:CC CC_REG)
++ (compare:CC (match_operand:QI 0 "register_operand" "r")
++ (const_int 0)))]
++ ""
++ "andcc\t%0, 0xff, %%g0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_zero_extendqisi2_set"
++ [(set (reg:CC CC_REG)
++ (compare:CC (zero_extend:SI (match_operand:QI 1 "register_operand" "r"))
++ (const_int 0)))
++ (set (match_operand:SI 0 "register_operand" "=r")
++ (zero_extend:SI (match_dup 1)))]
++ ""
++ "andcc\t%1, 0xff, %0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_zero_extendqisi2_andcc_set"
++ [(set (reg:CC CC_REG)
++ (compare:CC (and:SI (match_operand:SI 1 "register_operand" "r")
++ (const_int 255))
++ (const_int 0)))
++ (set (match_operand:SI 0 "register_operand" "=r")
++ (zero_extend:SI (subreg:QI (match_dup 1) 0)))]
++ ""
++ "andcc\t%1, 0xff, %0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_zero_extendqidi2"
++ [(set (reg:CCX CC_REG)
++ (compare:CCX (zero_extend:DI (match_operand:QI 0 "register_operand" "r"))
++ (const_int 0)))]
++ "TARGET_ARCH64"
++ "andcc\t%0, 0xff, %%g0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_zero_qi_sp64"
++ [(set (reg:CCX CC_REG)
++ (compare:CCX (match_operand:QI 0 "register_operand" "r")
++ (const_int 0)))]
++ "TARGET_ARCH64"
++ "andcc\t%0, 0xff, %%g0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_zero_extendqidi2_set"
++ [(set (reg:CCX CC_REG)
++ (compare:CCX (zero_extend:DI (match_operand:QI 1 "register_operand" "r"))
++ (const_int 0)))
++ (set (match_operand:DI 0 "register_operand" "=r")
++ (zero_extend:DI (match_dup 1)))]
++ "TARGET_ARCH64"
++ "andcc\t%1, 0xff, %0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_zero_extendqidi2_andcc_set"
++ [(set (reg:CCX CC_REG)
++ (compare:CCX (and:DI (match_operand:DI 1 "register_operand" "r")
++ (const_int 255))
++ (const_int 0)))
++ (set (match_operand:DI 0 "register_operand" "=r")
++ (zero_extend:DI (subreg:QI (match_dup 1) 0)))]
++ "TARGET_ARCH64"
++ "andcc\t%1, 0xff, %0"
++ [(set_attr "type" "compare")])
++
++;; Similarly, handle {SI,DI}->QI mode truncation followed by a compare.
++
++(define_insn "*cmp_siqi_trunc"
++ [(set (reg:CC CC_REG)
++ (compare:CC (subreg:QI (match_operand:SI 0 "register_operand" "r") 3)
++ (const_int 0)))]
++ ""
++ "andcc\t%0, 0xff, %%g0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_siqi_trunc_set"
++ [(set (reg:CC CC_REG)
++ (compare:CC (subreg:QI (match_operand:SI 1 "register_operand" "r") 3)
++ (const_int 0)))
++ (set (match_operand:QI 0 "register_operand" "=r")
++ (subreg:QI (match_dup 1) 3))]
++ ""
++ "andcc\t%1, 0xff, %0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_diqi_trunc"
++ [(set (reg:CC CC_REG)
++ (compare:CC (subreg:QI (match_operand:DI 0 "register_operand" "r") 7)
++ (const_int 0)))]
++ "TARGET_ARCH64"
++ "andcc\t%0, 0xff, %%g0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_diqi_trunc_set"
++ [(set (reg:CC CC_REG)
++ (compare:CC (subreg:QI (match_operand:DI 1 "register_operand" "r") 7)
++ (const_int 0)))
++ (set (match_operand:QI 0 "register_operand" "=r")
++ (subreg:QI (match_dup 1) 7))]
++ "TARGET_ARCH64"
++ "andcc\t%1, 0xff, %0"
++ [(set_attr "type" "compare")])
++
++
++;; Sign-extension instructions
++
++;; These patterns originally accepted general_operands, however, slightly
++;; better code is generated by only accepting register_operands, and then
++;; letting combine generate the lds[hb] insns.
++
++(define_expand "extendhisi2"
++ [(set (match_operand:SI 0 "register_operand" "")
++ (sign_extend:SI (match_operand:HI 1 "register_operand" "")))]
++ ""
++{
++ rtx temp = gen_reg_rtx (SImode);
++ rtx shift_16 = GEN_INT (16);
++ int op1_subbyte = 0;
++
++ if (GET_CODE (operand1) == SUBREG)
++ {
++ op1_subbyte = SUBREG_BYTE (operand1);
++ op1_subbyte /= GET_MODE_SIZE (SImode);
++ op1_subbyte *= GET_MODE_SIZE (SImode);
++ operand1 = XEXP (operand1, 0);
++ }
++
++ emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte),
++ shift_16));
++ emit_insn (gen_ashrsi3 (operand0, temp, shift_16));
++ DONE;
++})
++
++(define_insn "*sign_extendhisi2_insn"
++ [(set (match_operand:SI 0 "register_operand" "=r")
++ (sign_extend:SI (match_operand:HI 1 "memory_operand" "m")))]
++ ""
++ "ldsh\t%1, %0"
++ [(set_attr "type" "sload")
++ (set_attr "us3load_type" "3cycle")])
++
++(define_expand "extendqihi2"
++ [(set (match_operand:HI 0 "register_operand" "")
++ (sign_extend:HI (match_operand:QI 1 "register_operand" "")))]
++ ""
++{
++ rtx temp = gen_reg_rtx (SImode);
++ rtx shift_24 = GEN_INT (24);
++ int op1_subbyte = 0;
++ int op0_subbyte = 0;
++
++ if (GET_CODE (operand1) == SUBREG)
++ {
++ op1_subbyte = SUBREG_BYTE (operand1);
++ op1_subbyte /= GET_MODE_SIZE (SImode);
++ op1_subbyte *= GET_MODE_SIZE (SImode);
++ operand1 = XEXP (operand1, 0);
++ }
++ if (GET_CODE (operand0) == SUBREG)
++ {
++ op0_subbyte = SUBREG_BYTE (operand0);
++ op0_subbyte /= GET_MODE_SIZE (SImode);
++ op0_subbyte *= GET_MODE_SIZE (SImode);
++ operand0 = XEXP (operand0, 0);
++ }
++ emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte),
++ shift_24));
++ if (GET_MODE (operand0) != SImode)
++ operand0 = gen_rtx_SUBREG (SImode, operand0, op0_subbyte);
++ emit_insn (gen_ashrsi3 (operand0, temp, shift_24));
++ DONE;
++})
++
++(define_insn "*sign_extendqihi2_insn"
++ [(set (match_operand:HI 0 "register_operand" "=r")
++ (sign_extend:HI (match_operand:QI 1 "memory_operand" "m")))]
++ ""
++ "ldsb\t%1, %0"
++ [(set_attr "type" "sload")
++ (set_attr "us3load_type" "3cycle")])
++
++(define_expand "extendqisi2"
++ [(set (match_operand:SI 0 "register_operand" "")
++ (sign_extend:SI (match_operand:QI 1 "register_operand" "")))]
++ ""
++{
++ rtx temp = gen_reg_rtx (SImode);
++ rtx shift_24 = GEN_INT (24);
++ int op1_subbyte = 0;
++
++ if (GET_CODE (operand1) == SUBREG)
++ {
++ op1_subbyte = SUBREG_BYTE (operand1);
++ op1_subbyte /= GET_MODE_SIZE (SImode);
++ op1_subbyte *= GET_MODE_SIZE (SImode);
++ operand1 = XEXP (operand1, 0);
++ }
++
++ emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte),
++ shift_24));
++ emit_insn (gen_ashrsi3 (operand0, temp, shift_24));
++ DONE;
++})
++
++(define_insn "*sign_extendqisi2_insn"
++ [(set (match_operand:SI 0 "register_operand" "=r")
++ (sign_extend:SI (match_operand:QI 1 "memory_operand" "m")))]
++ ""
++ "ldsb\t%1, %0"
++ [(set_attr "type" "sload")
++ (set_attr "us3load_type" "3cycle")])
++
++(define_expand "extendqidi2"
++ [(set (match_operand:DI 0 "register_operand" "")
++ (sign_extend:DI (match_operand:QI 1 "register_operand" "")))]
++ "TARGET_ARCH64"
++{
++ rtx temp = gen_reg_rtx (DImode);
++ rtx shift_56 = GEN_INT (56);
++ int op1_subbyte = 0;
++
++ if (GET_CODE (operand1) == SUBREG)
++ {
++ op1_subbyte = SUBREG_BYTE (operand1);
++ op1_subbyte /= GET_MODE_SIZE (DImode);
++ op1_subbyte *= GET_MODE_SIZE (DImode);
++ operand1 = XEXP (operand1, 0);
++ }
++
++ emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operand1, op1_subbyte),
++ shift_56));
++ emit_insn (gen_ashrdi3 (operand0, temp, shift_56));
++ DONE;
++})
++
++(define_insn "*sign_extendqidi2_insn"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (sign_extend:DI (match_operand:QI 1 "memory_operand" "m")))]
++ "TARGET_ARCH64"
++ "ldsb\t%1, %0"
++ [(set_attr "type" "sload")
++ (set_attr "us3load_type" "3cycle")])
++
++(define_expand "extendhidi2"
++ [(set (match_operand:DI 0 "register_operand" "")
++ (sign_extend:DI (match_operand:HI 1 "register_operand" "")))]
++ "TARGET_ARCH64"
++{
++ rtx temp = gen_reg_rtx (DImode);
++ rtx shift_48 = GEN_INT (48);
++ int op1_subbyte = 0;
++
++ if (GET_CODE (operand1) == SUBREG)
++ {
++ op1_subbyte = SUBREG_BYTE (operand1);
++ op1_subbyte /= GET_MODE_SIZE (DImode);
++ op1_subbyte *= GET_MODE_SIZE (DImode);
++ operand1 = XEXP (operand1, 0);
++ }
++
++ emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operand1, op1_subbyte),
++ shift_48));
++ emit_insn (gen_ashrdi3 (operand0, temp, shift_48));
++ DONE;
++})
++
++(define_insn "*sign_extendhidi2_insn"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (sign_extend:DI (match_operand:HI 1 "memory_operand" "m")))]
++ "TARGET_ARCH64"
++ "ldsh\t%1, %0"
++ [(set_attr "type" "sload")
++ (set_attr "us3load_type" "3cycle")])
++
++(define_expand "extendsidi2"
++ [(set (match_operand:DI 0 "register_operand" "")
++ (sign_extend:DI (match_operand:SI 1 "register_operand" "")))]
++ "TARGET_ARCH64"
++ "")
++
++(define_insn "*sign_extendsidi2_insn"
++ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
++ (sign_extend:DI (match_operand:SI 1 "input_operand" "r,m,*f")))]
++ "TARGET_ARCH64"
++ "@
++ sra\t%1, 0, %0
++ ldsw\t%1, %0
++ movstosw\t%1, %0"
++ [(set_attr "type" "shift,sload,vismv")
++ (set_attr "us3load_type" "*,3cycle,*")
++ (set_attr "cpu_feature" "*,*,vis3")])
++
++
++;; Special pattern for optimizing bit-field compares. This is needed
++;; because combine uses this as a canonical form.
++
++(define_insn "*cmp_zero_extract"
++ [(set (reg:CC CC_REG)
++ (compare:CC
++ (zero_extract:SI (match_operand:SI 0 "register_operand" "r")
++ (match_operand:SI 1 "small_int_operand" "I")
++ (match_operand:SI 2 "small_int_operand" "I"))
++ (const_int 0)))]
++ "INTVAL (operands[2]) > 19"
++{
++ int len = INTVAL (operands[1]);
++ int pos = 32 - INTVAL (operands[2]) - len;
++ HOST_WIDE_INT mask = ((1 << len) - 1) << pos;
++ operands[1] = GEN_INT (mask);
++ return "andcc\t%0, %1, %%g0";
++}
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_zero_extract_sp64"
++ [(set (reg:CCX CC_REG)
++ (compare:CCX
++ (zero_extract:DI (match_operand:DI 0 "register_operand" "r")
++ (match_operand:SI 1 "small_int_operand" "I")
++ (match_operand:SI 2 "small_int_operand" "I"))
++ (const_int 0)))]
++ "TARGET_ARCH64 && INTVAL (operands[2]) > 51"
++{
++ int len = INTVAL (operands[1]);
++ int pos = 64 - INTVAL (operands[2]) - len;
++ HOST_WIDE_INT mask = (((unsigned HOST_WIDE_INT) 1 << len) - 1) << pos;
++ operands[1] = GEN_INT (mask);
++ return "andcc\t%0, %1, %%g0";
++}
++ [(set_attr "type" "compare")])
++
++
++;; Conversions between float, double and long double.
++
++(define_insn "extendsfdf2"
++ [(set (match_operand:DF 0 "register_operand" "=e")
++ (float_extend:DF (match_operand:SF 1 "register_operand" "f")))]
++ "TARGET_FPU"
++ "fstod\t%1, %0"
++ [(set_attr "type" "fp")
++ (set_attr "fptype" "double")])
++
++(define_expand "extendsftf2"
++ [(set (match_operand:TF 0 "nonimmediate_operand" "")
++ (float_extend:TF (match_operand:SF 1 "register_operand" "")))]
++ "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)"
++ "emit_tfmode_cvt (FLOAT_EXTEND, operands); DONE;")
++
++(define_insn "*extendsftf2_hq"
++ [(set (match_operand:TF 0 "register_operand" "=e")
++ (float_extend:TF (match_operand:SF 1 "register_operand" "f")))]
++ "TARGET_FPU && TARGET_HARD_QUAD"
++ "fstoq\t%1, %0"
++ [(set_attr "type" "fp")])
++
++(define_expand "extenddftf2"
++ [(set (match_operand:TF 0 "nonimmediate_operand" "")
++ (float_extend:TF (match_operand:DF 1 "register_operand" "")))]
++ "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)"
++ "emit_tfmode_cvt (FLOAT_EXTEND, operands); DONE;")
++
++(define_insn "*extenddftf2_hq"
++ [(set (match_operand:TF 0 "register_operand" "=e")
++ (float_extend:TF (match_operand:DF 1 "register_operand" "e")))]
++ "TARGET_FPU && TARGET_HARD_QUAD"
++ "fdtoq\t%1, %0"
++ [(set_attr "type" "fp")])
++
++(define_insn "truncdfsf2"
++ [(set (match_operand:SF 0 "register_operand" "=f")
++ (float_truncate:SF (match_operand:DF 1 "register_operand" "e")))]
++ "TARGET_FPU"
++ "fdtos\t%1, %0"
++ [(set_attr "type" "fp")
++ (set_attr "fptype" "double")
++ (set_attr "fptype_ut699" "single")])
++
++(define_expand "trunctfsf2"
++ [(set (match_operand:SF 0 "register_operand" "")
++ (float_truncate:SF (match_operand:TF 1 "general_operand" "")))]
++ "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)"
++ "emit_tfmode_cvt (FLOAT_TRUNCATE, operands); DONE;")
++
++(define_insn "*trunctfsf2_hq"
++ [(set (match_operand:SF 0 "register_operand" "=f")
++ (float_truncate:SF (match_operand:TF 1 "register_operand" "e")))]
++ "TARGET_FPU && TARGET_HARD_QUAD"
++ "fqtos\t%1, %0"
++ [(set_attr "type" "fp")])
++
++(define_expand "trunctfdf2"
++ [(set (match_operand:DF 0 "register_operand" "")
++ (float_truncate:DF (match_operand:TF 1 "general_operand" "")))]
++ "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)"
++ "emit_tfmode_cvt (FLOAT_TRUNCATE, operands); DONE;")
++
++(define_insn "*trunctfdf2_hq"
++ [(set (match_operand:DF 0 "register_operand" "=e")
++ (float_truncate:DF (match_operand:TF 1 "register_operand" "e")))]
++ "TARGET_FPU && TARGET_HARD_QUAD"
++ "fqtod\t%1, %0"
++ [(set_attr "type" "fp")])
++
++
++;; Conversion between fixed point and floating point.
++
++(define_insn "floatsisf2"
++ [(set (match_operand:SF 0 "register_operand" "=f")
++ (float:SF (match_operand:SI 1 "register_operand" "f")))]
++ "TARGET_FPU"
++ "fitos\t%1, %0"
++ [(set_attr "type" "fp")
++ (set_attr "fptype" "single")])
++
++(define_insn "floatsidf2"
++ [(set (match_operand:DF 0 "register_operand" "=e")
++ (float:DF (match_operand:SI 1 "register_operand" "f")))]
++ "TARGET_FPU"
++ "fitod\t%1, %0"
++ [(set_attr "type" "fp")
++ (set_attr "fptype" "double")])
++
++(define_expand "floatsitf2"
++ [(set (match_operand:TF 0 "nonimmediate_operand" "")
++ (float:TF (match_operand:SI 1 "register_operand" "")))]
++ "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)"
++ "emit_tfmode_cvt (FLOAT, operands); DONE;")
++
++(define_insn "*floatsitf2_hq"
++ [(set (match_operand:TF 0 "register_operand" "=e")
++ (float:TF (match_operand:SI 1 "register_operand" "f")))]
++ "TARGET_FPU && TARGET_HARD_QUAD"
++ "fitoq\t%1, %0"
++ [(set_attr "type" "fp")])
++
++(define_expand "floatunssitf2"
++ [(set (match_operand:TF 0 "nonimmediate_operand" "")
++ (unsigned_float:TF (match_operand:SI 1 "register_operand" "")))]
++ "TARGET_FPU && TARGET_ARCH64 && !TARGET_HARD_QUAD"
++ "emit_tfmode_cvt (UNSIGNED_FLOAT, operands); DONE;")
++
++;; Now the same for 64 bit sources.
++
++(define_insn "floatdisf2"
++ [(set (match_operand:SF 0 "register_operand" "=f")
++ (float:SF (match_operand:DI 1 "register_operand" "e")))]
++ "TARGET_V9 && TARGET_FPU"
++ "fxtos\t%1, %0"
++ [(set_attr "type" "fp")
++ (set_attr "fptype" "double")])
++
++(define_expand "floatunsdisf2"
++ [(use (match_operand:SF 0 "register_operand" ""))
++ (use (match_operand:DI 1 "general_operand" ""))]
++ "TARGET_ARCH64 && TARGET_FPU"
++ "sparc_emit_floatunsdi (operands, SFmode); DONE;")
++
++(define_insn "floatdidf2"
++ [(set (match_operand:DF 0 "register_operand" "=e")
++ (float:DF (match_operand:DI 1 "register_operand" "e")))]
++ "TARGET_V9 && TARGET_FPU"
++ "fxtod\t%1, %0"
++ [(set_attr "type" "fp")
++ (set_attr "fptype" "double")])
++
++(define_expand "floatunsdidf2"
++ [(use (match_operand:DF 0 "register_operand" ""))
++ (use (match_operand:DI 1 "general_operand" ""))]
++ "TARGET_ARCH64 && TARGET_FPU"
++ "sparc_emit_floatunsdi (operands, DFmode); DONE;")
++
++(define_expand "floatditf2"
++ [(set (match_operand:TF 0 "nonimmediate_operand" "")
++ (float:TF (match_operand:DI 1 "register_operand" "")))]
++ "TARGET_FPU && TARGET_V9 && (TARGET_HARD_QUAD || TARGET_ARCH64)"
++ "emit_tfmode_cvt (FLOAT, operands); DONE;")
++
++(define_insn "*floatditf2_hq"
++ [(set (match_operand:TF 0 "register_operand" "=e")
++ (float:TF (match_operand:DI 1 "register_operand" "e")))]
++ "TARGET_V9 && TARGET_FPU && TARGET_HARD_QUAD"
++ "fxtoq\t%1, %0"
++ [(set_attr "type" "fp")])
++
++(define_expand "floatunsditf2"
++ [(set (match_operand:TF 0 "nonimmediate_operand" "")
++ (unsigned_float:TF (match_operand:DI 1 "register_operand" "")))]
++ "TARGET_FPU && TARGET_ARCH64 && !TARGET_HARD_QUAD"
++ "emit_tfmode_cvt (UNSIGNED_FLOAT, operands); DONE;")
++
++;; Convert a float to an actual integer.
++;; Truncation is performed as part of the conversion.
++
++(define_insn "fix_truncsfsi2"
++ [(set (match_operand:SI 0 "register_operand" "=f")
++ (fix:SI (fix:SF (match_operand:SF 1 "register_operand" "f"))))]
++ "TARGET_FPU"
++ "fstoi\t%1, %0"
++ [(set_attr "type" "fp")
++ (set_attr "fptype" "single")])
++
++(define_insn "fix_truncdfsi2"
++ [(set (match_operand:SI 0 "register_operand" "=f")
++ (fix:SI (fix:DF (match_operand:DF 1 "register_operand" "e"))))]
++ "TARGET_FPU"
++ "fdtoi\t%1, %0"
++ [(set_attr "type" "fp")
++ (set_attr "fptype" "double")
++ (set_attr "fptype_ut699" "single")])
++
++(define_expand "fix_trunctfsi2"
++ [(set (match_operand:SI 0 "register_operand" "")
++ (fix:SI (match_operand:TF 1 "general_operand" "")))]
++ "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)"
++ "emit_tfmode_cvt (FIX, operands); DONE;")
++
++(define_insn "*fix_trunctfsi2_hq"
++ [(set (match_operand:SI 0 "register_operand" "=f")
++ (fix:SI (match_operand:TF 1 "register_operand" "e")))]
++ "TARGET_FPU && TARGET_HARD_QUAD"
++ "fqtoi\t%1, %0"
++ [(set_attr "type" "fp")])
++
++(define_expand "fixuns_trunctfsi2"
++ [(set (match_operand:SI 0 "register_operand" "")
++ (unsigned_fix:SI (match_operand:TF 1 "general_operand" "")))]
++ "TARGET_FPU && TARGET_ARCH64 && !TARGET_HARD_QUAD"
++ "emit_tfmode_cvt (UNSIGNED_FIX, operands); DONE;")
++
++;; Now the same, for V9 targets
++
++(define_insn "fix_truncsfdi2"
++ [(set (match_operand:DI 0 "register_operand" "=e")
++ (fix:DI (fix:SF (match_operand:SF 1 "register_operand" "f"))))]
++ "TARGET_V9 && TARGET_FPU"
++ "fstox\t%1, %0"
++ [(set_attr "type" "fp")
++ (set_attr "fptype" "double")])
++
++(define_expand "fixuns_truncsfdi2"
++ [(use (match_operand:DI 0 "register_operand" ""))
++ (use (match_operand:SF 1 "general_operand" ""))]
++ "TARGET_ARCH64 && TARGET_FPU"
++ "sparc_emit_fixunsdi (operands, SFmode); DONE;")
++
++(define_insn "fix_truncdfdi2"
++ [(set (match_operand:DI 0 "register_operand" "=e")
++ (fix:DI (fix:DF (match_operand:DF 1 "register_operand" "e"))))]
++ "TARGET_V9 && TARGET_FPU"
++ "fdtox\t%1, %0"
++ [(set_attr "type" "fp")
++ (set_attr "fptype" "double")])
++
++(define_expand "fixuns_truncdfdi2"
++ [(use (match_operand:DI 0 "register_operand" ""))
++ (use (match_operand:DF 1 "general_operand" ""))]
++ "TARGET_ARCH64 && TARGET_FPU"
++ "sparc_emit_fixunsdi (operands, DFmode); DONE;")
++
++(define_expand "fix_trunctfdi2"
++ [(set (match_operand:DI 0 "register_operand" "")
++ (fix:DI (match_operand:TF 1 "general_operand" "")))]
++ "TARGET_V9 && TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)"
++ "emit_tfmode_cvt (FIX, operands); DONE;")
++
++(define_insn "*fix_trunctfdi2_hq"
++ [(set (match_operand:DI 0 "register_operand" "=e")
++ (fix:DI (match_operand:TF 1 "register_operand" "e")))]
++ "TARGET_V9 && TARGET_FPU && TARGET_HARD_QUAD"
++ "fqtox\t%1, %0"
++ [(set_attr "type" "fp")])
++
++(define_expand "fixuns_trunctfdi2"
++ [(set (match_operand:DI 0 "register_operand" "")
++ (unsigned_fix:DI (match_operand:TF 1 "general_operand" "")))]
++ "TARGET_FPU && TARGET_ARCH64 && !TARGET_HARD_QUAD"
++ "emit_tfmode_cvt (UNSIGNED_FIX, operands); DONE;")
++
++
++;; Integer addition/subtraction instructions.
++
++(define_expand "adddi3"
++ [(set (match_operand:DI 0 "register_operand" "")
++ (plus:DI (match_operand:DI 1 "register_operand" "")
++ (match_operand:DI 2 "arith_double_add_operand" "")))]
++ ""
++{
++ if (TARGET_ARCH32)
++ {
++ emit_insn (gen_adddi3_sp32 (operands[0], operands[1], operands[2]));
++ DONE;
++ }
++})
++
++;; Turning an add/sub instruction into the other changes the Carry flag
++;; so the 4096 trick cannot be used for operations in CCXCmode.
++
++(define_expand "uaddvdi4"
++ [(parallel [(set (reg:CCXC CC_REG)
++ (compare:CCXC (plus:DI (match_operand:DI 1 "register_operand")
++ (match_operand:DI 2 "arith_double_operand"))
++ (match_dup 1)))
++ (set (match_operand:DI 0 "register_operand")
++ (plus:DI (match_dup 1) (match_dup 2)))])
++ (set (pc) (if_then_else (ltu (reg:CCXC CC_REG) (const_int 0))
++ (label_ref (match_operand 3))
++ (pc)))]
++ ""
++{
++ if (TARGET_ARCH32)
++ {
++ emit_insn (gen_uaddvdi4_sp32 (operands[0], operands[1], operands[2]));
++ rtx x = gen_rtx_LTU (VOIDmode, gen_rtx_REG (CCCmode, SPARC_ICC_REG),
++ const0_rtx);
++ emit_jump_insn (gen_cbranchcc4 (x, XEXP (x, 0), XEXP (x, 1), operands[3]));
++ DONE;
++ }
++})
++
++;; Turning an add/sub instruction into the other does not change the Overflow
++;; flag so the 4096 trick can be used for operations in CCXVmode.
++
++(define_expand "addvdi4"
++ [(parallel [(set (reg:CCXV CC_REG)
++ (compare:CCXV (plus:DI (match_operand:DI 1 "register_operand")
++ (match_operand:DI 2 "arith_double_add_operand"))
++ (unspec:DI [(match_dup 1) (match_dup 2)]
++ UNSPEC_ADDV)))
++ (set (match_operand:DI 0 "register_operand")
++ (plus:DI (match_dup 1) (match_dup 2)))])
++ (set (pc) (if_then_else (ne (reg:CCXV CC_REG) (const_int 0))
++ (label_ref (match_operand 3))
++ (pc)))]
++ ""
++{
++ if (TARGET_ARCH32)
++ {
++ emit_insn (gen_addvdi4_sp32 (operands[0], operands[1], operands[2]));
++ rtx x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CCVmode, SPARC_ICC_REG),
++ const0_rtx);
++ emit_jump_insn (gen_cbranchcc4 (x, XEXP (x, 0), XEXP (x, 1), operands[3]));
++ DONE;
++ }
++})
++
++(define_insn_and_split "adddi3_sp32"
++ [(set (match_operand:DI 0 "register_operand" "=&r")
++ (plus:DI (match_operand:DI 1 "register_operand" "%r")
++ (match_operand:DI 2 "arith_double_operand" "rHI")))
++ (clobber (reg:CC CC_REG))]
++ "TARGET_ARCH32"
++ "#"
++ "&& reload_completed"
++ [(parallel [(set (reg:CCC CC_REG)
++ (compare:CCC (plus:SI (match_dup 4) (match_dup 5))
++ (match_dup 4)))
++ (set (match_dup 3)
++ (plus:SI (match_dup 4) (match_dup 5)))])
++ (set (match_dup 6)
++ (plus:SI (plus:SI (match_dup 7) (match_dup 8))
++ (ltu:SI (reg:CCC CC_REG) (const_int 0))))]
++{
++ operands[3] = gen_lowpart (SImode, operands[0]);
++ operands[4] = gen_lowpart (SImode, operands[1]);
++ operands[5] = gen_lowpart (SImode, operands[2]);
++ operands[6] = gen_highpart (SImode, operands[0]);
++ operands[7] = gen_highpart_mode (SImode, DImode, operands[1]);
++ operands[8] = gen_highpart_mode (SImode, DImode, operands[2]);
++}
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "uaddvdi4_sp32"
++ [(set (reg:CCC CC_REG)
++ (compare:CCC (plus:DI (match_operand:DI 1 "register_operand" "%r")
++ (match_operand:DI 2 "arith_double_operand" "rHI"))
++ (match_dup 1)))
++ (set (match_operand:DI 0 "register_operand" "=&r")
++ (plus:DI (match_dup 1) (match_dup 2)))]
++ "TARGET_ARCH32"
++ "#"
++ "&& reload_completed"
++ [(parallel [(set (reg:CCC CC_REG)
++ (compare:CCC (plus:SI (match_dup 4) (match_dup 5))
++ (match_dup 4)))
++ (set (match_dup 3)
++ (plus:SI (match_dup 4) (match_dup 5)))])
++ (parallel [(set (reg:CCC CC_REG)
++ (compare:CCC (zero_extend:DI
++ (plus:SI (plus:SI (match_dup 7) (match_dup 8))
++ (ltu:SI (reg:CCC CC_REG)
++ (const_int 0))))
++ (plus:DI (plus:DI (zero_extend:DI (match_dup 7))
++ (zero_extend:DI (match_dup 8)))
++ (ltu:DI (reg:CCC CC_REG)
++ (const_int 0)))))
++ (set (match_dup 6)
++ (plus:SI (plus:SI (match_dup 7) (match_dup 8))
++ (ltu:SI (reg:CCC CC_REG)
++ (const_int 0))))])]
++{
++ operands[3] = gen_lowpart (SImode, operands[0]);
++ operands[4] = gen_lowpart (SImode, operands[1]);
++ operands[5] = gen_lowpart (SImode, operands[2]);
++ operands[6] = gen_highpart (SImode, operands[0]);
++ operands[7] = gen_highpart_mode (SImode, DImode, operands[1]);
++ operands[8] = gen_highpart_mode (SImode, DImode, operands[2]);
++}
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "addvdi4_sp32"
++ [(set (reg:CCV CC_REG)
++ (compare:CCV (plus:DI (match_operand:DI 1 "register_operand" "%r")
++ (match_operand:DI 2 "arith_double_operand" "rHI"))
++ (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_ADDV)))
++ (set (match_operand:DI 0 "register_operand" "=&r")
++ (plus:DI (match_dup 1) (match_dup 2)))]
++ "TARGET_ARCH32"
++ "#"
++ "&& reload_completed"
++ [(parallel [(set (reg:CCC CC_REG)
++ (compare:CCC (plus:SI (match_dup 4) (match_dup 5))
++ (match_dup 4)))
++ (set (match_dup 3)
++ (plus:SI (match_dup 4) (match_dup 5)))])
++ (parallel [(set (reg:CCV CC_REG)
++ (compare:CCV (plus:SI (plus:SI (match_dup 7) (match_dup 8))
++ (ltu:SI (reg:CCC CC_REG)
++ (const_int 0)))
++ (unspec:SI [(plus:SI (match_dup 7) (match_dup 8))
++ (ltu:SI (reg:CCC CC_REG)
++ (const_int 0))]
++ UNSPEC_ADDV)))
++ (set (match_dup 6)
++ (plus:SI (plus:SI (match_dup 7) (match_dup 8))
++ (ltu:SI (reg:CCC CC_REG) (const_int 0))))])]
++{
++ operands[3] = gen_lowpart (SImode, operands[0]);
++ operands[4] = gen_lowpart (SImode, operands[1]);
++ operands[5] = gen_lowpart (SImode, operands[2]);
++ operands[6] = gen_highpart (SImode, operands[0]);
++ operands[7] = gen_highpart_mode (SImode, DImode, operands[1]);
++ operands[8] = gen_highpart_mode (SImode, DImode, operands[2]);
++}
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*addx_extend_sp32"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (zero_extend:DI (plus:SI (plus:SI
++ (match_operand:SI 1 "register_operand" "%r")
++ (match_operand:SI 2 "arith_operand" "rI"))
++ (ltu:SI (reg:CCC CC_REG) (const_int 0)))))]
++ "TARGET_ARCH32"
++ "#"
++ "&& reload_completed"
++ [(set (match_dup 3) (plus:SI (plus:SI (match_dup 1) (match_dup 2))
++ (ltu:SI (reg:CCC CC_REG) (const_int 0))))
++ (set (match_dup 4) (const_int 0))]
++ "operands[3] = gen_lowpart (SImode, operands[0]);
++ operands[4] = gen_highpart (SImode, operands[0]);"
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*adddi3_extend_sp32"
++ [(set (match_operand:DI 0 "register_operand" "=&r")
++ (plus:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
++ (match_operand:DI 2 "register_operand" "r")))
++ (clobber (reg:CC CC_REG))]
++ "TARGET_ARCH32"
++ "#"
++ "&& reload_completed"
++ [(parallel [(set (reg:CCC CC_REG)
++ (compare:CCC (plus:SI (match_dup 3) (match_dup 1))
++ (match_dup 3)))
++ (set (match_dup 5) (plus:SI (match_dup 3) (match_dup 1)))])
++ (set (match_dup 6)
++ (plus:SI (plus:SI (match_dup 4) (const_int 0))
++ (ltu:SI (reg:CCC CC_REG) (const_int 0))))]
++ "operands[3] = gen_lowpart (SImode, operands[2]);
++ operands[4] = gen_highpart (SImode, operands[2]);
++ operands[5] = gen_lowpart (SImode, operands[0]);
++ operands[6] = gen_highpart (SImode, operands[0]);"
++ [(set_attr "length" "2")])
++
++(define_insn "*adddi3_sp64"
++ [(set (match_operand:DI 0 "register_operand" "=r,r")
++ (plus:DI (match_operand:DI 1 "register_operand" "%r,r")
++ (match_operand:DI 2 "arith_add_operand" "rI,O")))]
++ "TARGET_ARCH64"
++ "@
++ add\t%1, %2, %0
++ sub\t%1, -%2, %0")
++
++(define_insn "addsi3"
++ [(set (match_operand:SI 0 "register_operand" "=r,r")
++ (plus:SI (match_operand:SI 1 "register_operand" "%r,r")
++ (match_operand:SI 2 "arith_add_operand" "rI,O")))]
++ ""
++ "@
++ add\t%1, %2, %0
++ sub\t%1, -%2, %0")
++
++;; Turning an add/sub instruction into the other changes the Carry flag
++;; so the 4096 trick cannot be used for operations in CCCmode.
++
++(define_expand "uaddvsi4"
++ [(parallel [(set (reg:CCC CC_REG)
++ (compare:CCC (plus:SI (match_operand:SI 1 "register_operand")
++ (match_operand:SI 2 "arith_operand"))
++ (match_dup 1)))
++ (set (match_operand:SI 0 "register_operand")
++ (plus:SI (match_dup 1) (match_dup 2)))])
++ (set (pc) (if_then_else (ltu (reg:CCC CC_REG) (const_int 0))
++ (label_ref (match_operand 3))
++ (pc)))]
++ "")
++
++;; Turning an add/sub instruction into the other does not change the Overflow
++;; flag so the 4096 trick can be used for operations in CCVmode.
++
++(define_expand "addvsi4"
++ [(parallel [(set (reg:CCV CC_REG)
++ (compare:CCV (plus:SI (match_operand:SI 1 "register_operand")
++ (match_operand:SI 2 "arith_add_operand"))
++ (unspec:SI [(match_dup 1) (match_dup 2)]
++ UNSPEC_ADDV)))
++ (set (match_operand:SI 0 "register_operand")
++ (plus:SI (match_dup 1) (match_dup 2)))])
++ (set (pc) (if_then_else (ne (reg:CCV CC_REG) (const_int 0))
++ (label_ref (match_operand 3))
++ (pc)))]
++ "")
++
++(define_insn "*cmp_ccnz_plus"
++ [(set (reg:CCNZ CC_REG)
++ (compare:CCNZ (plus:SI (match_operand:SI 0 "register_operand" "%r")
++ (match_operand:SI 1 "arith_operand" "rI"))
++ (const_int 0)))]
++ ""
++ "addcc\t%0, %1, %%g0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccxnz_plus"
++ [(set (reg:CCXNZ CC_REG)
++ (compare:CCXNZ (plus:DI (match_operand:DI 0 "register_operand" "%r")
++ (match_operand:DI 1 "arith_operand" "rI"))
++ (const_int 0)))]
++ "TARGET_ARCH64"
++ "addcc\t%0, %1, %%g0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccnz_plus_set"
++ [(set (reg:CCNZ CC_REG)
++ (compare:CCNZ (plus:SI (match_operand:SI 1 "register_operand" "%r")
++ (match_operand:SI 2 "arith_operand" "rI"))
++ (const_int 0)))
++ (set (match_operand:SI 0 "register_operand" "=r")
++ (plus:SI (match_dup 1) (match_dup 2)))]
++ ""
++ "addcc\t%1, %2, %0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccxnz_plus_set"
++ [(set (reg:CCXNZ CC_REG)
++ (compare:CCXNZ (plus:DI (match_operand:DI 1 "register_operand" "%r")
++ (match_operand:DI 2 "arith_operand" "rI"))
++ (const_int 0)))
++ (set (match_operand:DI 0 "register_operand" "=r")
++ (plus:DI (match_dup 1) (match_dup 2)))]
++ "TARGET_ARCH64"
++ "addcc\t%1, %2, %0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccc_plus"
++ [(set (reg:CCC CC_REG)
++ (compare:CCC (plus:SI (match_operand:SI 0 "register_operand" "%r")
++ (match_operand:SI 1 "arith_operand" "rI"))
++ (match_dup 0)))]
++ ""
++ "addcc\t%0, %1, %%g0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccxc_plus"
++ [(set (reg:CCXC CC_REG)
++ (compare:CCXC (plus:DI (match_operand:DI 0 "register_operand" "%r")
++ (match_operand:DI 1 "arith_operand" "rI"))
++ (match_dup 0)))]
++ "TARGET_ARCH64"
++ "addcc\t%0, %1, %%g0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccc_plus_set"
++ [(set (reg:CCC CC_REG)
++ (compare:CCC (plus:SI (match_operand:SI 1 "register_operand" "%r")
++ (match_operand:SI 2 "arith_operand" "rI"))
++ (match_dup 1)))
++ (set (match_operand:SI 0 "register_operand" "=r")
++ (plus:SI (match_dup 1) (match_dup 2)))]
++ ""
++ "addcc\t%1, %2, %0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccxc_plus_set"
++ [(set (reg:CCXC CC_REG)
++ (compare:CCXC (plus:DI (match_operand:DI 1 "register_operand" "%r")
++ (match_operand:DI 2 "arith_operand" "rI"))
++ (match_dup 1)))
++ (set (match_operand:DI 0 "register_operand" "=r")
++ (plus:DI (match_dup 1) (match_dup 2)))]
++ "TARGET_ARCH64"
++ "addcc\t%1, %2, %0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccc_plus_sltu_set"
++ [(set (reg:CCC CC_REG)
++ (compare:CCC (zero_extend:DI
++ (plus:SI
++ (plus:SI (match_operand:SI 1 "register_operand" "%r")
++ (match_operand:SI 2 "arith_operand" "rI"))
++ (ltu:SI (reg:CCC CC_REG) (const_int 0))))
++ (plus:DI (plus:DI (zero_extend:DI (match_dup 1))
++ (zero_extend:DI (match_dup 2)))
++ (ltu:DI (reg:CCC CC_REG) (const_int 0)))))
++ (set (match_operand:SI 0 "register_operand" "=r")
++ (plus:SI (plus:SI (match_dup 1) (match_dup 2))
++ (ltu:SI (reg:CCC CC_REG) (const_int 0))))]
++ ""
++ "addxcc\t%1, %2, %0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccv_plus"
++ [(set (reg:CCV CC_REG)
++ (compare:CCV (plus:SI (match_operand:SI 0 "register_operand" "%r,r")
++ (match_operand:SI 1 "arith_add_operand" "rI,O"))
++ (unspec:SI [(match_dup 0) (match_dup 1)] UNSPEC_ADDV)))]
++ ""
++ "@
++ addcc\t%0, %1, %%g0
++ subcc\t%0, -%1, %%g0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccxv_plus"
++ [(set (reg:CCXV CC_REG)
++ (compare:CCXV (plus:DI (match_operand:DI 0 "register_operand" "%r,r")
++ (match_operand:DI 1 "arith_add_operand" "rI,O"))
++ (unspec:DI [(match_dup 0) (match_dup 1)] UNSPEC_ADDV)))]
++ "TARGET_ARCH64"
++ "@
++ addcc\t%0, %1, %%g0
++ subcc\t%0, -%1, %%g0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccv_plus_set"
++ [(set (reg:CCV CC_REG)
++ (compare:CCV (plus:SI (match_operand:SI 1 "register_operand" "%r,r")
++ (match_operand:SI 2 "arith_add_operand" "rI,O"))
++ (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_ADDV)))
++ (set (match_operand:SI 0 "register_operand" "=r,r")
++ (plus:SI (match_dup 1) (match_dup 2)))]
++ ""
++ "@
++ addcc\t%1, %2, %0
++ subcc\t%1, -%2, %0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccxv_plus_set"
++ [(set (reg:CCXV CC_REG)
++ (compare:CCXV (plus:DI (match_operand:DI 1 "register_operand" "%r,r")
++ (match_operand:DI 2 "arith_add_operand" "rI,O"))
++ (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_ADDV)))
++ (set (match_operand:DI 0 "register_operand" "=r,r")
++ (plus:DI (match_dup 1) (match_dup 2)))]
++ "TARGET_ARCH64"
++ "@
++ addcc\t%1, %2, %0
++ subcc\t%1, -%2, %0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccv_plus_sltu_set"
++ [(set (reg:CCV CC_REG)
++ (compare:CCV (plus:SI (plus:SI (match_operand:SI 1 "register_operand" "%r")
++ (match_operand:SI 2 "arith_operand" "rI"))
++ (ltu:SI (reg:CCC CC_REG) (const_int 0)))
++ (unspec:SI [(plus:SI (match_dup 1) (match_dup 2))
++ (ltu:SI (reg:CCC CC_REG) (const_int 0))]
++ UNSPEC_ADDV)))
++ (set (match_operand:SI 0 "register_operand" "=r")
++ (plus:SI (plus:SI (match_dup 1) (match_dup 2))
++ (ltu:SI (reg:CCC CC_REG) (const_int 0))))]
++ ""
++ "addxcc\t%1, %2, %0"
++ [(set_attr "type" "compare")])
++
++
++(define_expand "subdi3"
++ [(set (match_operand:DI 0 "register_operand" "")
++ (minus:DI (match_operand:DI 1 "register_operand" "")
++ (match_operand:DI 2 "arith_double_add_operand" "")))]
++ ""
++{
++ if (TARGET_ARCH32)
++ {
++ emit_insn (gen_subdi3_sp32 (operands[0], operands[1], operands[2]));
++ DONE;
++ }
++})
++
++;; Turning an add/sub instruction into the other changes the Carry flag
++;; so the 4096 trick cannot be used for operations in CCXmode.
++
++(define_expand "usubvdi4"
++ [(parallel [(set (reg:CCX CC_REG)
++ (compare:CCX (match_operand:DI 1 "register_or_zero_operand")
++ (match_operand:DI 2 "arith_double_operand")))
++ (set (match_operand:DI 0 "register_operand")
++ (minus:DI (match_dup 1) (match_dup 2)))])
++ (set (pc) (if_then_else (ltu (reg:CCX CC_REG) (const_int 0))
++ (label_ref (match_operand 3))
++ (pc)))]
++ ""
++{
++ if (operands[1] == const0_rtx)
++ {
++ emit_insn (gen_unegvdi3 (operands[0], operands[2], operands[3]));
++ DONE;
++ }
++
++ if (TARGET_ARCH32)
++ {
++ emit_insn (gen_usubvdi4_sp32 (operands[0], operands[1], operands[2]));
++ rtx x = gen_rtx_LTU (VOIDmode, gen_rtx_REG (CCCmode, SPARC_ICC_REG),
++ const0_rtx);
++ emit_jump_insn (gen_cbranchcc4 (x, XEXP (x, 0), XEXP (x, 1), operands[3]));
++ DONE;
++ }
++})
++
++;; Turning an add/sub instruction into the other does not change the Overflow
++;; flag so the 4096 trick can be used for operations in CCXVmode.
++
++(define_expand "subvdi4"
++ [(parallel [(set (reg:CCXV CC_REG)
++ (compare:CCXV (minus:DI (match_operand:DI 1 "register_operand")
++ (match_operand:DI 2 "arith_double_add_operand"))
++ (unspec:DI [(match_dup 1) (match_dup 2)]
++ UNSPEC_SUBV)))
++ (set (match_operand:DI 0 "register_operand")
++ (minus:DI (match_dup 1) (match_dup 2)))])
++ (set (pc) (if_then_else (ne (reg:CCXV CC_REG) (const_int 0))
++ (label_ref (match_operand 3))
++ (pc)))]
++ ""
++{
++ if (TARGET_ARCH32)
++ {
++ emit_insn (gen_subvdi4_sp32 (operands[0], operands[1], operands[2]));
++ rtx x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CCVmode, SPARC_ICC_REG),
++ const0_rtx);
++ emit_jump_insn (gen_cbranchcc4 (x, XEXP (x, 0), XEXP (x, 1), operands[3]));
++ DONE;
++ }
++})
++
++(define_insn_and_split "subdi3_sp32"
++ [(set (match_operand:DI 0 "register_operand" "=&r")
++ (minus:DI (match_operand:DI 1 "register_operand" "r")
++ (match_operand:DI 2 "arith_double_operand" "rHI")))
++ (clobber (reg:CC CC_REG))]
++ "TARGET_ARCH32"
++ "#"
++ "&& reload_completed"
++ [(parallel [(set (reg:CC CC_REG)
++ (compare:CC (match_dup 4) (match_dup 5)))
++ (set (match_dup 3)
++ (minus:SI (match_dup 4) (match_dup 5)))])
++ (set (match_dup 6)
++ (minus:SI (minus:SI (match_dup 7) (match_dup 8))
++ (ltu:SI (reg:CC CC_REG) (const_int 0))))]
++{
++ operands[3] = gen_lowpart (SImode, operands[0]);
++ operands[4] = gen_lowpart (SImode, operands[1]);
++ operands[5] = gen_lowpart (SImode, operands[2]);
++ operands[6] = gen_highpart (SImode, operands[0]);
++ operands[7] = gen_highpart (SImode, operands[1]);
++ operands[8] = gen_highpart_mode (SImode, DImode, operands[2]);
++}
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "usubvdi4_sp32"
++ [(set (reg:CCC CC_REG)
++ (compare:CCC (match_operand:DI 1 "register_operand" "r")
++ (match_operand:DI 2 "arith_double_operand" "rHI")))
++ (set (match_operand:DI 0 "register_operand" "=&r")
++ (minus:DI (match_dup 1) (match_dup 2)))]
++ "TARGET_ARCH32"
++ "#"
++ "&& reload_completed"
++ [(parallel [(set (reg:CC CC_REG)
++ (compare:CC (match_dup 4) (match_dup 5)))
++ (set (match_dup 3)
++ (minus:SI (match_dup 4) (match_dup 5)))])
++ (parallel [(set (reg:CCC CC_REG)
++ (compare:CCC (zero_extend:DI
++ (minus:SI (minus:SI (match_dup 7)
++ (ltu:SI (reg:CC CC_REG)
++ (const_int 0)))
++ (match_dup 8)))
++ (minus:DI
++ (minus:DI (zero_extend:DI (match_dup 7))
++ (ltu:DI (reg:CC CC_REG)
++ (const_int 0)))
++ (zero_extend:DI (match_dup 8)))))
++ (set (match_dup 6)
++ (minus:SI (minus:SI (match_dup 7)
++ (ltu:SI (reg:CC CC_REG)
++ (const_int 0)))
++ (match_dup 8)))])]
++{
++ operands[3] = gen_lowpart (SImode, operands[0]);
++ operands[4] = gen_lowpart (SImode, operands[1]);
++ operands[5] = gen_lowpart (SImode, operands[2]);
++ operands[6] = gen_highpart (SImode, operands[0]);
++ operands[7] = gen_highpart_mode (SImode, DImode, operands[1]);
++ operands[8] = gen_highpart_mode (SImode, DImode, operands[2]);
++}
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "subvdi4_sp32"
++ [(set (reg:CCV CC_REG)
++ (compare:CCV (minus:DI (match_operand:DI 1 "register_operand" "%r")
++ (match_operand:DI 2 "arith_double_operand" "rHI"))
++ (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SUBV)))
++ (set (match_operand:DI 0 "register_operand" "=&r")
++ (minus:DI (match_dup 1) (match_dup 2)))]
++ "TARGET_ARCH32"
++ "#"
++ "&& reload_completed"
++ [(parallel [(set (reg:CC CC_REG)
++ (compare:CC (match_dup 4) (match_dup 5)))
++ (set (match_dup 3)
++ (minus:SI (match_dup 4) (match_dup 5)))])
++ (parallel [(set (reg:CCV CC_REG)
++ (compare:CCV (minus:SI (minus:SI (match_dup 7) (match_dup 8))
++ (ltu:SI (reg:CC CC_REG)
++ (const_int 0)))
++ (unspec:SI [(minus:SI (match_dup 7) (match_dup 8))
++ (ltu:SI (reg:CC CC_REG)
++ (const_int 0))]
++ UNSPEC_SUBV)))
++ (set (match_dup 6)
++ (minus:SI (minus:SI (match_dup 7) (match_dup 8))
++ (ltu:SI (reg:CC CC_REG) (const_int 0))))])]
++{
++ operands[3] = gen_lowpart (SImode, operands[0]);
++ operands[4] = gen_lowpart (SImode, operands[1]);
++ operands[5] = gen_lowpart (SImode, operands[2]);
++ operands[6] = gen_highpart (SImode, operands[0]);
++ operands[7] = gen_highpart_mode (SImode, DImode, operands[1]);
++ operands[8] = gen_highpart_mode (SImode, DImode, operands[2]);
++}
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*subx_extend_sp32"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (zero_extend:DI (minus:SI (minus:SI
++ (match_operand:SI 1 "register_or_zero_operand" "rJ")
++ (match_operand:SI 2 "arith_operand" "rI"))
++ (ltu:SI (reg:CCC CC_REG) (const_int 0)))))]
++ "TARGET_ARCH32"
++ "#"
++ "&& reload_completed"
++ [(set (match_dup 3) (minus:SI (minus:SI (match_dup 1) (match_dup 2))
++ (ltu:SI (reg:CCC CC_REG) (const_int 0))))
++ (set (match_dup 4) (const_int 0))]
++ "operands[3] = gen_lowpart (SImode, operands[0]);
++ operands[4] = gen_highpart (SImode, operands[0]);"
++ [(set_attr "length" "2")])
++
++(define_insn_and_split "*subdi3_extend_sp32"
++ [(set (match_operand:DI 0 "register_operand" "=&r")
++ (minus:DI (match_operand:DI 1 "register_operand" "r")
++ (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))
++ (clobber (reg:CC CC_REG))]
++ "TARGET_ARCH32"
++ "#"
++ "&& reload_completed"
++ [(parallel [(set (reg:CC CC_REG)
++ (compare:CC (match_dup 3) (match_dup 2)))
++ (set (match_dup 5) (minus:SI (match_dup 3) (match_dup 2)))])
++ (set (match_dup 6)
++ (minus:SI (minus:SI (match_dup 4) (const_int 0))
++ (ltu:SI (reg:CC CC_REG) (const_int 0))))]
++ "operands[3] = gen_lowpart (SImode, operands[1]);
++ operands[4] = gen_highpart (SImode, operands[1]);
++ operands[5] = gen_lowpart (SImode, operands[0]);
++ operands[6] = gen_highpart (SImode, operands[0]);"
++ [(set_attr "length" "2")])
++
++(define_insn "*subdi3_sp64"
++ [(set (match_operand:DI 0 "register_operand" "=r,r")
++ (minus:DI (match_operand:DI 1 "register_operand" "r,r")
++ (match_operand:DI 2 "arith_add_operand" "rI,O")))]
++ "TARGET_ARCH64"
++ "@
++ sub\t%1, %2, %0
++ add\t%1, -%2, %0")
++
++(define_insn "subsi3"
++ [(set (match_operand:SI 0 "register_operand" "=r,r")
++ (minus:SI (match_operand:SI 1 "register_operand" "r,r")
++ (match_operand:SI 2 "arith_add_operand" "rI,O")))]
++ ""
++ "@
++ sub\t%1, %2, %0
++ add\t%1, -%2, %0")
++
++;; Turning an add/sub instruction into the other changes the Carry flag
++;; so the 4096 trick cannot be used for operations in CCmode.
++
++(define_expand "usubvsi4"
++ [(parallel [(set (reg:CC CC_REG)
++ (compare:CC (match_operand:SI 1 "register_or_zero_operand")
++ (match_operand:SI 2 "arith_operand")))
++ (set (match_operand:SI 0 "register_operand")
++ (minus:SI (match_dup 1) (match_dup 2)))])
++ (set (pc) (if_then_else (ltu (reg:CC CC_REG) (const_int 0))
++ (label_ref (match_operand 3))
++ (pc)))]
++ ""
++{
++ if (operands[1] == const0_rtx)
++ {
++ emit_insn (gen_unegvsi3 (operands[0], operands[2], operands[3]));
++ DONE;
++ }
++})
++
++;; Turning an add/sub instruction into the other does not change the Overflow
++;; flag so the 4096 trick can be used for operations in CCVmode.
++
++(define_expand "subvsi4"
++ [(parallel [(set (reg:CCV CC_REG)
++ (compare:CCV (minus:SI (match_operand:SI 1 "register_operand")
++ (match_operand:SI 2 "arith_add_operand"))
++ (unspec:SI [(match_dup 1) (match_dup 2)]
++ UNSPEC_SUBV)))
++ (set (match_operand:SI 0 "register_operand")
++ (minus:SI (match_dup 1) (match_dup 2)))])
++ (set (pc) (if_then_else (ne (reg:CCV CC_REG) (const_int 0))
++ (label_ref (match_operand 3))
++ (pc)))]
++ "")
++
++(define_insn "*cmp_ccnz_minus"
++ [(set (reg:CCNZ CC_REG)
++ (compare:CCNZ (minus:SI (match_operand:SI 0 "register_or_zero_operand" "rJ")
++ (match_operand:SI 1 "arith_operand" "rI"))
++ (const_int 0)))]
++ ""
++ "subcc\t%r0, %1, %%g0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccxnz_minus"
++ [(set (reg:CCXNZ CC_REG)
++ (compare:CCXNZ (minus:DI (match_operand:DI 0 "register_or_zero_operand" "rJ")
++ (match_operand:DI 1 "arith_operand" "rI"))
++ (const_int 0)))]
++ "TARGET_ARCH64"
++ "subcc\t%r0, %1, %%g0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccnz_minus_set"
++ [(set (reg:CCNZ CC_REG)
++ (compare:CCNZ (minus:SI (match_operand:SI 1 "register_or_zero_operand" "rJ")
++ (match_operand:SI 2 "arith_operand" "rI"))
++ (const_int 0)))
++ (set (match_operand:SI 0 "register_operand" "=r")
++ (minus:SI (match_dup 1) (match_dup 2)))]
++ ""
++ "subcc\t%r1, %2, %0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccxnz_minus_set"
++ [(set (reg:CCXNZ CC_REG)
++ (compare:CCXNZ (minus:DI (match_operand:DI 1 "register_or_zero_operand" "rJ")
++ (match_operand:DI 2 "arith_operand" "rI"))
++ (const_int 0)))
++ (set (match_operand:DI 0 "register_operand" "=r")
++ (minus:DI (match_dup 1) (match_dup 2)))]
++ "TARGET_ARCH64"
++ "subcc\t%r1, %2, %0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmpsi_set"
++ [(set (reg:CC CC_REG)
++ (compare:CC (match_operand:SI 1 "register_or_zero_operand" "rJ")
++ (match_operand:SI 2 "arith_operand" "rI")))
++ (set (match_operand:SI 0 "register_operand" "=r")
++ (minus:SI (match_dup 1) (match_dup 2)))]
++ ""
++ "subcc\t%r1, %2, %0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmpdi_set"
++ [(set (reg:CCX CC_REG)
++ (compare:CCX (match_operand:DI 1 "register_or_zero_operand" "rJ")
++ (match_operand:DI 2 "arith_operand" "rI")))
++ (set (match_operand:DI 0 "register_operand" "=r")
++ (minus:DI (match_dup 1) (match_dup 2)))]
++ "TARGET_ARCH64"
++ "subcc\t%r1, %2, %0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccc_minus_sltu_set"
++ [(set (reg:CCC CC_REG)
++ (compare:CCC (zero_extend:DI
++ (minus:SI
++ (minus:SI
++ (match_operand:SI 1 "register_or_zero_operand" "rJ")
++ (ltu:SI (reg:CC CC_REG) (const_int 0)))
++ (match_operand:SI 2 "arith_operand" "rI")))
++ (minus:DI
++ (minus:DI
++ (zero_extend:DI (match_dup 1))
++ (ltu:DI (reg:CC CC_REG) (const_int 0)))
++ (zero_extend:DI (match_dup 2)))))
++ (set (match_operand:SI 0 "register_operand" "=r")
++ (minus:SI (minus:SI (match_dup 1)
++ (ltu:SI (reg:CC CC_REG) (const_int 0)))
++ (match_dup 2)))]
++ ""
++ "subxcc\t%r1, %2, %0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccv_minus"
++ [(set (reg:CCV CC_REG)
++ (compare:CCV (minus:SI (match_operand:SI 0 "register_or_zero_operand" "rJ,rJ")
++ (match_operand:SI 1 "arith_add_operand" "rI,O"))
++ (unspec:SI [(match_dup 0) (match_dup 1)] UNSPEC_SUBV)))]
++ ""
++ "@
++ subcc\t%r0, %1, %%g0
++ addcc\t%r0, -%1, %%g0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccxv_minus"
++ [(set (reg:CCXV CC_REG)
++ (compare:CCXV (minus:DI (match_operand:DI 0 "register_or_zero_operand" "rJ,rJ")
++ (match_operand:DI 1 "arith_add_operand" "rI,O"))
++ (unspec:DI [(match_dup 0) (match_dup 1)] UNSPEC_SUBV)))]
++ "TARGET_ARCH64"
++ "@
++ subcc\t%r0, %1, %%g0
++ addcc\t%r0, -%1, %%g0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccv_minus_set"
++ [(set (reg:CCV CC_REG)
++ (compare:CCV (minus:SI (match_operand:SI 1 "register_or_zero_operand" "rJ,rJ")
++ (match_operand:SI 2 "arith_add_operand" "rI,O"))
++ (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_SUBV)))
++ (set (match_operand:SI 0 "register_operand" "=r,r")
++ (minus:SI (match_dup 1) (match_dup 2)))]
++ ""
++ "@
++ subcc\t%r1, %2, %0
++ addcc\t%r1, -%2, %0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccxv_minus_set"
++ [(set (reg:CCXV CC_REG)
++ (compare:CCXV (minus:DI (match_operand:DI 1 "register_or_zero_operand" "rJ,rJ")
++ (match_operand:DI 2 "arith_add_operand" "rI,O"))
++ (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SUBV)))
++ (set (match_operand:DI 0 "register_operand" "=r,r")
++ (minus:DI (match_dup 1) (match_dup 2)))]
++ "TARGET_ARCH64"
++ "@
++ subcc\t%r1, %2, %0
++ addcc\t%r1, -%2, %0"
++ [(set_attr "type" "compare")])
++
++(define_insn "*cmp_ccv_minus_sltu_set"
++ [(set (reg:CCV CC_REG)
++ (compare:CCV
++ (minus:SI (minus:SI (match_operand:SI 1 "register_or_zero_operand" "rJ")
++ (match_operand:SI 2 "arith_operand" "rI"))
++ (ltu:SI (reg:CC CC_REG) (const_int 0)))
++ (unspec:SI [(minus:SI (match_dup 1) (match_dup 2))
++ (ltu:SI (reg:CC CC_REG) (const_int 0))]
++ UNSPEC_SUBV)))
++ (set (match_operand:SI 0 "register_operand" "=r")
++ (minus:SI (minus:SI (match_dup 1) (match_dup 2))
++ (ltu:SI (reg:CC CC_REG) (const_int 0))))]
++ ""
++ "subxcc\t%1, %2, %0"
++ [(set_attr "type" "compare")])
++
++
++;; Integer multiply/divide instructions.
++
++;; The 32-bit multiply/divide instructions are deprecated on v9, but at
++;; least in UltraSPARC I, II and IIi it is a win tick-wise.
++
++(define_expand "mulsi3"
++ [(set (match_operand:SI 0 "register_operand" "")
++ (mult:SI (match_operand:SI 1 "arith_operand" "")
++ (match_operand:SI 2 "arith_operand" "")))]
++ "TARGET_HARD_MUL || TARGET_ARCH64"
++ "")
++
++(define_insn "*mulsi3_sp32"
++ [(set (match_operand:SI 0 "register_operand" "=r")
++ (mult:SI (match_operand:SI 1 "arith_operand" "%r")
++ (match_operand:SI 2 "arith_operand" "rI")))]
++ "TARGET_HARD_MUL"
++ "smul\t%1, %2, %0"
++ [(set_attr "type" "imul")])
++
++(define_insn "*mulsi3_sp64"
++ [(set (match_operand:SI 0 "register_operand" "=r")
++ (mult:SI (match_operand:SI 1 "arith_operand" "%r")
++ (match_operand:SI 2 "arith_operand" "rI")))]
++ "TARGET_ARCH64"
++ "mulx\t%1, %2, %0"
++ [(set_attr "type" "imul")])
++
++(define_expand "muldi3"
++ [(set (match_operand:DI 0 "register_operand" "")
++ (mult:DI (match_operand:DI 1 "arith_operand" "")
++ (match_operand:DI 2 "arith_operand" "")))]
++ "TARGET_ARCH64 || TARGET_V8PLUS"
++{
++ if (TARGET_V8PLUS)
++ {
++ emit_insn (gen_muldi3_v8plus (operands[0], operands[1], operands[2]));
++ DONE;
++ }
++})
++
++(define_insn "*muldi3_sp64"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (mult:DI (match_operand:DI 1 "arith_operand" "%r")
++ (match_operand:DI 2 "arith_operand" "rI")))]
++ "TARGET_ARCH64"
++ "mulx\t%1, %2, %0"
++ [(set_attr "type" "imul")])
++
++;; V8plus wide multiply.
++(define_insn "muldi3_v8plus"
++ [(set (match_operand:DI 0 "register_operand" "=r,h")
++ (mult:DI (match_operand:DI 1 "arith_operand" "%r,0")
++ (match_operand:DI 2 "arith_operand" "rI,rI")))
++ (clobber (match_scratch:SI 3 "=&h,X"))
++ (clobber (match_scratch:SI 4 "=&h,X"))]
++ "TARGET_V8PLUS"
++{
++ return output_v8plus_mult (insn, operands, \"mulx\");
++}
++ [(set_attr "type" "multi")
++ (set_attr "length" "9,8")])
++
++(define_insn "*cmp_mul_set"
++ [(set (reg:CC CC_REG)
++ (compare:CC (mult:SI (match_operand:SI 1 "arith_operand" "%r")
++ (match_operand:SI 2 "arith_operand" "rI"))
++ (const_int 0)))
++ (set (match_operand:SI 0 "register_operand" "=r")
++ (mult:SI (match_dup 1) (match_dup 2)))]
++ "TARGET_V8 || TARGET_SPARCLITE || TARGET_DEPRECATED_V8_INSNS"
++ "smulcc\t%1, %2, %0"
++ [(set_attr "type" "imul")])
++
++(define_expand "mulsidi3"
++ [(set (match_operand:DI 0 "register_operand" "")
++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" ""))
++ (sign_extend:DI (match_operand:SI 2 "arith_operand" ""))))]
++ "TARGET_HARD_MUL"
++{
++ if (CONSTANT_P (operands[2]))
++ {
++ if (TARGET_V8PLUS)
++ emit_insn (gen_const_mulsidi3_v8plus (operands[0], operands[1],
++ operands[2]));
++ else if (TARGET_ARCH32)
++ emit_insn (gen_const_mulsidi3_sp32 (operands[0], operands[1],
++ operands[2]));
++ else
++ emit_insn (gen_const_mulsidi3_sp64 (operands[0], operands[1],
++ operands[2]));
++ DONE;
++ }
++ if (TARGET_V8PLUS)
++ {
++ emit_insn (gen_mulsidi3_v8plus (operands[0], operands[1], operands[2]));
++ DONE;
++ }
++})
++
++;; V9 puts the 64-bit product in a 64-bit register. Only out or global
++;; registers can hold 64-bit values in the V8plus environment.
++(define_insn "mulsidi3_v8plus"
++ [(set (match_operand:DI 0 "register_operand" "=h,r")
++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
++ (sign_extend:DI (match_operand:SI 2 "register_operand" "r,r"))))
++ (clobber (match_scratch:SI 3 "=X,&h"))]
++ "TARGET_V8PLUS"
++ "@
++ smul\t%1, %2, %L0\n\tsrlx\t%L0, 32, %H0
++ smul\t%1, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"
++ [(set_attr "type" "multi")
++ (set_attr "length" "2,3")])
++
++(define_insn "const_mulsidi3_v8plus"
++ [(set (match_operand:DI 0 "register_operand" "=h,r")
++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
++ (match_operand:DI 2 "small_int_operand" "I,I")))
++ (clobber (match_scratch:SI 3 "=X,&h"))]
++ "TARGET_V8PLUS"
++ "@
++ smul\t%1, %2, %L0\n\tsrlx\t%L0, 32, %H0
++ smul\t%1, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"
++ [(set_attr "type" "multi")
++ (set_attr "length" "2,3")])
++
++(define_insn "*mulsidi3_sp32"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
++ (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
++ "TARGET_HARD_MUL32"
++{
++ return TARGET_SPARCLET
++ ? "smuld\t%1, %2, %L0"
++ : "smul\t%1, %2, %L0\n\trd\t%%y, %H0";
++}
++ [(set (attr "type")
++ (if_then_else (eq_attr "isa" "sparclet")
++ (const_string "imul") (const_string "multi")))
++ (set (attr "length")
++ (if_then_else (eq_attr "isa" "sparclet")
++ (const_int 1) (const_int 2)))])
++
++(define_insn "*mulsidi3_sp64"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
++ (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
++ "TARGET_DEPRECATED_V8_INSNS && TARGET_ARCH64"
++ "smul\t%1, %2, %0"
++ [(set_attr "type" "imul")])
++
++;; Extra pattern, because sign_extend of a constant isn't valid.
++
++(define_insn "const_mulsidi3_sp32"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
++ (match_operand:DI 2 "small_int_operand" "I")))]
++ "TARGET_HARD_MUL32"
++{
++ return TARGET_SPARCLET
++ ? "smuld\t%1, %2, %L0"
++ : "smul\t%1, %2, %L0\n\trd\t%%y, %H0";
++}
++ [(set (attr "type")
++ (if_then_else (eq_attr "isa" "sparclet")
++ (const_string "imul") (const_string "multi")))
++ (set (attr "length")
++ (if_then_else (eq_attr "isa" "sparclet")
++ (const_int 1) (const_int 2)))])
++
++(define_insn "const_mulsidi3_sp64"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
++ (match_operand:DI 2 "small_int_operand" "I")))]
++ "TARGET_DEPRECATED_V8_INSNS && TARGET_ARCH64"
++ "smul\t%1, %2, %0"
++ [(set_attr "type" "imul")])
++
++(define_expand "smulsi3_highpart"
++ [(set (match_operand:SI 0 "register_operand" "")
++ (truncate:SI
++ (lshiftrt:DI
++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" ""))
++ (sign_extend:DI (match_operand:SI 2 "arith_operand" "")))
++ (const_int 32))))]
++ "TARGET_HARD_MUL && TARGET_ARCH32"
++{
++ if (CONSTANT_P (operands[2]))
++ {
++ if (TARGET_V8PLUS)
++ {
++ emit_insn (gen_const_smulsi3_highpart_v8plus (operands[0],
++ operands[1],
++ operands[2],
++ GEN_INT (32)));
++ DONE;
++ }
++ emit_insn (gen_const_smulsi3_highpart (operands[0], operands[1], operands[2]));
++ DONE;
++ }
++ if (TARGET_V8PLUS)
++ {
++ emit_insn (gen_smulsi3_highpart_v8plus (operands[0], operands[1],
++ operands[2], GEN_INT (32)));
++ DONE;
++ }
++})
++
++(define_insn "smulsi3_highpart_v8plus"
++ [(set (match_operand:SI 0 "register_operand" "=h,r")
++ (truncate:SI
++ (lshiftrt:DI
++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
++ (sign_extend:DI (match_operand:SI 2 "register_operand" "r,r")))
++ (match_operand:SI 3 "small_int_operand" "I,I"))))
++ (clobber (match_scratch:SI 4 "=X,&h"))]
++ "TARGET_V8PLUS"
++ "@
++ smul\t%1, %2, %0\;srlx\t%0, %3, %0
++ smul\t%1, %2, %4\;srlx\t%4, %3, %0"
++ [(set_attr "type" "multi")
++ (set_attr "length" "2")])
++
++;; The combiner changes TRUNCATE in the previous pattern to SUBREG.
++(define_insn ""
++ [(set (match_operand:SI 0 "register_operand" "=h,r")
++ (subreg:SI
++ (lshiftrt:DI
++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
++ (sign_extend:DI (match_operand:SI 2 "register_operand" "r,r")))
++ (match_operand:SI 3 "small_int_operand" "I,I")) 4))
++ (clobber (match_scratch:SI 4 "=X,&h"))]
++ "TARGET_V8PLUS"
++ "@
++ smul\t%1, %2, %0\n\tsrlx\t%0, %3, %0
++ smul\t%1, %2, %4\n\tsrlx\t%4, %3, %0"
++ [(set_attr "type" "multi")
++ (set_attr "length" "2")])
++
++(define_insn "const_smulsi3_highpart_v8plus"
++ [(set (match_operand:SI 0 "register_operand" "=h,r")
++ (truncate:SI
++ (lshiftrt:DI
++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
++ (match_operand:DI 2 "small_int_operand" "I,I"))
++ (match_operand:SI 3 "small_int_operand" "I,I"))))
++ (clobber (match_scratch:SI 4 "=X,&h"))]
++ "TARGET_V8PLUS"
++ "@
++ smul\t%1, %2, %0\n\tsrlx\t%0, %3, %0
++ smul\t%1, %2, %4\n\tsrlx\t%4, %3, %0"
++ [(set_attr "type" "multi")
++ (set_attr "length" "2")])
++
++(define_insn "*smulsi3_highpart_sp32"
++ [(set (match_operand:SI 0 "register_operand" "=r")
++ (truncate:SI
++ (lshiftrt:DI
++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
++ (sign_extend:DI (match_operand:SI 2 "register_operand" "r")))
++ (const_int 32))))]
++ "TARGET_HARD_MUL32"
++ "smul\t%1, %2, %%g0\n\trd\t%%y, %0"
++ [(set_attr "type" "multi")
++ (set_attr "length" "2")])
++
++(define_insn "const_smulsi3_highpart"
++ [(set (match_operand:SI 0 "register_operand" "=r")
++ (truncate:SI
++ (lshiftrt:DI
++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
++ (match_operand:DI 2 "small_int_operand" "i"))
++ (const_int 32))))]
++ "TARGET_HARD_MUL32"
++ "smul\t%1, %2, %%g0\n\trd\t%%y, %0"
++ [(set_attr "type" "multi")
++ (set_attr "length" "2")])
++
++(define_expand "umulsidi3"
++ [(set (match_operand:DI 0 "register_operand" "")
++ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" ""))
++ (zero_extend:DI (match_operand:SI 2 "uns_arith_operand" ""))))]
++ "TARGET_HARD_MUL"
++{
++ if (CONSTANT_P (operands[2]))
++ {
++ if (TARGET_V8PLUS)
++ emit_insn (gen_const_umulsidi3_v8plus (operands[0], operands[1],
++ operands[2]));
++ else if (TARGET_ARCH32)
++ emit_insn (gen_const_umulsidi3_sp32 (operands[0], operands[1],
++ operands[2]));
++ else
++ emit_insn (gen_const_umulsidi3_sp64 (operands[0], operands[1],
++ operands[2]));
++ DONE;
++ }
++ if (TARGET_V8PLUS)
++ {
++ emit_insn (gen_umulsidi3_v8plus (operands[0], operands[1], operands[2]));
++ DONE;
++ }
++})
++
++(define_insn "umulsidi3_v8plus"
++ [(set (match_operand:DI 0 "register_operand" "=h,r")
++ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
++ (zero_extend:DI (match_operand:SI 2 "register_operand" "r,r"))))
++ (clobber (match_scratch:SI 3 "=X,&h"))]
++ "TARGET_V8PLUS"
++ "@
++ umul\t%1, %2, %L0\n\tsrlx\t%L0, 32, %H0
++ umul\t%1, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"
++ [(set_attr "type" "multi")
++ (set_attr "length" "2,3")])
++
++(define_insn "*umulsidi3_sp32"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
++ (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
++ "TARGET_HARD_MUL32"
++{
++ return TARGET_SPARCLET
++ ? "umuld\t%1, %2, %L0"
++ : "umul\t%1, %2, %L0\n\trd\t%%y, %H0";
++}
++ [(set (attr "type")
++ (if_then_else (eq_attr "isa" "sparclet")
++ (const_string "imul") (const_string "multi")))
++ (set (attr "length")
++ (if_then_else (eq_attr "isa" "sparclet")
++ (const_int 1) (const_int 2)))])
++
++(define_insn "*umulsidi3_sp64"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
++ (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
++ "TARGET_DEPRECATED_V8_INSNS && TARGET_ARCH64"
++ "umul\t%1, %2, %0"
++ [(set_attr "type" "imul")])
++
++;; Extra pattern, because sign_extend of a constant isn't valid.
++
++(define_insn "const_umulsidi3_sp32"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
++ (match_operand:DI 2 "uns_small_int_operand" "")))]
++ "TARGET_HARD_MUL32"
++{
++ return TARGET_SPARCLET
++ ? "umuld\t%1, %s2, %L0"
++ : "umul\t%1, %s2, %L0\n\trd\t%%y, %H0";
++}
++ [(set (attr "type")
++ (if_then_else (eq_attr "isa" "sparclet")
++ (const_string "imul") (const_string "multi")))
++ (set (attr "length")
++ (if_then_else (eq_attr "isa" "sparclet")
++ (const_int 1) (const_int 2)))])
++
++(define_insn "const_umulsidi3_sp64"
++ [(set (match_operand:DI 0 "register_operand" "=r")
++ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
++ (match_operand:DI 2 "uns_small_int_operand" "")))]
++ "TARGET_DEPRECATED_V8_INSNS && TARGET_ARCH64"
++ "umul\t%1, %s2, %0"
++ [(set_attr "type" "imul")])
++
++(define_insn "const_umulsidi3_v8plus"
++ [(set (match_operand:DI 0 "register_operand" "=h,r")
++ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
++ (match_operand:DI 2 "uns_small_int_operand" "")))
++ (clobber (match_scratch:SI 3 "=X,h"))]
++ "TARGET_V8PLUS"
++ "@
++ umul\t%1, %s2, %L0\n\tsrlx\t%L0, 32, %H0
++ umul\t%1, %s2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"
++ [(set_attr "type" "multi")
++ (set_attr "length" "2,3")])
++
++(define_expand "umulsi3_highpart"
++ [(set (match_operand:SI 0 "register_operand" "")
++ (truncate:SI
++ (lshiftrt:DI
++ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" ""))
++ (zero_extend:DI (match_operand:SI 2 "uns_arith_operand" "")))
++ (const_int 32))))]
++ "TARGET_HARD_MUL && TARGET_ARCH32"
++{
++ if (CONSTANT_P (operands[2]))
++ {
++ if (TARGET_V8PLUS)
++ {
++ emit_insn (gen_const_umulsi3_highpart_v8plus (operands[0],
++ operands[1],
++ operands[2],
++ GEN_INT (32)));
++ DONE;
++ }
++ emit_insn (gen_const_umulsi3_highpart (operands[0], operands[1], operands[2]));
++ DONE;
++ }
++ if (TARGET_V8PLUS)
++ {
++ emit_insn (gen_umulsi3_highpart_v8plus (operands[0], operands[1],
++ operands[2], GEN_INT (32)));
++ DONE;
++ }
++})
++
++(define_insn "umulsi3_highpart_v8plus"
++ [(set (match_operand:SI 0 "register_operand" "=h,r")
++ (truncate:SI
++ (lshiftrt:DI
++ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
++ (zero_extend:DI (match_operand:SI 2 "register_operand" "r,r")))
++ (match_operand:SI 3 "small_int_operand" "I,I"))))
++ (clobber (match_scratch:SI 4 "=X,h"))]
++ "TARGET_V8PLUS"
++ "@
++ umul\t%1, %2, %0\n\tsrlx\t%0, %3, %0
++ umul\t%1, %2, %4\n\tsrlx\t%4, %3, %0"
++ [(set_attr "type" "multi")
++ (set_attr "length" "2")])
++