diff -Naurp gcc-4.1.0/gcc/config/mips/10000.md gcc-4.1.0.r10k/gcc/config/mips/10000.md --- gcc-4.1.0/gcc/config/mips/10000.md 1970-01-01 00:00:00 +0000 +++ gcc-4.1.0.r10k/gcc/config/mips/10000.md 2006-05-17 17:08:45 +0000 @@ -0,0 +1,248 @@ +;; VR1x000 pipeline description. +;; Copyright (C) 2005, 2006 Free Software Foundation, Inc. +;; +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 2, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING. If not, write to the +;; Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, +;; MA 02110-1301, USA. + + +;; This file overrides parts of generic.md. It is derived from the +;; old define_function_unit description. + + + +;; R12K/R14K/R16K are derivatives of R10K, thus copy its description +;; until specific tuning for each is added + + +;; R10000 has int queue, fp queue, address queue +(define_automaton "r10k_int, r10k_fp, r10k_addr") + +;; R10000 has 2 integer ALUs, fp-adder and fp-multiplier, load/store +(define_cpu_unit "r10k_alu1" "r10k_int") +(define_cpu_unit "r10k_alu2" "r10k_int") +(define_cpu_unit "r10k_fpadd" "r10k_fp") +(define_cpu_unit "r10k_fpmpy" "r10k_fp") +(define_cpu_unit "r10k_loadstore" "r10k_addr") + +;; R10000 has separate fp-div and fp-sqrt units as well and these can +;; execute in parallel, however their issue & completion logic is shared +;; by the fp-multiplier +(define_cpu_unit "r10k_fpdiv" "r10k_fp") +(define_cpu_unit "r10k_fpsqrt" "r10k_fp") + + + + +;; loader +(define_insn_reservation "r10k_load" 2 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (eq_attr "type" "load,prefetch,prefetchx")) + "r10k_loadstore") + +(define_insn_reservation "r10k_store" 0 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (eq_attr "type" "store,fpstore,fpidxstore")) + "r10k_loadstore") + +(define_insn_reservation "r10k_fpload" 3 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (eq_attr "type" "fpload,fpidxload")) + "r10k_loadstore") + + + + +;; Integer add/sub + logic ops, and mf/mt hi/lo can be done by alu1 or alu2 +;; Miscellaneous arith goes here too (this is a guess) +(define_insn_reservation "r10k_arith" 1 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (eq_attr "type" "arith,mfhilo,mthilo,slt,clz,const,nop,trap")) + "r10k_alu1 | r10k_alu2") + + + + +;; ALU1 handles shifts, branch eval, and condmove +;; +;; Brancher is separate, but part of ALU1, but can only +;; do one branch per cycle (needs implementing??) +;; +;; jump, call - unsure if brancher handles these too (added for now) +(define_insn_reservation "r10k_shift" 1 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (eq_attr "type" "shift,branch,jump,call")) + "r10k_alu1") + +(define_insn_reservation "r10k_int_cmove" 1 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (and (eq_attr "type" "condmove") + (eq_attr "mode" "SI,DI"))) + "r10k_alu1") + + + + +;; Coprocessor Moves +;; mtc1/dmtc1 are handled by ALU1 +;; mfc1/dmfc1 are handled by the fp-multiplier +(define_insn_reservation "r10k_mt_xfer" 3 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (and (eq_attr "type" "xfer") + (not (match_operand 0 "fpr_operand")))) + "r10k_alu1") + +(define_insn_reservation "r10k_mf_xfer" 2 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (and (eq_attr "type" "xfer") + (match_operand 0 "fpr_operand"))) + "r10k_fpmpy") + + + + +;; Only ALU2 does int multiplications and divisions +;; R10K allows an int insn using register Lo to be issued +;; one cycle earlier than an insn using register Hi for +;; the insns below, however, we skip on doing this +;; for now until correct usage of lo_operand() is figured +;; out. +;; +;; Divides keep ALU2 busy, but this isn't expressed here (I think...?) +(define_insn_reservation "r10k_imul_single" 6 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (and (eq_attr "type" "imul,imul3,imadd") + (eq_attr "mode" "SI"))) + "r10k_alu2 * 6") + +(define_insn_reservation "r10k_imul_double" 10 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (and (eq_attr "type" "imul,imul3,imadd") + (eq_attr "mode" "DI"))) + "r10k_alu2 * 10") + +(define_insn_reservation "r10k_idiv_single" 35 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (and (eq_attr "type" "idiv") + (eq_attr "mode" "SI"))) + "r10k_alu2 * 35") + +(define_insn_reservation "r10k_idiv_double" 67 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (and (eq_attr "type" "idiv") + (eq_attr "mode" "DI"))) + "r10k_alu2 * 67") + + + + +;; FP add/sub, mul, abs value, neg, comp, & moves +(define_insn_reservation "r10k_fp_miscadd" 2 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (eq_attr "type" "fadd,fabs,fneg,fcmp")) + "r10k_fpadd") + +(define_insn_reservation "r10k_fp_miscmul" 2 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (eq_attr "type" "fmul,fmove")) + "r10k_fpmpy") + +(define_insn_reservation "r10k_fp_cmove" 2 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (and (eq_attr "type" "condmove") + (eq_attr "mode" "SF,DF"))) + "r10k_fpmpy") + + + + +;; fcvt.s.[wl] has latency 4, repeat 2 +;; All other fcvt have latency 2, repeat 1 +(define_insn_reservation "r10k_fcvt_single" 4 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (and (eq_attr "type" "fcvt") + (eq_attr "cnv_mode" "I2S"))) + "r10k_fpadd * 2") + +(define_insn_reservation "r10k_fcvt_other" 2 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (and (eq_attr "type" "fcvt") + (eq_attr "cnv_mode" "!I2S"))) + "r10k_fpadd") + + + + +;; fmadd - Runs through fp-adder first, then fp-multiplier +;; +;; The latency for fmadd is 2 cycles if the result is used +;; by another fmadd instruction +(define_insn_reservation "r10k_fmadd" 4 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (eq_attr "type" "fmadd")) + "r10k_fpadd, r10k_fpmpy") + +(define_bypass 2 "r10k_fmadd" "r10k_fmadd") + + + + +;; fp Divisions & square roots +(define_insn_reservation "r10k_fdiv_single" 12 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (and (eq_attr "type" "fdiv,frdiv") + (eq_attr "mode" "SF"))) + "r10k_fpdiv * 14") + +(define_insn_reservation "r10k_fdiv_double" 19 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (and (eq_attr "type" "fdiv,frdiv") + (eq_attr "mode" "DF"))) + "r10k_fpdiv * 21") + +(define_insn_reservation "r10k_fsqrt_single" 18 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (and (eq_attr "type" "fsqrt") + (eq_attr "mode" "SF"))) + "r10k_fpsqrt * 20") + +(define_insn_reservation "r10k_fsqrt_double" 33 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (and (eq_attr "type" "fsqrt") + (eq_attr "mode" "DF"))) + "r10k_fpsqrt * 35") + +(define_insn_reservation "r10k_frsqrt_single" 30 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (and (eq_attr "type" "frsqrt") + (eq_attr "mode" "SF"))) + "r10k_fpsqrt * 20") + +(define_insn_reservation "r10k_frsqrt_double" 52 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (and (eq_attr "type" "frsqrt") + (eq_attr "mode" "DF"))) + "r10k_fpsqrt * 35") + + + + +;; Unknown/multi (this is a guess) +(define_insn_reservation "r10k_unknown" 1 + (and (eq_attr "cpu" "r10000,r12000,r14000,r16000") + (eq_attr "type" "unknown,multi")) + "r10k_alu1 + r10k_alu2") + diff -Naurp gcc-4.1.0/gcc/config/mips/mips.c gcc-4.1.0.r10k/gcc/config/mips/mips.c --- gcc-4.1.0/gcc/config/mips/mips.c 2005-12-09 08:15:58 +0000 +++ gcc-4.1.0.r10k/gcc/config/mips/mips.c 2006-05-17 16:45:38 +0000 @@ -736,6 +736,10 @@ const struct mips_cpu_info mips_cpu_info /* MIPS IV */ { "r8000", PROCESSOR_R8000, 4 }, + { "r10000", PROCESSOR_R10000, 4 }, + { "r12000", PROCESSOR_R12000, 4 }, + { "r14000", PROCESSOR_R14000, 4 }, + { "r16000", PROCESSOR_R16000, 4 }, { "vr5000", PROCESSOR_R5000, 4 }, { "vr5400", PROCESSOR_R5400, 4 }, { "vr5500", PROCESSOR_R5500, 4 }, @@ -1015,6 +1019,58 @@ static struct mips_rtx_cost_data const m 1, /* branch_cost */ 4 /* memory_latency */ }, + { /* R10000 */ + COSTS_N_INSNS (2), /* fp_add */ + COSTS_N_INSNS (2), /* fp_mult_sf */ + COSTS_N_INSNS (2), /* fp_mult_df */ + COSTS_N_INSNS (12), /* fp_div_sf */ + COSTS_N_INSNS (19), /* fp_div_df */ + COSTS_N_INSNS (6), /* int_mult_si */ + COSTS_N_INSNS (10), /* int_mult_di */ + COSTS_N_INSNS (35), /* int_div_si */ + COSTS_N_INSNS (67), /* int_div_di */ + 1, /* branch_cost */ + 4 /* memory_latency */ + }, + { /* R12000 */ + COSTS_N_INSNS (2), /* fp_add */ + COSTS_N_INSNS (2), /* fp_mult_sf */ + COSTS_N_INSNS (2), /* fp_mult_df */ + COSTS_N_INSNS (12), /* fp_div_sf */ + COSTS_N_INSNS (19), /* fp_div_df */ + COSTS_N_INSNS (6), /* int_mult_si */ + COSTS_N_INSNS (10), /* int_mult_di */ + COSTS_N_INSNS (35), /* int_div_si */ + COSTS_N_INSNS (67), /* int_div_di */ + 1, /* branch_cost */ + 4 /* memory_latency */ + }, + { /* R14000 */ + COSTS_N_INSNS (2), /* fp_add */ + COSTS_N_INSNS (2), /* fp_mult_sf */ + COSTS_N_INSNS (2), /* fp_mult_df */ + COSTS_N_INSNS (12), /* fp_div_sf */ + COSTS_N_INSNS (19), /* fp_div_df */ + COSTS_N_INSNS (6), /* int_mult_si */ + COSTS_N_INSNS (10), /* int_mult_di */ + COSTS_N_INSNS (35), /* int_div_si */ + COSTS_N_INSNS (67), /* int_div_di */ + 1, /* branch_cost */ + 4 /* memory_latency */ + }, + { /* R16000 */ + COSTS_N_INSNS (2), /* fp_add */ + COSTS_N_INSNS (2), /* fp_mult_sf */ + COSTS_N_INSNS (2), /* fp_mult_df */ + COSTS_N_INSNS (12), /* fp_div_sf */ + COSTS_N_INSNS (19), /* fp_div_df */ + COSTS_N_INSNS (6), /* int_mult_si */ + COSTS_N_INSNS (10), /* int_mult_di */ + COSTS_N_INSNS (35), /* int_div_si */ + COSTS_N_INSNS (67), /* int_div_di */ + 1, /* branch_cost */ + 4 /* memory_latency */ + }, { /* SB1 */ COSTS_N_INSNS (4), /* fp_add */ COSTS_N_INSNS (4), /* fp_mult_sf */ @@ -9871,6 +9927,12 @@ mips_issue_rate (void) { switch (mips_tune) { + case PROCESSOR_R10000: + case PROCESSOR_R12000: + case PROCESSOR_R14000: + case PROCESSOR_R16000: + return 4; + case PROCESSOR_R4130: case PROCESSOR_R5400: case PROCESSOR_R5500: diff -Naurp gcc-4.1.0/gcc/config/mips/mips.h gcc-4.1.0.r10k/gcc/config/mips/mips.h --- gcc-4.1.0/gcc/config/mips/mips.h 2006-02-17 21:38:59 +0000 +++ gcc-4.1.0.r10k/gcc/config/mips/mips.h 2006-05-17 16:45:38 +0000 @@ -57,6 +57,10 @@ enum processor_type { PROCESSOR_R7000, PROCESSOR_R8000, PROCESSOR_R9000, + PROCESSOR_R10000, + PROCESSOR_R12000, + PROCESSOR_R14000, + PROCESSOR_R16000, PROCESSOR_SB1, PROCESSOR_SR71000, PROCESSOR_MAX @@ -192,6 +196,10 @@ extern const struct mips_rtx_cost_data * #define TARGET_MIPS5500 (mips_arch == PROCESSOR_R5500) #define TARGET_MIPS7000 (mips_arch == PROCESSOR_R7000) #define TARGET_MIPS9000 (mips_arch == PROCESSOR_R9000) +#define TARGET_MIPS10000 (mips_arch == PROCESSOR_R10000) +#define TARGET_MIPS12000 (mips_arch == PROCESSOR_R12000) +#define TARGET_MIPS14000 (mips_arch == PROCESSOR_R14000) +#define TARGET_MIPS16000 (mips_arch == PROCESSOR_R16000) #define TARGET_SB1 (mips_arch == PROCESSOR_SB1) #define TARGET_SR71K (mips_arch == PROCESSOR_SR71000) @@ -207,6 +215,10 @@ extern const struct mips_rtx_cost_data * #define TUNE_MIPS6000 (mips_tune == PROCESSOR_R6000) #define TUNE_MIPS7000 (mips_tune == PROCESSOR_R7000) #define TUNE_MIPS9000 (mips_tune == PROCESSOR_R9000) +#define TUNE_MIPS10000 (mips_tune == PROCESSOR_R10000) +#define TUNE_MIPS12000 (mips_tune == PROCESSOR_R12000) +#define TUNE_MIPS14000 (mips_tune == PROCESSOR_R14000) +#define TUNE_MIPS16000 (mips_tune == PROCESSOR_R16000) #define TUNE_SB1 (mips_tune == PROCESSOR_SB1) /* True if the pre-reload scheduler should try to create chains of diff -Naurp gcc-4.1.0/gcc/config/mips/mips.md gcc-4.1.0.r10k/gcc/config/mips/mips.md --- gcc-4.1.0/gcc/config/mips/mips.md 2005-07-29 17:25:27 +0000 +++ gcc-4.1.0.r10k/gcc/config/mips/mips.md 2006-05-17 16:45:38 +0000 @@ -336,7 +336,7 @@ ;; Attribute describing the processor. This attribute must match exactly ;; with the processor_type enumeration in mips.h. (define_attr "cpu" - "r3000,4kc,4kp,5kc,5kf,20kc,24k,24kx,m4k,r3900,r6000,r4000,r4100,r4111,r4120,r4130,r4300,r4600,r4650,r5000,r5400,r5500,r7000,r8000,r9000,sb1,sr71000" + "r3000,4kc,4kp,5kc,5kf,20kc,24k,24kx,m4k,r3900,r6000,r4000,r4100,r4111,r4120,r4130,r4300,r4600,r4650,r5000,r5400,r5500,r7000,r8000,r9000,r10000,r12000,r14000,r16000,sb1,sr71000" (const (symbol_ref "mips_tune"))) ;; The type of hardware hazard associated with this instruction. @@ -585,6 +585,7 @@ (include "6000.md") (include "7000.md") (include "9000.md") +(include "10000.md") (include "sb1.md") (include "sr71k.md") (include "generic.md")