| /* Subroutines used for code generation on IA-32. |
| Copyright (C) 1988-2014 Free Software Foundation, Inc. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| GCC is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "tm.h" |
| #include "rtl.h" |
| #include "tree.h" |
| #include "stringpool.h" |
| #include "attribs.h" |
| #include "calls.h" |
| #include "stor-layout.h" |
| #include "varasm.h" |
| #include "tm_p.h" |
| #include "regs.h" |
| #include "hard-reg-set.h" |
| #include "insn-config.h" |
| #include "conditions.h" |
| #include "output.h" |
| #include "insn-codes.h" |
| #include "insn-attr.h" |
| #include "flags.h" |
| #include "except.h" |
| #include "function.h" |
| #include "recog.h" |
| #include "expr.h" |
| #include "optabs.h" |
| #include "diagnostic-core.h" |
| #include "toplev.h" |
| #include "basic-block.h" |
| #include "ggc.h" |
| #include "target.h" |
| #include "target-def.h" |
| #include "common/common-target.h" |
| #include "langhooks.h" |
| #include "reload.h" |
| #include "cgraph.h" |
| #include "pointer-set.h" |
| #include "hash-table.h" |
| #include "vec.h" |
| #include "tree-ssa-alias.h" |
| #include "internal-fn.h" |
| #include "gimple-fold.h" |
| #include "tree-eh.h" |
| #include "gimple-expr.h" |
| #include "is-a.h" |
| #include "gimple.h" |
| #include "gimplify.h" |
| #include "cfgloop.h" |
| #include "dwarf2.h" |
| #include "df.h" |
| #include "tm-constrs.h" |
| #include "params.h" |
| #include "cselib.h" |
| #include "debug.h" |
| #include "sched-int.h" |
| #include "sbitmap.h" |
| #include "fibheap.h" |
| #include "opts.h" |
| #include "diagnostic.h" |
| #include "dumpfile.h" |
| #include "tree-pass.h" |
| #include "wide-int.h" |
| #include "context.h" |
| #include "pass_manager.h" |
| #include "target-globals.h" |
| #include "tree-vectorizer.h" |
| #include "shrink-wrap.h" |
| |
| static rtx legitimize_dllimport_symbol (rtx, bool); |
| static rtx legitimize_pe_coff_extern_decl (rtx, bool); |
| static rtx legitimize_pe_coff_symbol (rtx, bool); |
| |
| #ifndef CHECK_STACK_LIMIT |
| #define CHECK_STACK_LIMIT (-1) |
| #endif |
| |
| /* Return the index of the given mode in the multiply and divide cost tables. */ |
| #define MODE_INDEX(mode) \ |
| ((mode) == QImode ? 0 \ |
| : (mode) == HImode ? 1 \ |
| : (mode) == SImode ? 2 \ |
| : (mode) == DImode ? 3 \ |
| : 4) |
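| |
| /* Illustrative usage, as a sketch (the authoritative lookups are in the RTX |
| cost hooks, e.g. ix86_rtx_costs): the five-entry arrays in struct |
| processor_costs below are indexed with this macro, so |
| cost->mult_init[MODE_INDEX (mode)] picks the start-up cost of a multiply |
| and cost->divide[MODE_INDEX (mode)] the divide/mod cost; SImode selects |
| slot 2 and anything wider than DImode falls into the "other" slot 4.  */ |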
| |
| /* Processor costs (relative to an add) */ |
| /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */ |
| #define COSTS_N_BYTES(N) ((N) * 2) |
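| |
| /* Under that assumption, COSTS_N_INSNS (1) == 4 and COSTS_N_BYTES (2) == 4, |
| so a two-byte add scores the same on the speed scale (instructions) as on |
| the size scale (bytes), which is what lets ix86_size_cost below reuse the |
| same struct processor_costs fields with byte-based values.  */ |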
| |
| #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}} |
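| |
| /* How to read the stringop tables below, as a sketch (the authoritative |
| layout is struct stringop_algs in i386.h): each memcpy and memset table |
| has two entries, for 32-bit and 64-bit code respectively; an entry names |
| the algorithm used when the size is unknown at compile time, followed by |
| {max_size, algorithm, noalign} triples where a max_size of -1 terminates |
| the list.  DUMMY_STRINGOP_ALGS fills the unused 64-bit slot of 32-bit-only |
| processors with a plain libcall.  */ |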
| |
| static stringop_algs ix86_size_memcpy[2] = { |
| {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, |
| {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}}; |
| static stringop_algs ix86_size_memset[2] = { |
| {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, |
| {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}}; |
| |
| const |
| struct processor_costs ix86_size_cost = {/* costs for tuning for size */ |
| COSTS_N_BYTES (2), /* cost of an add instruction */ |
| COSTS_N_BYTES (3), /* cost of a lea instruction */ |
| COSTS_N_BYTES (2), /* variable shift costs */ |
| COSTS_N_BYTES (3), /* constant shift costs */ |
| {COSTS_N_BYTES (3), /* cost of starting multiply for QI */ |
| COSTS_N_BYTES (3), /* HI */ |
| COSTS_N_BYTES (3), /* SI */ |
| COSTS_N_BYTES (3), /* DI */ |
| COSTS_N_BYTES (5)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */ |
| COSTS_N_BYTES (3), /* HI */ |
| COSTS_N_BYTES (3), /* SI */ |
| COSTS_N_BYTES (3), /* DI */ |
| COSTS_N_BYTES (5)}, /* other */ |
| COSTS_N_BYTES (3), /* cost of movsx */ |
| COSTS_N_BYTES (3), /* cost of movzx */ |
| 0, /* "large" insn */ |
| 2, /* MOVE_RATIO */ |
| 2, /* cost for loading QImode using movzbl */ |
| {2, 2, 2}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {2, 2, 2}, /* cost of storing integer registers */ |
| 2, /* cost of reg,reg fld/fst */ |
| {2, 2, 2}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {2, 2, 2}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 3, /* cost of moving MMX register */ |
| {3, 3}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {3, 3}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 3, /* cost of moving SSE register */ |
| {3, 3, 3}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {3, 3, 3}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 3, /* MMX or SSE register to integer */ |
| 0, /* size of l1 cache */ |
| 0, /* size of l2 cache */ |
| 0, /* size of prefetch block */ |
| 0, /* number of parallel prefetches */ |
| 2, /* Branch cost */ |
| COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */ |
| COSTS_N_BYTES (2), /* cost of FMUL instruction. */ |
| COSTS_N_BYTES (2), /* cost of FDIV instruction. */ |
| COSTS_N_BYTES (2), /* cost of FABS instruction. */ |
| COSTS_N_BYTES (2), /* cost of FCHS instruction. */ |
| COSTS_N_BYTES (2), /* cost of FSQRT instruction. */ |
| ix86_size_memcpy, |
| ix86_size_memset, |
| 1, /* scalar_stmt_cost. */ |
| 1, /* scalar load_cost. */ |
| 1, /* scalar_store_cost. */ |
| 1, /* vec_stmt_cost. */ |
| 1, /* vec_to_scalar_cost. */ |
| 1, /* scalar_to_vec_cost. */ |
| 1, /* vec_align_load_cost. */ |
| 1, /* vec_unalign_load_cost. */ |
| 1, /* vec_store_cost. */ |
| 1, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
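| |
| /* A sketch of how a cost table is chosen (assumed from the option-override |
| logic; see ix86_option_override_internal): processor_target_table maps the |
| -mtune target to one of the structures in this file, and when optimizing |
| for size ix86_cost is pointed at ix86_size_cost above instead, so these |
| byte-based numbers stand in for cycle counts under -Os.  */ |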
| |
| /* Processor costs (relative to an add) */ |
| static stringop_algs i386_memcpy[2] = { |
| {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, |
| DUMMY_STRINGOP_ALGS}; |
| static stringop_algs i386_memset[2] = { |
| {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, |
| DUMMY_STRINGOP_ALGS}; |
| |
| static const |
| struct processor_costs i386_cost = { /* 386 specific costs */ |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (1), /* cost of a lea instruction */ |
| COSTS_N_INSNS (3), /* variable shift costs */ |
| COSTS_N_INSNS (2), /* constant shift costs */ |
| {COSTS_N_INSNS (6), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (6), /* HI */ |
| COSTS_N_INSNS (6), /* SI */ |
| COSTS_N_INSNS (6), /* DI */ |
| COSTS_N_INSNS (6)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (23), /* HI */ |
| COSTS_N_INSNS (23), /* SI */ |
| COSTS_N_INSNS (23), /* DI */ |
| COSTS_N_INSNS (23)}, /* other */ |
| COSTS_N_INSNS (3), /* cost of movsx */ |
| COSTS_N_INSNS (2), /* cost of movzx */ |
| 15, /* "large" insn */ |
| 3, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {2, 4, 2}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {2, 4, 2}, /* cost of storing integer registers */ |
| 2, /* cost of reg,reg fld/fst */ |
| {8, 8, 8}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {8, 8, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {4, 8}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {4, 8}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {4, 8, 16}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {4, 8, 16}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 3, /* MMX or SSE register to integer */ |
| 0, /* size of l1 cache */ |
| 0, /* size of l2 cache */ |
| 0, /* size of prefetch block */ |
| 0, /* number of parallel prefetches */ |
| 1, /* Branch cost */ |
| COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (27), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (88), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (22), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (24), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (122), /* cost of FSQRT instruction. */ |
| i386_memcpy, |
| i386_memset, |
| 1, /* scalar_stmt_cost. */ |
| 1, /* scalar load_cost. */ |
| 1, /* scalar_store_cost. */ |
| 1, /* vec_stmt_cost. */ |
| 1, /* vec_to_scalar_cost. */ |
| 1, /* scalar_to_vec_cost. */ |
| 1, /* vec_align_load_cost. */ |
| 2, /* vec_unalign_load_cost. */ |
| 1, /* vec_store_cost. */ |
| 3, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| static stringop_algs i486_memcpy[2] = { |
| {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}}, |
| DUMMY_STRINGOP_ALGS}; |
| static stringop_algs i486_memset[2] = { |
| {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}}, |
| DUMMY_STRINGOP_ALGS}; |
| |
| static const |
| struct processor_costs i486_cost = { /* 486 specific costs */ |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (1), /* cost of a lea instruction */ |
| COSTS_N_INSNS (3), /* variable shift costs */ |
| COSTS_N_INSNS (2), /* constant shift costs */ |
| {COSTS_N_INSNS (12), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (12), /* HI */ |
| COSTS_N_INSNS (12), /* SI */ |
| COSTS_N_INSNS (12), /* DI */ |
| COSTS_N_INSNS (12)}, /* other */ |
| 1, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (40), /* HI */ |
| COSTS_N_INSNS (40), /* SI */ |
| COSTS_N_INSNS (40), /* DI */ |
| COSTS_N_INSNS (40)}, /* other */ |
| COSTS_N_INSNS (3), /* cost of movsx */ |
| COSTS_N_INSNS (2), /* cost of movzx */ |
| 15, /* "large" insn */ |
| 3, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {2, 4, 2}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {2, 4, 2}, /* cost of storing integer registers */ |
| 2, /* cost of reg,reg fld/fst */ |
| {8, 8, 8}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {8, 8, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {4, 8}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {4, 8}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {4, 8, 16}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {4, 8, 16}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 3, /* MMX or SSE register to integer */ |
| 4, /* size of l1 cache. 486 has 8kB cache |
| shared for code and data, so 4kB is |
| not really precise. */ |
| 4, /* size of l2 cache */ |
| 0, /* size of prefetch block */ |
| 0, /* number of parallel prefetches */ |
| 1, /* Branch cost */ |
| COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (16), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (73), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (3), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (3), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (83), /* cost of FSQRT instruction. */ |
| i486_memcpy, |
| i486_memset, |
| 1, /* scalar_stmt_cost. */ |
| 1, /* scalar load_cost. */ |
| 1, /* scalar_store_cost. */ |
| 1, /* vec_stmt_cost. */ |
| 1, /* vec_to_scalar_cost. */ |
| 1, /* scalar_to_vec_cost. */ |
| 1, /* vec_align_load_cost. */ |
| 2, /* vec_unalign_load_cost. */ |
| 1, /* vec_store_cost. */ |
| 3, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| static stringop_algs pentium_memcpy[2] = { |
| {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, |
| DUMMY_STRINGOP_ALGS}; |
| static stringop_algs pentium_memset[2] = { |
| {libcall, {{-1, rep_prefix_4_byte, false}}}, |
| DUMMY_STRINGOP_ALGS}; |
| |
| static const |
| struct processor_costs pentium_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (1), /* cost of a lea instruction */ |
| COSTS_N_INSNS (4), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (11), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (11), /* HI */ |
| COSTS_N_INSNS (11), /* SI */ |
| COSTS_N_INSNS (11), /* DI */ |
| COSTS_N_INSNS (11)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (25), /* HI */ |
| COSTS_N_INSNS (25), /* SI */ |
| COSTS_N_INSNS (25), /* DI */ |
| COSTS_N_INSNS (25)}, /* other */ |
| COSTS_N_INSNS (3), /* cost of movsx */ |
| COSTS_N_INSNS (2), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 6, /* MOVE_RATIO */ |
| 6, /* cost for loading QImode using movzbl */ |
| {2, 4, 2}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {2, 4, 2}, /* cost of storing integer registers */ |
| 2, /* cost of reg,reg fld/fst */ |
| {2, 2, 6}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {4, 4, 6}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 8, /* cost of moving MMX register */ |
| {8, 8}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {8, 8}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {4, 8, 16}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {4, 8, 16}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 3, /* MMX or SSE register to integer */ |
| 8, /* size of l1 cache. */ |
| 8, /* size of l2 cache */ |
| 0, /* size of prefetch block */ |
| 0, /* number of parallel prefetches */ |
| 2, /* Branch cost */ |
| COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (3), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (39), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (1), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (1), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ |
| pentium_memcpy, |
| pentium_memset, |
| 1, /* scalar_stmt_cost. */ |
| 1, /* scalar load_cost. */ |
| 1, /* scalar_store_cost. */ |
| 1, /* vec_stmt_cost. */ |
| 1, /* vec_to_scalar_cost. */ |
| 1, /* scalar_to_vec_cost. */ |
| 1, /* vec_align_load_cost. */ |
| 2, /* vec_unalign_load_cost. */ |
| 1, /* vec_store_cost. */ |
| 3, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes |
| (we ensure the alignment).  For small blocks an inline loop is still a |
| noticeable win; for bigger blocks either rep movsl or rep movsb is the way |
| to go.  Rep movsb apparently has a more expensive startup time in the CPU, |
| but after 4K the difference is down in the noise.  */ |
| static stringop_algs pentiumpro_memcpy[2] = { |
| {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false}, |
| {8192, rep_prefix_4_byte, false}, |
| {-1, rep_prefix_1_byte, false}}}, |
| DUMMY_STRINGOP_ALGS}; |
| static stringop_algs pentiumpro_memset[2] = { |
| {rep_prefix_4_byte, {{1024, unrolled_loop, false}, |
| {8192, rep_prefix_4_byte, false}, |
| {-1, libcall, false}}}, |
| DUMMY_STRINGOP_ALGS}; |
| static const |
| struct processor_costs pentiumpro_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (1), /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (4), /* HI */ |
| COSTS_N_INSNS (4), /* SI */ |
| COSTS_N_INSNS (4), /* DI */ |
| COSTS_N_INSNS (4)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (17), /* HI */ |
| COSTS_N_INSNS (17), /* SI */ |
| COSTS_N_INSNS (17), /* DI */ |
| COSTS_N_INSNS (17)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 6, /* MOVE_RATIO */ |
| 2, /* cost for loading QImode using movzbl */ |
| {4, 4, 4}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {2, 2, 2}, /* cost of storing integer registers */ |
| 2, /* cost of reg,reg fld/fst */ |
| {2, 2, 6}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {4, 4, 6}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {2, 2}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {2, 2}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {2, 2, 8}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {2, 2, 8}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 3, /* MMX or SSE register to integer */ |
| 8, /* size of l1 cache. */ |
| 256, /* size of l2 cache */ |
| 32, /* size of prefetch block */ |
| 6, /* number of parallel prefetches */ |
| 2, /* Branch cost */ |
| COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (5), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (56), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (2), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (2), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ |
| pentiumpro_memcpy, |
| pentiumpro_memset, |
| 1, /* scalar_stmt_cost. */ |
| 1, /* scalar load_cost. */ |
| 1, /* scalar_store_cost. */ |
| 1, /* vec_stmt_cost. */ |
| 1, /* vec_to_scalar_cost. */ |
| 1, /* scalar_to_vec_cost. */ |
| 1, /* vec_align_load_cost. */ |
| 2, /* vec_unalign_load_cost. */ |
| 1, /* vec_store_cost. */ |
| 3, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| static stringop_algs geode_memcpy[2] = { |
| {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, |
| DUMMY_STRINGOP_ALGS}; |
| static stringop_algs geode_memset[2] = { |
| {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, |
| DUMMY_STRINGOP_ALGS}; |
| static const |
| struct processor_costs geode_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (1), /* cost of a lea instruction */ |
| COSTS_N_INSNS (2), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (4), /* HI */ |
| COSTS_N_INSNS (7), /* SI */ |
| COSTS_N_INSNS (7), /* DI */ |
| COSTS_N_INSNS (7)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (23), /* HI */ |
| COSTS_N_INSNS (39), /* SI */ |
| COSTS_N_INSNS (39), /* DI */ |
| COSTS_N_INSNS (39)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 4, /* MOVE_RATIO */ |
| 1, /* cost for loading QImode using movzbl */ |
| {1, 1, 1}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {1, 1, 1}, /* cost of storing integer registers */ |
| 1, /* cost of reg,reg fld/fst */ |
| {1, 1, 1}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {4, 6, 6}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| |
| 1, /* cost of moving MMX register */ |
| {1, 1}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {1, 1}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 1, /* cost of moving SSE register */ |
| {1, 1, 1}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {1, 1, 1}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 1, /* MMX or SSE register to integer */ |
| 64, /* size of l1 cache. */ |
| 128, /* size of l2 cache. */ |
| 32, /* size of prefetch block */ |
| 1, /* number of parallel prefetches */ |
| 1, /* Branch cost */ |
| COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (11), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (47), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (1), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (1), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (54), /* cost of FSQRT instruction. */ |
| geode_memcpy, |
| geode_memset, |
| 1, /* scalar_stmt_cost. */ |
| 1, /* scalar load_cost. */ |
| 1, /* scalar_store_cost. */ |
| 1, /* vec_stmt_cost. */ |
| 1, /* vec_to_scalar_cost. */ |
| 1, /* scalar_to_vec_cost. */ |
| 1, /* vec_align_load_cost. */ |
| 2, /* vec_unalign_load_cost. */ |
| 1, /* vec_store_cost. */ |
| 3, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| static stringop_algs k6_memcpy[2] = { |
| {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, |
| DUMMY_STRINGOP_ALGS}; |
| static stringop_algs k6_memset[2] = { |
| {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, |
| DUMMY_STRINGOP_ALGS}; |
| static const |
| struct processor_costs k6_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (2), /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (3), /* HI */ |
| COSTS_N_INSNS (3), /* SI */ |
| COSTS_N_INSNS (3), /* DI */ |
| COSTS_N_INSNS (3)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (18), /* HI */ |
| COSTS_N_INSNS (18), /* SI */ |
| COSTS_N_INSNS (18), /* DI */ |
| COSTS_N_INSNS (18)}, /* other */ |
| COSTS_N_INSNS (2), /* cost of movsx */ |
| COSTS_N_INSNS (2), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 4, /* MOVE_RATIO */ |
| 3, /* cost for loading QImode using movzbl */ |
| {4, 5, 4}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {2, 3, 2}, /* cost of storing integer registers */ |
| 4, /* cost of reg,reg fld/fst */ |
| {6, 6, 6}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {4, 4, 4}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {2, 2}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {2, 2}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {2, 2, 8}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {2, 2, 8}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 6, /* MMX or SSE register to integer */ |
| 32, /* size of l1 cache. */ |
| 32, /* size of l2 cache. Some models |
| have integrated l2 cache, but |
| optimizing for k6 is not important |
| enough to worry about that. */ |
| 32, /* size of prefetch block */ |
| 1, /* number of parallel prefetches */ |
| 1, /* Branch cost */ |
| COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (2), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (56), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (2), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (2), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ |
| k6_memcpy, |
| k6_memset, |
| 1, /* scalar_stmt_cost. */ |
| 1, /* scalar load_cost. */ |
| 1, /* scalar_store_cost. */ |
| 1, /* vec_stmt_cost. */ |
| 1, /* vec_to_scalar_cost. */ |
| 1, /* scalar_to_vec_cost. */ |
| 1, /* vec_align_load_cost. */ |
| 2, /* vec_unalign_load_cost. */ |
| 1, /* vec_store_cost. */ |
| 3, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| /* For some reason, Athlon deals better with the REP prefix (relative to |
| loops) than K8 does.  Alignment becomes important after 8 bytes for memcpy |
| and 128 bytes for memset.  */ |
| static stringop_algs athlon_memcpy[2] = { |
| {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, |
| DUMMY_STRINGOP_ALGS}; |
| static stringop_algs athlon_memset[2] = { |
| {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, |
| DUMMY_STRINGOP_ALGS}; |
| static const |
| struct processor_costs athlon_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (2), /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (5), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (5), /* HI */ |
| COSTS_N_INSNS (5), /* SI */ |
| COSTS_N_INSNS (5), /* DI */ |
| COSTS_N_INSNS (5)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (26), /* HI */ |
| COSTS_N_INSNS (42), /* SI */ |
| COSTS_N_INSNS (74), /* DI */ |
| COSTS_N_INSNS (74)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 9, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {3, 4, 3}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {3, 4, 3}, /* cost of storing integer registers */ |
| 4, /* cost of reg,reg fld/fst */ |
| {4, 4, 12}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {6, 6, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {4, 4}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {4, 4}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {4, 4, 6}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {4, 4, 5}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 5, /* MMX or SSE register to integer */ |
| 64, /* size of l1 cache. */ |
| 256, /* size of l2 cache. */ |
| 64, /* size of prefetch block */ |
| 6, /* number of parallel prefetches */ |
| 5, /* Branch cost */ |
| COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (4), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (24), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (2), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (2), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ |
| athlon_memcpy, |
| athlon_memset, |
| 1, /* scalar_stmt_cost. */ |
| 1, /* scalar load_cost. */ |
| 1, /* scalar_store_cost. */ |
| 1, /* vec_stmt_cost. */ |
| 1, /* vec_to_scalar_cost. */ |
| 1, /* scalar_to_vec_cost. */ |
| 1, /* vec_align_load_cost. */ |
| 2, /* vec_unalign_load_cost. */ |
| 1, /* vec_store_cost. */ |
| 3, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| /* K8 has an optimized REP instruction for medium-sized blocks, but for very |
| small blocks it is better to use a loop.  For large blocks, a libcall can |
| do nontemporal accesses and beat inlining considerably.  */ |
| static stringop_algs k8_memcpy[2] = { |
| {libcall, {{6, loop, false}, {14, unrolled_loop, false}, |
| {-1, rep_prefix_4_byte, false}}}, |
| {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, |
| {-1, libcall, false}}}}; |
| static stringop_algs k8_memset[2] = { |
| {libcall, {{8, loop, false}, {24, unrolled_loop, false}, |
| {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, |
| {libcall, {{48, unrolled_loop, false}, |
| {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; |
| static const |
| struct processor_costs k8_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (2), /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (4), /* HI */ |
| COSTS_N_INSNS (3), /* SI */ |
| COSTS_N_INSNS (4), /* DI */ |
| COSTS_N_INSNS (5)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (26), /* HI */ |
| COSTS_N_INSNS (42), /* SI */ |
| COSTS_N_INSNS (74), /* DI */ |
| COSTS_N_INSNS (74)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 9, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {3, 4, 3}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {3, 4, 3}, /* cost of storing integer registers */ |
| 4, /* cost of reg,reg fld/fst */ |
| {4, 4, 12}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {6, 6, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {3, 3}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {4, 4}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {4, 3, 6}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {4, 4, 5}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 5, /* MMX or SSE register to integer */ |
| 64, /* size of l1 cache. */ |
| 512, /* size of l2 cache. */ |
| 64, /* size of prefetch block */ |
| /* New AMD processors never drop prefetches; if they cannot be performed |
| immediately, they are queued.  We set the number of simultaneous prefetches |
| to a large constant to reflect this (it is probably not a good idea not to |
| limit the number of prefetches at all, as their execution also takes some |
| time).  */ |
| 100, /* number of parallel prefetches */ |
| 3, /* Branch cost */ |
| COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (4), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (19), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (2), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (2), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ |
| |
| k8_memcpy, |
| k8_memset, |
| 4, /* scalar_stmt_cost. */ |
| 2, /* scalar load_cost. */ |
| 2, /* scalar_store_cost. */ |
| 5, /* vec_stmt_cost. */ |
| 0, /* vec_to_scalar_cost. */ |
| 2, /* scalar_to_vec_cost. */ |
| 2, /* vec_align_load_cost. */ |
| 3, /* vec_unalign_load_cost. */ |
| 3, /* vec_store_cost. */ |
| 3, /* cond_taken_branch_cost. */ |
| 2, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for |
| very small blocks it is better to use a loop.  For large blocks, a libcall |
| can do nontemporal accesses and beat inlining considerably.  */ |
| static stringop_algs amdfam10_memcpy[2] = { |
| {libcall, {{6, loop, false}, {14, unrolled_loop, false}, |
| {-1, rep_prefix_4_byte, false}}}, |
| {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, |
| {-1, libcall, false}}}}; |
| static stringop_algs amdfam10_memset[2] = { |
| {libcall, {{8, loop, false}, {24, unrolled_loop, false}, |
| {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, |
| {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, |
| {-1, libcall, false}}}}; |
| struct processor_costs amdfam10_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (2), /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (4), /* HI */ |
| COSTS_N_INSNS (3), /* SI */ |
| COSTS_N_INSNS (4), /* DI */ |
| COSTS_N_INSNS (5)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (35), /* HI */ |
| COSTS_N_INSNS (51), /* SI */ |
| COSTS_N_INSNS (83), /* DI */ |
| COSTS_N_INSNS (83)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 9, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {3, 4, 3}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {3, 4, 3}, /* cost of storing integer registers */ |
| 4, /* cost of reg,reg fld/fst */ |
| {4, 4, 12}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {6, 6, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {3, 3}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {4, 4}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {4, 4, 3}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {4, 4, 5}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 3, /* MMX or SSE register to integer */ |
| /* On K8: |
| MOVD reg64, xmmreg Double FSTORE 4 |
| MOVD reg32, xmmreg Double FSTORE 4 |
| On AMDFAM10: |
| MOVD reg64, xmmreg Double FADD 3 |
| 1/1 1/1 |
| MOVD reg32, xmmreg Double FADD 3 |
| 1/1 1/1 */ |
| 64, /* size of l1 cache. */ |
| 512, /* size of l2 cache. */ |
| 64, /* size of prefetch block */ |
| /* New AMD processors never drop prefetches; if they cannot be performed |
| immediately, they are queued.  We set the number of simultaneous prefetches |
| to a large constant to reflect this (it is probably not a good idea not to |
| limit the number of prefetches at all, as their execution also takes some |
| time).  */ |
| 100, /* number of parallel prefetches */ |
| 2, /* Branch cost */ |
| COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (4), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (19), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (2), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (2), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ |
| |
| amdfam10_memcpy, |
| amdfam10_memset, |
| 4, /* scalar_stmt_cost. */ |
| 2, /* scalar load_cost. */ |
| 2, /* scalar_store_cost. */ |
| 6, /* vec_stmt_cost. */ |
| 0, /* vec_to_scalar_cost. */ |
| 2, /* scalar_to_vec_cost. */ |
| 2, /* vec_align_load_cost. */ |
| 2, /* vec_unalign_load_cost. */ |
| 2, /* vec_store_cost. */ |
| 2, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for |
| very small blocks it is better to use a loop.  For large blocks, a libcall |
| can do nontemporal accesses and beat inlining considerably.  */ |
| static stringop_algs bdver1_memcpy[2] = { |
| {libcall, {{6, loop, false}, {14, unrolled_loop, false}, |
| {-1, rep_prefix_4_byte, false}}}, |
| {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, |
| {-1, libcall, false}}}}; |
| static stringop_algs bdver1_memset[2] = { |
| {libcall, {{8, loop, false}, {24, unrolled_loop, false}, |
| {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, |
| {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, |
| {-1, libcall, false}}}}; |
| |
| const struct processor_costs bdver1_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (1), /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (4), /* HI */ |
| COSTS_N_INSNS (4), /* SI */ |
| COSTS_N_INSNS (6), /* DI */ |
| COSTS_N_INSNS (6)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (35), /* HI */ |
| COSTS_N_INSNS (51), /* SI */ |
| COSTS_N_INSNS (83), /* DI */ |
| COSTS_N_INSNS (83)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 9, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {5, 5, 4}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {4, 4, 4}, /* cost of storing integer registers */ |
| 2, /* cost of reg,reg fld/fst */ |
| {5, 5, 12}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {4, 4, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {4, 4}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {4, 4}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {4, 4, 4}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {4, 4, 4}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 2, /* MMX or SSE register to integer */ |
| /* On K8: |
| MOVD reg64, xmmreg Double FSTORE 4 |
| MOVD reg32, xmmreg Double FSTORE 4 |
| On AMDFAM10: |
| MOVD reg64, xmmreg Double FADD 3 |
| 1/1 1/1 |
| MOVD reg32, xmmreg Double FADD 3 |
| 1/1 1/1 */ |
| 16, /* size of l1 cache. */ |
| 2048, /* size of l2 cache. */ |
| 64, /* size of prefetch block */ |
| /* New AMD processors never drop prefetches; if they cannot be performed |
| immediately, they are queued.  We set the number of simultaneous prefetches |
| to a large constant to reflect this (it is probably not a good idea not to |
| limit the number of prefetches at all, as their execution also takes some |
| time).  */ |
| 100, /* number of parallel prefetches */ |
| 2, /* Branch cost */ |
| COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (6), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (42), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (2), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (2), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ |
| |
| bdver1_memcpy, |
| bdver1_memset, |
| 6, /* scalar_stmt_cost. */ |
| 4, /* scalar load_cost. */ |
| 4, /* scalar_store_cost. */ |
| 6, /* vec_stmt_cost. */ |
| 0, /* vec_to_scalar_cost. */ |
| 2, /* scalar_to_vec_cost. */ |
| 4, /* vec_align_load_cost. */ |
| 4, /* vec_unalign_load_cost. */ |
| 4, /* vec_store_cost. */ |
| 2, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for |
| very small blocks it is better to use a loop.  For large blocks, a libcall |
| can do nontemporal accesses and beat inlining considerably.  */ |
| |
| static stringop_algs bdver2_memcpy[2] = { |
| {libcall, {{6, loop, false}, {14, unrolled_loop, false}, |
| {-1, rep_prefix_4_byte, false}}}, |
| {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, |
| {-1, libcall, false}}}}; |
| static stringop_algs bdver2_memset[2] = { |
| {libcall, {{8, loop, false}, {24, unrolled_loop, false}, |
| {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, |
| {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, |
| {-1, libcall, false}}}}; |
| |
| const struct processor_costs bdver2_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (1), /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (4), /* HI */ |
| COSTS_N_INSNS (4), /* SI */ |
| COSTS_N_INSNS (6), /* DI */ |
| COSTS_N_INSNS (6)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (35), /* HI */ |
| COSTS_N_INSNS (51), /* SI */ |
| COSTS_N_INSNS (83), /* DI */ |
| COSTS_N_INSNS (83)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 9, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {5, 5, 4}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {4, 4, 4}, /* cost of storing integer registers */ |
| 2, /* cost of reg,reg fld/fst */ |
| {5, 5, 12}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {4, 4, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {4, 4}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {4, 4}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {4, 4, 4}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {4, 4, 4}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 2, /* MMX or SSE register to integer */ |
| /* On K8: |
| MOVD reg64, xmmreg Double FSTORE 4 |
| MOVD reg32, xmmreg Double FSTORE 4 |
| On AMDFAM10: |
| MOVD reg64, xmmreg Double FADD 3 |
| 1/1 1/1 |
| MOVD reg32, xmmreg Double FADD 3 |
| 1/1 1/1 */ |
| 16, /* size of l1 cache. */ |
| 2048, /* size of l2 cache. */ |
| 64, /* size of prefetch block */ |
| /* New AMD processors never drop prefetches; if they cannot be performed |
| immediately, they are queued.  We set the number of simultaneous prefetches |
| to a large constant to reflect this (it is probably not a good idea not to |
| limit the number of prefetches at all, as their execution also takes some |
| time).  */ |
| 100, /* number of parallel prefetches */ |
| 2, /* Branch cost */ |
| COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (6), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (42), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (2), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (2), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ |
| |
| bdver2_memcpy, |
| bdver2_memset, |
| 6, /* scalar_stmt_cost. */ |
| 4, /* scalar load_cost. */ |
| 4, /* scalar_store_cost. */ |
| 6, /* vec_stmt_cost. */ |
| 0, /* vec_to_scalar_cost. */ |
| 2, /* scalar_to_vec_cost. */ |
| 4, /* vec_align_load_cost. */ |
| 4, /* vec_unalign_load_cost. */ |
| 4, /* vec_store_cost. */ |
| 2, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| |
| /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for |
| very small blocks it is better to use a loop.  For large blocks, a libcall |
| can do nontemporal accesses and beat inlining considerably.  */ |
| static stringop_algs bdver3_memcpy[2] = { |
| {libcall, {{6, loop, false}, {14, unrolled_loop, false}, |
| {-1, rep_prefix_4_byte, false}}}, |
| {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, |
| {-1, libcall, false}}}}; |
| static stringop_algs bdver3_memset[2] = { |
| {libcall, {{8, loop, false}, {24, unrolled_loop, false}, |
| {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, |
| {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, |
| {-1, libcall, false}}}}; |
| struct processor_costs bdver3_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (1), /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (4), /* HI */ |
| COSTS_N_INSNS (4), /* SI */ |
| COSTS_N_INSNS (6), /* DI */ |
| COSTS_N_INSNS (6)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (35), /* HI */ |
| COSTS_N_INSNS (51), /* SI */ |
| COSTS_N_INSNS (83), /* DI */ |
| COSTS_N_INSNS (83)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 9, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {5, 5, 4}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {4, 4, 4}, /* cost of storing integer registers */ |
| 2, /* cost of reg,reg fld/fst */ |
| {5, 5, 12}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {4, 4, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {4, 4}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {4, 4}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {4, 4, 4}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {4, 4, 4}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 2, /* MMX or SSE register to integer */ |
| 16, /* size of l1 cache. */ |
| 2048, /* size of l2 cache. */ |
| 64, /* size of prefetch block */ |
| /* New AMD processors never drop prefetches; if they cannot be performed |
| immediately, they are queued.  We set the number of simultaneous prefetches |
| to a large constant to reflect this (it is probably not a good idea not to |
| limit the number of prefetches at all, as their execution also takes some |
| time).  */ |
| 100, /* number of parallel prefetches */ |
| 2, /* Branch cost */ |
| COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (6), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (42), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (2), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (2), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ |
| |
| bdver3_memcpy, |
| bdver3_memset, |
| 6, /* scalar_stmt_cost. */ |
| 4, /* scalar load_cost. */ |
| 4, /* scalar_store_cost. */ |
| 6, /* vec_stmt_cost. */ |
| 0, /* vec_to_scalar_cost. */ |
| 2, /* scalar_to_vec_cost. */ |
| 4, /* vec_align_load_cost. */ |
| 4, /* vec_unalign_load_cost. */ |
| 4, /* vec_store_cost. */ |
| 2, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for |
| very small blocks it is better to use a loop.  For large blocks, a libcall |
| can do nontemporal accesses and beat inlining considerably.  */ |
| static stringop_algs bdver4_memcpy[2] = { |
| {libcall, {{6, loop, false}, {14, unrolled_loop, false}, |
| {-1, rep_prefix_4_byte, false}}}, |
| {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, |
| {-1, libcall, false}}}}; |
| static stringop_algs bdver4_memset[2] = { |
| {libcall, {{8, loop, false}, {24, unrolled_loop, false}, |
| {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, |
| {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, |
| {-1, libcall, false}}}}; |
| struct processor_costs bdver4_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (1), /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (4), /* HI */ |
| COSTS_N_INSNS (4), /* SI */ |
| COSTS_N_INSNS (6), /* DI */ |
| COSTS_N_INSNS (6)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (35), /* HI */ |
| COSTS_N_INSNS (51), /* SI */ |
| COSTS_N_INSNS (83), /* DI */ |
| COSTS_N_INSNS (83)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 9, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {5, 5, 4}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {4, 4, 4}, /* cost of storing integer registers */ |
| 2, /* cost of reg,reg fld/fst */ |
| {5, 5, 12}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {4, 4, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {4, 4}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {4, 4}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {4, 4, 4}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {4, 4, 4}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 2, /* MMX or SSE register to integer */ |
| 16, /* size of l1 cache. */ |
| 2048, /* size of l2 cache. */ |
| 64, /* size of prefetch block */ |
| /* New AMD processors never drop prefetches; if they cannot be performed |
| immediately, they are queued.  We set the number of simultaneous prefetches |
| to a large constant to reflect this (it is probably not a good idea not to |
| limit the number of prefetches at all, as their execution also takes some |
| time).  */ |
| 100, /* number of parallel prefetches */ |
| 2, /* Branch cost */ |
| COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (6), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (42), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (2), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (2), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ |
| |
| bdver4_memcpy, |
| bdver4_memset, |
| 6, /* scalar_stmt_cost. */ |
| 4, /* scalar load_cost. */ |
| 4, /* scalar_store_cost. */ |
| 6, /* vec_stmt_cost. */ |
| 0, /* vec_to_scalar_cost. */ |
| 2, /* scalar_to_vec_cost. */ |
| 4, /* vec_align_load_cost. */ |
| 4, /* vec_unalign_load_cost. */ |
| 4, /* vec_store_cost. */ |
| 2, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for |
| very small blocks it is better to use a loop.  For large blocks, a libcall |
| can do nontemporal accesses and beat inlining considerably.  */ |
| static stringop_algs btver1_memcpy[2] = { |
| {libcall, {{6, loop, false}, {14, unrolled_loop, false}, |
| {-1, rep_prefix_4_byte, false}}}, |
| {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, |
| {-1, libcall, false}}}}; |
| static stringop_algs btver1_memset[2] = { |
| {libcall, {{8, loop, false}, {24, unrolled_loop, false}, |
| {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, |
| {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, |
| {-1, libcall, false}}}}; |
| const struct processor_costs btver1_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (2), /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (4), /* HI */ |
| COSTS_N_INSNS (3), /* SI */ |
| COSTS_N_INSNS (4), /* DI */ |
| COSTS_N_INSNS (5)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (35), /* HI */ |
| COSTS_N_INSNS (51), /* SI */ |
| COSTS_N_INSNS (83), /* DI */ |
| COSTS_N_INSNS (83)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 9, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {3, 4, 3}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {3, 4, 3}, /* cost of storing integer registers */ |
| 4, /* cost of reg,reg fld/fst */ |
| {4, 4, 12}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {6, 6, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {3, 3}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {4, 4}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {4, 4, 3}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {4, 4, 5}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 3, /* MMX or SSE register to integer */ |
| /* On K8: |
| MOVD reg64, xmmreg Double FSTORE 4 |
| MOVD reg32, xmmreg Double FSTORE 4 |
| On AMDFAM10: |
| MOVD reg64, xmmreg Double FADD 3 |
| 1/1 1/1 |
| MOVD reg32, xmmreg Double FADD 3 |
| 1/1 1/1 */ |
| 32, /* size of l1 cache. */ |
| 512, /* size of l2 cache. */ |
| 64, /* size of prefetch block */ |
| 100, /* number of parallel prefetches */ |
| 2, /* Branch cost */ |
| COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (4), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (19), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (2), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (2), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ |
| |
| btver1_memcpy, |
| btver1_memset, |
| 4, /* scalar_stmt_cost. */ |
| 2, /* scalar load_cost. */ |
| 2, /* scalar_store_cost. */ |
| 6, /* vec_stmt_cost. */ |
| 0, /* vec_to_scalar_cost. */ |
| 2, /* scalar_to_vec_cost. */ |
| 2, /* vec_align_load_cost. */ |
| 2, /* vec_unalign_load_cost. */ |
| 2, /* vec_store_cost. */ |
| 2, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| static stringop_algs btver2_memcpy[2] = { |
| {libcall, {{6, loop, false}, {14, unrolled_loop, false}, |
| {-1, rep_prefix_4_byte, false}}}, |
| {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, |
| {-1, libcall, false}}}}; |
| static stringop_algs btver2_memset[2] = { |
| {libcall, {{8, loop, false}, {24, unrolled_loop, false}, |
| {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, |
| {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, |
| {-1, libcall, false}}}}; |
| const struct processor_costs btver2_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (2), /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (4), /* HI */ |
| COSTS_N_INSNS (3), /* SI */ |
| COSTS_N_INSNS (4), /* DI */ |
| COSTS_N_INSNS (5)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (35), /* HI */ |
| COSTS_N_INSNS (51), /* SI */ |
| COSTS_N_INSNS (83), /* DI */ |
| COSTS_N_INSNS (83)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 9, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {3, 4, 3}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {3, 4, 3}, /* cost of storing integer registers */ |
| 4, /* cost of reg,reg fld/fst */ |
| {4, 4, 12}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {6, 6, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {3, 3}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {4, 4}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {4, 4, 3}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {4, 4, 5}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 3, /* MMX or SSE register to integer */ |
| /* On K8: |
| MOVD reg64, xmmreg Double FSTORE 4 |
| MOVD reg32, xmmreg Double FSTORE 4 |
| On AMDFAM10: |
| MOVD reg64, xmmreg Double FADD 3 |
| 1/1 1/1 |
| MOVD reg32, xmmreg Double FADD 3 |
| 1/1 1/1 */ |
| 32, /* size of l1 cache. */ |
| 2048, /* size of l2 cache. */ |
| 64, /* size of prefetch block */ |
| 100, /* number of parallel prefetches */ |
| 2, /* Branch cost */ |
| COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (4), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (19), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (2), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (2), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ |
| btver2_memcpy, |
| btver2_memset, |
| 4, /* scalar_stmt_cost. */ |
| 2, /* scalar_load_cost. */ |
| 2, /* scalar_store_cost. */ |
| 6, /* vec_stmt_cost. */ |
| 0, /* vec_to_scalar_cost. */ |
| 2, /* scalar_to_vec_cost. */ |
| 2, /* vec_align_load_cost. */ |
| 2, /* vec_unalign_load_cost. */ |
| 2, /* vec_store_cost. */ |
| 2, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| static stringop_algs pentium4_memcpy[2] = { |
| {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}}, |
| DUMMY_STRINGOP_ALGS}; |
| static stringop_algs pentium4_memset[2] = { |
| {libcall, {{6, loop_1_byte, false}, {48, loop, false}, |
| {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}}, |
| DUMMY_STRINGOP_ALGS}; |
| |
| static const |
| struct processor_costs pentium4_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (3), /* cost of a lea instruction */ |
| COSTS_N_INSNS (4), /* variable shift costs */ |
| COSTS_N_INSNS (4), /* constant shift costs */ |
| {COSTS_N_INSNS (15), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (15), /* HI */ |
| COSTS_N_INSNS (15), /* SI */ |
| COSTS_N_INSNS (15), /* DI */ |
| COSTS_N_INSNS (15)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (56), /* HI */ |
| COSTS_N_INSNS (56), /* SI */ |
| COSTS_N_INSNS (56), /* DI */ |
| COSTS_N_INSNS (56)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 16, /* "large" insn */ |
| 6, /* MOVE_RATIO */ |
| 2, /* cost for loading QImode using movzbl */ |
| {4, 5, 4}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {2, 3, 2}, /* cost of storing integer registers */ |
| 2, /* cost of reg,reg fld/fst */ |
| {2, 2, 6}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {4, 4, 6}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {2, 2}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {2, 2}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 12, /* cost of moving SSE register */ |
| {12, 12, 12}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {2, 2, 8}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 10, /* MMX or SSE register to integer */ |
| 8, /* size of l1 cache. */ |
| 256, /* size of l2 cache. */ |
| 64, /* size of prefetch block */ |
| 6, /* number of parallel prefetches */ |
| 2, /* Branch cost */ |
| COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (7), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (43), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (2), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (2), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (43), /* cost of FSQRT instruction. */ |
| pentium4_memcpy, |
| pentium4_memset, |
| 1, /* scalar_stmt_cost. */ |
| 1, /* scalar_load_cost. */ |
| 1, /* scalar_store_cost. */ |
| 1, /* vec_stmt_cost. */ |
| 1, /* vec_to_scalar_cost. */ |
| 1, /* scalar_to_vec_cost. */ |
| 1, /* vec_align_load_cost. */ |
| 2, /* vec_unalign_load_cost. */ |
| 1, /* vec_store_cost. */ |
| 3, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| static stringop_algs nocona_memcpy[2] = { |
| {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}}, |
| {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false}, |
| {100000, unrolled_loop, false}, {-1, libcall, false}}}}; |
| |
| static stringop_algs nocona_memset[2] = { |
| {libcall, {{6, loop_1_byte, false}, {48, loop, false}, |
| {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}}, |
| {libcall, {{24, loop, false}, {64, unrolled_loop, false}, |
| {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; |
| |
| static const |
| struct processor_costs nocona_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (1), /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (10), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (10), /* HI */ |
| COSTS_N_INSNS (10), /* SI */ |
| COSTS_N_INSNS (10), /* DI */ |
| COSTS_N_INSNS (10)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (66), /* HI */ |
| COSTS_N_INSNS (66), /* SI */ |
| COSTS_N_INSNS (66), /* DI */ |
| COSTS_N_INSNS (66)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 16, /* "large" insn */ |
| 17, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {4, 4, 4}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {4, 4, 4}, /* cost of storing integer registers */ |
| 3, /* cost of reg,reg fld/fst */ |
| {12, 12, 12}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {4, 4, 4}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 6, /* cost of moving MMX register */ |
| {12, 12}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {12, 12}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 6, /* cost of moving SSE register */ |
| {12, 12, 12}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {12, 12, 12}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 8, /* MMX or SSE register to integer */ |
| 8, /* size of l1 cache. */ |
| 1024, /* size of l2 cache. */ |
| 64, /* size of prefetch block */ |
| 8, /* number of parallel prefetches */ |
| 1, /* Branch cost */ |
| COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (8), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (40), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (3), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (3), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (44), /* cost of FSQRT instruction. */ |
| nocona_memcpy, |
| nocona_memset, |
| 1, /* scalar_stmt_cost. */ |
| 1, /* scalar_load_cost. */ |
| 1, /* scalar_store_cost. */ |
| 1, /* vec_stmt_cost. */ |
| 1, /* vec_to_scalar_cost. */ |
| 1, /* scalar_to_vec_cost. */ |
| 1, /* vec_align_load_cost. */ |
| 2, /* vec_unalign_load_cost. */ |
| 1, /* vec_store_cost. */ |
| 3, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| static stringop_algs atom_memcpy[2] = { |
| {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, |
| {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, |
| {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; |
| static stringop_algs atom_memset[2] = { |
| {libcall, {{8, loop, false}, {15, unrolled_loop, false}, |
| {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, |
| {libcall, {{24, loop, false}, {32, unrolled_loop, false}, |
| {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; |
| static const |
| struct processor_costs atom_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (4), /* HI */ |
| COSTS_N_INSNS (3), /* SI */ |
| COSTS_N_INSNS (4), /* DI */ |
| COSTS_N_INSNS (2)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (26), /* HI */ |
| COSTS_N_INSNS (42), /* SI */ |
| COSTS_N_INSNS (74), /* DI */ |
| COSTS_N_INSNS (74)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 17, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {4, 4, 4}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {4, 4, 4}, /* cost of storing integer registers */ |
| 4, /* cost of reg,reg fld/fst */ |
| {12, 12, 12}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {6, 6, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {8, 8}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {8, 8}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {8, 8, 8}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {8, 8, 8}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 5, /* MMX or SSE register to integer */ |
| 32, /* size of l1 cache. */ |
| 256, /* size of l2 cache. */ |
| 64, /* size of prefetch block */ |
| 6, /* number of parallel prefetches */ |
| 3, /* Branch cost */ |
| COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (8), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (20), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (8), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (8), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ |
| atom_memcpy, |
| atom_memset, |
| 1, /* scalar_stmt_cost. */ |
| 1, /* scalar_load_cost. */ |
| 1, /* scalar_store_cost. */ |
| 1, /* vec_stmt_cost. */ |
| 1, /* vec_to_scalar_cost. */ |
| 1, /* scalar_to_vec_cost. */ |
| 1, /* vec_align_load_cost. */ |
| 2, /* vec_unalign_load_cost. */ |
| 1, /* vec_store_cost. */ |
| 3, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| static stringop_algs slm_memcpy[2] = { |
| {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, |
| {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, |
| {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; |
| static stringop_algs slm_memset[2] = { |
| {libcall, {{8, loop, false}, {15, unrolled_loop, false}, |
| {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, |
| {libcall, {{24, loop, false}, {32, unrolled_loop, false}, |
| {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; |
| static const |
| struct processor_costs slm_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (3), /* HI */ |
| COSTS_N_INSNS (3), /* SI */ |
| COSTS_N_INSNS (4), /* DI */ |
| COSTS_N_INSNS (2)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (26), /* HI */ |
| COSTS_N_INSNS (42), /* SI */ |
| COSTS_N_INSNS (74), /* DI */ |
| COSTS_N_INSNS (74)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 17, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {4, 4, 4}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {4, 4, 4}, /* cost of storing integer registers */ |
| 4, /* cost of reg,reg fld/fst */ |
| {12, 12, 12}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {6, 6, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {8, 8}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {8, 8}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {8, 8, 8}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {8, 8, 8}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 5, /* MMX or SSE register to integer */ |
| 32, /* size of l1 cache. */ |
| 256, /* size of l2 cache. */ |
| 64, /* size of prefetch block */ |
| 6, /* number of parallel prefetches */ |
| 3, /* Branch cost */ |
| COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (8), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (20), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (8), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (8), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ |
| slm_memcpy, |
| slm_memset, |
| 1, /* scalar_stmt_cost. */ |
| 1, /* scalar_load_cost. */ |
| 1, /* scalar_store_cost. */ |
| 1, /* vec_stmt_cost. */ |
| 4, /* vec_to_scalar_cost. */ |
| 1, /* scalar_to_vec_cost. */ |
| 1, /* vec_align_load_cost. */ |
| 2, /* vec_unalign_load_cost. */ |
| 1, /* vec_store_cost. */ |
| 3, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| static stringop_algs intel_memcpy[2] = { |
| {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, |
| {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, |
| {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; |
| static stringop_algs intel_memset[2] = { |
| {libcall, {{8, loop, false}, {15, unrolled_loop, false}, |
| {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, |
| {libcall, {{24, loop, false}, {32, unrolled_loop, false}, |
| {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; |
| static const |
| struct processor_costs intel_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (3), /* HI */ |
| COSTS_N_INSNS (3), /* SI */ |
| COSTS_N_INSNS (4), /* DI */ |
| COSTS_N_INSNS (2)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (26), /* HI */ |
| COSTS_N_INSNS (42), /* SI */ |
| COSTS_N_INSNS (74), /* DI */ |
| COSTS_N_INSNS (74)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 17, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {4, 4, 4}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {4, 4, 4}, /* cost of storing integer registers */ |
| 4, /* cost of reg,reg fld/fst */ |
| {12, 12, 12}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {6, 6, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {8, 8}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {8, 8}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {8, 8, 8}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {8, 8, 8}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 5, /* MMX or SSE register to integer */ |
| 32, /* size of l1 cache. */ |
| 256, /* size of l2 cache. */ |
| 64, /* size of prefetch block */ |
| 6, /* number of parallel prefetches */ |
| 3, /* Branch cost */ |
| COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (8), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (20), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (8), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (8), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ |
| intel_memcpy, |
| intel_memset, |
| 1, /* scalar_stmt_cost. */ |
| 1, /* scalar_load_cost. */ |
| 1, /* scalar_store_cost. */ |
| 1, /* vec_stmt_cost. */ |
| 4, /* vec_to_scalar_cost. */ |
| 1, /* scalar_to_vec_cost. */ |
| 1, /* vec_align_load_cost. */ |
| 2, /* vec_unalign_load_cost. */ |
| 1, /* vec_store_cost. */ |
| 3, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| /* Generic should produce code tuned for Core i7 (and newer chips) |
| and btver1 (and newer chips). */ |
| |
| static stringop_algs generic_memcpy[2] = { |
| {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, |
| {-1, libcall, false}}}, |
| {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, |
| {-1, libcall, false}}}}; |
| static stringop_algs generic_memset[2] = { |
| {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, |
| {-1, libcall, false}}}, |
| {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, |
| {-1, libcall, false}}}}; |
| static const |
| struct processor_costs generic_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| /* On all chips taken into consideration lea is 2 cycles or more. With |
| that higher cost, however, our current implementation of synth_mult |
| ends up using unnecessary temporary registers, causing regressions on |
| several SPECfp benchmarks. */ |
| COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (4), /* HI */ |
| COSTS_N_INSNS (3), /* SI */ |
| COSTS_N_INSNS (4), /* DI */ |
| COSTS_N_INSNS (2)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (26), /* HI */ |
| COSTS_N_INSNS (42), /* SI */ |
| COSTS_N_INSNS (74), /* DI */ |
| COSTS_N_INSNS (74)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 17, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {4, 4, 4}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {4, 4, 4}, /* cost of storing integer registers */ |
| 4, /* cost of reg,reg fld/fst */ |
| {12, 12, 12}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {6, 6, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {8, 8}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {8, 8}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {8, 8, 8}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {8, 8, 8}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 5, /* MMX or SSE register to integer */ |
| 32, /* size of l1 cache. */ |
| 512, /* size of l2 cache. */ |
| 64, /* size of prefetch block */ |
| 6, /* number of parallel prefetches */ |
| /* Benchmarks show large regressions on the K8 sixtrack benchmark when |
| this value is increased to the perhaps more appropriate value of 5. */ |
| 3, /* Branch cost */ |
| COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (8), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (20), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (8), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (8), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ |
| generic_memcpy, |
| generic_memset, |
| 1, /* scalar_stmt_cost. */ |
| 1, /* scalar_load_cost. */ |
| 1, /* scalar_store_cost. */ |
| 1, /* vec_stmt_cost. */ |
| 1, /* vec_to_scalar_cost. */ |
| 1, /* scalar_to_vec_cost. */ |
| 1, /* vec_align_load_cost. */ |
| 2, /* vec_unalign_load_cost. */ |
| 1, /* vec_store_cost. */ |
| 3, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| /* core_cost should produce code tuned for the Core family of CPUs. */ |
| static stringop_algs core_memcpy[2] = { |
| {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}}, |
| {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true}, |
| {-1, libcall, false}}}}; |
| static stringop_algs core_memset[2] = { |
| {libcall, {{6, loop_1_byte, true}, |
| {24, loop, true}, |
| {8192, rep_prefix_4_byte, true}, |
| {-1, libcall, false}}}, |
| {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true}, |
| {-1, libcall, false}}}}; |
| |
| static const |
| struct processor_costs core_cost = { |
| COSTS_N_INSNS (1), /* cost of an add instruction */ |
| /* On all chips taken into consideration lea is 2 cycles or more. With |
| that higher cost, however, our current implementation of synth_mult |
| ends up using unnecessary temporary registers, causing regressions on |
| several SPECfp benchmarks. */ |
| COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ |
| COSTS_N_INSNS (1), /* variable shift costs */ |
| COSTS_N_INSNS (1), /* constant shift costs */ |
| {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ |
| COSTS_N_INSNS (4), /* HI */ |
| COSTS_N_INSNS (3), /* SI */ |
| COSTS_N_INSNS (4), /* DI */ |
| COSTS_N_INSNS (2)}, /* other */ |
| 0, /* cost of multiply per each bit set */ |
| {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ |
| COSTS_N_INSNS (26), /* HI */ |
| COSTS_N_INSNS (42), /* SI */ |
| COSTS_N_INSNS (74), /* DI */ |
| COSTS_N_INSNS (74)}, /* other */ |
| COSTS_N_INSNS (1), /* cost of movsx */ |
| COSTS_N_INSNS (1), /* cost of movzx */ |
| 8, /* "large" insn */ |
| 17, /* MOVE_RATIO */ |
| 4, /* cost for loading QImode using movzbl */ |
| {4, 4, 4}, /* cost of loading integer registers |
| in QImode, HImode and SImode. |
| Relative to reg-reg move (2). */ |
| {4, 4, 4}, /* cost of storing integer registers */ |
| 4, /* cost of reg,reg fld/fst */ |
| {12, 12, 12}, /* cost of loading fp registers |
| in SFmode, DFmode and XFmode */ |
| {6, 6, 8}, /* cost of storing fp registers |
| in SFmode, DFmode and XFmode */ |
| 2, /* cost of moving MMX register */ |
| {8, 8}, /* cost of loading MMX registers |
| in SImode and DImode */ |
| {8, 8}, /* cost of storing MMX registers |
| in SImode and DImode */ |
| 2, /* cost of moving SSE register */ |
| {8, 8, 8}, /* cost of loading SSE registers |
| in SImode, DImode and TImode */ |
| {8, 8, 8}, /* cost of storing SSE registers |
| in SImode, DImode and TImode */ |
| 5, /* MMX or SSE register to integer */ |
| 64, /* size of l1 cache. */ |
| 512, /* size of l2 cache. */ |
| 64, /* size of prefetch block */ |
| 6, /* number of parallel prefetches */ |
| /* FIXME perhaps more appropriate value is 5. */ |
| 3, /* Branch cost */ |
| COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ |
| COSTS_N_INSNS (8), /* cost of FMUL instruction. */ |
| COSTS_N_INSNS (20), /* cost of FDIV instruction. */ |
| COSTS_N_INSNS (8), /* cost of FABS instruction. */ |
| COSTS_N_INSNS (8), /* cost of FCHS instruction. */ |
| COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ |
| core_memcpy, |
| core_memset, |
| 1, /* scalar_stmt_cost. */ |
| 1, /* scalar_load_cost. */ |
| 1, /* scalar_store_cost. */ |
| 1, /* vec_stmt_cost. */ |
| 1, /* vec_to_scalar_cost. */ |
| 1, /* scalar_to_vec_cost. */ |
| 1, /* vec_align_load_cost. */ |
| 2, /* vec_unalign_load_cost. */ |
| 1, /* vec_store_cost. */ |
| 3, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| |
| /* Set by -mtune. */ |
| const struct processor_costs *ix86_tune_cost = &pentium_cost; |
| |
| /* Set by -mtune or -Os. */ |
| const struct processor_costs *ix86_cost = &pentium_cost; |
| |
| /* Processor feature/optimization bitmasks. */ |
| #define m_386 (1<<PROCESSOR_I386) |
| #define m_486 (1<<PROCESSOR_I486) |
| #define m_PENT (1<<PROCESSOR_PENTIUM) |
| #define m_PPRO (1<<PROCESSOR_PENTIUMPRO) |
| #define m_PENT4 (1<<PROCESSOR_PENTIUM4) |
| #define m_NOCONA (1<<PROCESSOR_NOCONA) |
| #define m_P4_NOCONA (m_PENT4 | m_NOCONA) |
| #define m_CORE2 (1<<PROCESSOR_CORE2) |
| #define m_NEHALEM (1<<PROCESSOR_NEHALEM) |
| #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE) |
| #define m_HASWELL (1<<PROCESSOR_HASWELL) |
| #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL) |
| #define m_BONNELL (1<<PROCESSOR_BONNELL) |
| #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT) |
| #define m_INTEL (1<<PROCESSOR_INTEL) |
| |
| #define m_GEODE (1<<PROCESSOR_GEODE) |
| #define m_K6 (1<<PROCESSOR_K6) |
| #define m_K6_GEODE (m_K6 | m_GEODE) |
| #define m_K8 (1<<PROCESSOR_K8) |
| #define m_ATHLON (1<<PROCESSOR_ATHLON) |
| #define m_ATHLON_K8 (m_K8 | m_ATHLON) |
| #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10) |
| #define m_BDVER1 (1<<PROCESSOR_BDVER1) |
| #define m_BDVER2 (1<<PROCESSOR_BDVER2) |
| #define m_BDVER3 (1<<PROCESSOR_BDVER3) |
| #define m_BDVER4 (1<<PROCESSOR_BDVER4) |
| #define m_BTVER1 (1<<PROCESSOR_BTVER1) |
| #define m_BTVER2 (1<<PROCESSOR_BTVER2) |
| #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4) |
| #define m_BTVER (m_BTVER1 | m_BTVER2) |
| #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER) |
| |
| #define m_GENERIC (1<<PROCESSOR_GENERIC) |
| |
| const char* ix86_tune_feature_names[X86_TUNE_LAST] = { |
| #undef DEF_TUNE |
| #define DEF_TUNE(tune, name, selector) name, |
| #include "x86-tune.def" |
| #undef DEF_TUNE |
| }; |
| |
| /* Feature tests against the various tunings. */ |
| unsigned char ix86_tune_features[X86_TUNE_LAST]; |
| |
| /* Feature tests against the various tunings used to create ix86_tune_features |
| based on the processor mask. */ |
| static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { |
| #undef DEF_TUNE |
| #define DEF_TUNE(tune, name, selector) selector, |
| #include "x86-tune.def" |
| #undef DEF_TUNE |
| }; |
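| |
| /* As an illustration of the X-macro pattern above, using a made-up entry |
| (the real ones live in x86-tune.def): a line such as |
| DEF_TUNE (X86_TUNE_FOO, "foo", m_CORE_ALL | m_GENERIC) |
| contributes the string "foo" to ix86_tune_feature_names and the selector |
| m_CORE_ALL | m_GENERIC to initial_ix86_tune_features; the selector is |
| later tested against (1u << ix86_tune) in set_ix86_tune_features. */ |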
| |
| /* Feature tests against the various architecture variations. */ |
| unsigned char ix86_arch_features[X86_ARCH_LAST]; |
| |
| /* Feature tests against the various architecture variations, used to create |
| ix86_arch_features based on the processor mask. */ |
| static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = { |
| /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */ |
| ~(m_386 | m_486 | m_PENT | m_K6), |
| |
| /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */ |
| ~m_386, |
| |
| /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */ |
| ~(m_386 | m_486), |
| |
| /* X86_ARCH_XADD: Exchange and add was added for 80486. */ |
| ~m_386, |
| |
| /* X86_ARCH_BSWAP: Byteswap was added for 80486. */ |
| ~m_386, |
| }; |
| |
| /* If the average insn count for a single function invocation is |
| lower than this constant, emit fast (but longer) prologue and |
| epilogue code. */ |
| #define FAST_PROLOGUE_INSN_COUNT 20 |
| |
| /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */ |
| static const char *const qi_reg_name[] = QI_REGISTER_NAMES; |
| static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; |
| static const char *const hi_reg_name[] = HI_REGISTER_NAMES; |
| |
| /* Array of the smallest class containing reg number REGNO, indexed by |
| REGNO. Used by REGNO_REG_CLASS in i386.h. */ |
| |
| enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] = |
| { |
| /* ax, dx, cx, bx */ |
| AREG, DREG, CREG, BREG, |
| /* si, di, bp, sp */ |
| SIREG, DIREG, NON_Q_REGS, NON_Q_REGS, |
| /* FP registers */ |
| FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS, |
| FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, |
| /* arg pointer */ |
| NON_Q_REGS, |
| /* flags, fpsr, fpcr, frame */ |
| NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS, |
| /* SSE registers */ |
| SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, |
| SSE_REGS, SSE_REGS, |
| /* MMX registers */ |
| MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, |
| MMX_REGS, MMX_REGS, |
| /* REX registers */ |
| NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, |
| NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, |
| /* SSE REX registers */ |
| SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, |
| SSE_REGS, SSE_REGS, |
| /* AVX-512 SSE registers */ |
| EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, |
| EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, |
| EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, |
| EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, |
| /* Mask registers. */ |
| MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, |
| MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, |
| }; |
| |
| /* The "default" register map used in 32bit mode. */ |
| |
| int const dbx_register_map[FIRST_PSEUDO_REGISTER] = |
| { |
| 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */ |
| 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */ |
| -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */ |
| 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */ |
| 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */ |
| -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ |
| -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ |
| -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23 */ |
| -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31 */ |
| 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */ |
| }; |
| |
| /* The "default" register map used in 64bit mode. */ |
| |
| int const dbx64_register_map[FIRST_PSEUDO_REGISTER] = |
| { |
| 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */ |
| 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */ |
| -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */ |
| 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */ |
| 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */ |
| 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */ |
| 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */ |
| 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */ |
| 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */ |
| 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */ |
| }; |
| |
| /* Define the register numbers to be used in Dwarf debugging information. |
| The SVR4 reference port C compiler uses the following register numbers |
| in its Dwarf output code: |
| 0 for %eax (gcc regno = 0) |
| 1 for %ecx (gcc regno = 2) |
| 2 for %edx (gcc regno = 1) |
| 3 for %ebx (gcc regno = 3) |
| 4 for %esp (gcc regno = 7) |
| 5 for %ebp (gcc regno = 6) |
| 6 for %esi (gcc regno = 4) |
| 7 for %edi (gcc regno = 5) |
| The following three DWARF register numbers are never generated by |
| the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4 |
| believes these numbers have these meanings. |
| 8 for %eip (no gcc equivalent) |
| 9 for %eflags (gcc regno = 17) |
| 10 for %trapno (no gcc equivalent) |
| It is not at all clear how we should number the FP stack registers |
| for the x86 architecture. If the version of SDB on x86/svr4 were |
| a bit less brain dead with respect to floating-point then we would |
| have a precedent to follow with respect to DWARF register numbers |
| for x86 FP registers, but the SDB on x86/svr4 is so completely |
| broken with respect to FP registers that it is hardly worth thinking |
| of it as something to strive for compatibility with. |
| The version of x86/svr4 SDB I have at the moment does (partially) |
| seem to believe that DWARF register number 11 is associated with |
| the x86 register %st(0), but that's about all. Higher DWARF |
| register numbers don't seem to be associated with anything in |
| particular, and even for DWARF regno 11, SDB only seems to under- |
| stand that it should say that a variable lives in %st(0) (when |
| asked via an `=' command) if we said it was in DWARF regno 11, |
| but SDB still prints garbage when asked for the value of the |
| variable in question (via a `/' command). |
| (Also note that the labels SDB prints for various FP stack regs |
| when doing an `x' command are all wrong.) |
| Note that these problems generally don't affect the native SVR4 |
| C compiler because it doesn't allow the use of -O with -g and |
| because when it is *not* optimizing, it allocates a memory |
| location for each floating-point variable, and the memory |
| location is what gets described in the DWARF AT_location |
| attribute for the variable in question. |
| Regardless of the severe mental illness of the x86/svr4 SDB, we |
| do something sensible here and we use the following DWARF |
| register numbers. Note that these are all stack-top-relative |
| numbers. |
| 11 for %st(0) (gcc regno = 8) |
| 12 for %st(1) (gcc regno = 9) |
| 13 for %st(2) (gcc regno = 10) |
| 14 for %st(3) (gcc regno = 11) |
| 15 for %st(4) (gcc regno = 12) |
| 16 for %st(5) (gcc regno = 13) |
| 17 for %st(6) (gcc regno = 14) |
| 18 for %st(7) (gcc regno = 15) |
| */ |
| int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = |
| { |
| 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */ |
| 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */ |
| -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */ |
| 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */ |
| 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */ |
| -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ |
| -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ |
| -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23 */ |
| -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31 */ |
| 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */ |
| }; |
| |
| /* Define parameter passing and return registers. */ |
| |
| static int const x86_64_int_parameter_registers[6] = |
| { |
| DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG |
| }; |
| |
| static int const x86_64_ms_abi_int_parameter_registers[4] = |
| { |
| CX_REG, DX_REG, R8_REG, R9_REG |
| }; |
| |
| static int const x86_64_int_return_registers[4] = |
| { |
| AX_REG, DX_REG, DI_REG, SI_REG |
| }; |
| |
| /* Additional registers that are clobbered by SYSV calls. */ |
| |
| int const x86_64_ms_sysv_extra_clobbered_registers[12] = |
| { |
| SI_REG, DI_REG, |
| XMM6_REG, XMM7_REG, |
| XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG, |
| XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG |
| }; |
| |
| /* Define the structure for the machine field in struct function. */ |
| |
| struct GTY(()) stack_local_entry { |
| unsigned short mode; |
| unsigned short n; |
| rtx rtl; |
| struct stack_local_entry *next; |
| }; |
| |
| /* Structure describing stack frame layout. |
| Stack grows downward: |
| |
| [arguments] |
| <- ARG_POINTER |
| saved pc |
| |
| saved static chain if ix86_static_chain_on_stack |
| |
| saved frame pointer if frame_pointer_needed |
| <- HARD_FRAME_POINTER |
| [saved regs] |
| <- regs_save_offset |
| [padding0] |
| |
| [saved SSE regs] |
| <- sse_regs_save_offset |
| [padding1] | |
| | <- FRAME_POINTER |
| [va_arg registers] | |
| | |
| [frame] | |
| | |
| [padding2] | = to_allocate |
| <- STACK_POINTER |
| */ |
| struct ix86_frame |
| { |
| int nsseregs; |
| int nregs; |
| int va_arg_size; |
| int red_zone_size; |
| int outgoing_arguments_size; |
| |
| /* The offsets relative to ARG_POINTER. */ |
| HOST_WIDE_INT frame_pointer_offset; |
| HOST_WIDE_INT hard_frame_pointer_offset; |
| HOST_WIDE_INT stack_pointer_offset; |
| HOST_WIDE_INT hfp_save_offset; |
| HOST_WIDE_INT reg_save_offset; |
| HOST_WIDE_INT sse_reg_save_offset; |
| |
| /* When save_regs_using_mov is set, emit prologue using |
| move instead of push instructions. */ |
| bool save_regs_using_mov; |
| }; |
| |
| /* Which cpu are we scheduling for. */ |
| enum attr_cpu ix86_schedule; |
| |
| /* Which cpu are we optimizing for. */ |
| enum processor_type ix86_tune; |
| |
| /* Which instruction set architecture to use. */ |
| enum processor_type ix86_arch; |
| |
| /* True if processor has SSE prefetch instruction. */ |
| unsigned char x86_prefetch_sse; |
| |
| /* -mstackrealign option */ |
| static const char ix86_force_align_arg_pointer_string[] |
| = "force_align_arg_pointer"; |
| |
| static rtx (*ix86_gen_leave) (void); |
| static rtx (*ix86_gen_add3) (rtx, rtx, rtx); |
| static rtx (*ix86_gen_sub3) (rtx, rtx, rtx); |
| static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx); |
| static rtx (*ix86_gen_one_cmpl2) (rtx, rtx); |
| static rtx (*ix86_gen_monitor) (rtx, rtx, rtx); |
| static rtx (*ix86_gen_andsp) (rtx, rtx, rtx); |
| static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx); |
| static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx); |
| static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx); |
| static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx); |
| static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx); |
| |
| /* Preferred alignment for stack boundary in bits. */ |
| unsigned int ix86_preferred_stack_boundary; |
| |
| /* Alignment for incoming stack boundary in bits specified at |
| command line. */ |
| static unsigned int ix86_user_incoming_stack_boundary; |
| |
| /* Default alignment for incoming stack boundary in bits. */ |
| static unsigned int ix86_default_incoming_stack_boundary; |
| |
| /* Alignment for incoming stack boundary in bits. */ |
| unsigned int ix86_incoming_stack_boundary; |
| |
| /* Calling abi specific va_list type nodes. */ |
| static GTY(()) tree sysv_va_list_type_node; |
| static GTY(()) tree ms_va_list_type_node; |
| |
| /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */ |
| char internal_label_prefix[16]; |
| int internal_label_prefix_len; |
| |
| /* Fence to use after loop using movnt. */ |
| tree x86_mfence; |
| |
| /* Register class used for passing a given 64bit part of the argument. |
| These represent classes as documented by the psABI, with the exception |
| of the SSESF and SSEDF classes, which are basically the SSE class; gcc |
| just uses an SFmode or DFmode move instead of a DImode move to avoid |
| reformatting penalties. |
| |
| Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves |
| whenever possible (the upper half of the eightbyte is just padding). */ |
| enum x86_64_reg_class |
| { |
| X86_64_NO_CLASS, |
| X86_64_INTEGER_CLASS, |
| X86_64_INTEGERSI_CLASS, |
| X86_64_SSE_CLASS, |
| X86_64_SSESF_CLASS, |
| X86_64_SSEDF_CLASS, |
| X86_64_SSEUP_CLASS, |
| X86_64_X87_CLASS, |
| X86_64_X87UP_CLASS, |
| X86_64_COMPLEX_X87_CLASS, |
| X86_64_MEMORY_CLASS |
| }; |
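| |
| /* As a purely illustrative example of how these classes are used: an |
| argument such as struct { double d; long l; } would typically be |
| classified as { X86_64_SSEDF_CLASS, X86_64_INTEGER_CLASS }, so its first |
| eightbyte is passed in an SSE register (using a DFmode move) and its |
| second in a general-purpose register. */ |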
| |
| #define MAX_CLASSES 8 |
| |
| /* Table of constants used by fldpi, fldln2, etc.... */ |
| static REAL_VALUE_TYPE ext_80387_constants_table [5]; |
| static bool ext_80387_constants_init = 0; |
| |
| |
| static struct machine_function * ix86_init_machine_status (void); |
| static rtx ix86_function_value (const_tree, const_tree, bool); |
| static bool ix86_function_value_regno_p (const unsigned int); |
| static unsigned int ix86_function_arg_boundary (enum machine_mode, |
| const_tree); |
| static rtx ix86_static_chain (const_tree, bool); |
| static int ix86_function_regparm (const_tree, const_tree); |
| static void ix86_compute_frame_layout (struct ix86_frame *); |
| static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode, |
| rtx, rtx, int); |
| static void ix86_add_new_builtins (HOST_WIDE_INT); |
| static tree ix86_canonical_va_list_type (tree); |
| static void predict_jump (int); |
| static unsigned int split_stack_prologue_scratch_regno (void); |
| static bool i386_asm_output_addr_const_extra (FILE *, rtx); |
| |
| enum ix86_function_specific_strings |
| { |
| IX86_FUNCTION_SPECIFIC_ARCH, |
| IX86_FUNCTION_SPECIFIC_TUNE, |
| IX86_FUNCTION_SPECIFIC_MAX |
| }; |
| |
| static char *ix86_target_string (HOST_WIDE_INT, int, const char *, |
| const char *, enum fpmath_unit, bool); |
| static void ix86_function_specific_save (struct cl_target_option *, |
| struct gcc_options *opts); |
| static void ix86_function_specific_restore (struct gcc_options *opts, |
| struct cl_target_option *); |
| static void ix86_function_specific_print (FILE *, int, |
| struct cl_target_option *); |
| static bool ix86_valid_target_attribute_p (tree, tree, tree, int); |
| static bool ix86_valid_target_attribute_inner_p (tree, char *[], |
| struct gcc_options *, |
| struct gcc_options *, |
| struct gcc_options *); |
| static bool ix86_can_inline_p (tree, tree); |
| static void ix86_set_current_function (tree); |
| static unsigned int ix86_minimum_incoming_stack_boundary (bool); |
| |
| static enum calling_abi ix86_function_abi (const_tree); |
| |
| |
| #ifndef SUBTARGET32_DEFAULT_CPU |
| #define SUBTARGET32_DEFAULT_CPU "i386" |
| #endif |
| |
| /* Whether -mtune= or -march= were specified */ |
| static int ix86_tune_defaulted; |
| static int ix86_arch_specified; |
| |
| /* Vectorization library interface and handlers. */ |
| static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree); |
| |
| static tree ix86_veclibabi_svml (enum built_in_function, tree, tree); |
| static tree ix86_veclibabi_acml (enum built_in_function, tree, tree); |
| |
| /* Processor target table, indexed by processor number */ |
| struct ptt |
| { |
| const char *const name; /* processor name */ |
| const struct processor_costs *cost; /* Processor costs */ |
| const int align_loop; /* Default alignments. */ |
| const int align_loop_max_skip; |
| const int align_jump; |
| const int align_jump_max_skip; |
| const int align_func; |
| }; |
| |
| /* This table must be in sync with enum processor_type in i386.h. */ |
| static const struct ptt processor_target_table[PROCESSOR_max] = |
| { |
| {"generic", &generic_cost, 16, 10, 16, 10, 16}, |
| {"i386", &i386_cost, 4, 3, 4, 3, 4}, |
| {"i486", &i486_cost, 16, 15, 16, 15, 16}, |
| {"pentium", &pentium_cost, 16, 7, 16, 7, 16}, |
| {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16}, |
| {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0}, |
| {"nocona", &nocona_cost, 0, 0, 0, 0, 0}, |
| {"core2", &core_cost, 16, 10, 16, 10, 16}, |
| {"nehalem", &core_cost, 16, 10, 16, 10, 16}, |
| {"sandybridge", &core_cost, 16, 10, 16, 10, 16}, |
| {"haswell", &core_cost, 16, 10, 16, 10, 16}, |
| {"bonnell", &atom_cost, 16, 15, 16, 7, 16}, |
| {"silvermont", &slm_cost, 16, 15, 16, 7, 16}, |
| {"intel", &intel_cost, 16, 15, 16, 7, 16}, |
| {"geode", &geode_cost, 0, 0, 0, 0, 0}, |
| {"k6", &k6_cost, 32, 7, 32, 7, 32}, |
| {"athlon", &athlon_cost, 16, 7, 16, 7, 16}, |
| {"k8", &k8_cost, 16, 7, 16, 7, 16}, |
| {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32}, |
| {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11}, |
| {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11}, |
| {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11}, |
| {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11}, |
| {"btver1", &btver1_cost, 16, 10, 16, 7, 11}, |
| {"btver2", &btver2_cost, 16, 10, 16, 7, 11} |
| }; |
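| |
| /* For example, the entry {"k8", &k8_cost, 16, 7, 16, 7, 16} above says to |
| take instruction costs from k8_cost and, unless the user overrides them, |
| to align loops and jumps to 16 bytes (skipping at most 7 bytes of padding |
| in either case) and functions to 16 bytes; these values are used to seed |
| the default -falign-loops/-falign-jumps/-falign-functions settings. */ |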
| |
| static unsigned int |
| rest_of_handle_insert_vzeroupper (void) |
| { |
| int i; |
| |
| /* vzeroupper instructions are inserted immediately after reload to |
| account for possible spills from 256bit registers. The pass |
| reuses the mode switching infrastructure by re-running the mode |
| insertion pass, so disable entities that have already been processed. */ |
| for (i = 0; i < MAX_386_ENTITIES; i++) |
| ix86_optimize_mode_switching[i] = 0; |
| |
| ix86_optimize_mode_switching[AVX_U128] = 1; |
| |
| /* Call optimize_mode_switching. */ |
| g->get_passes ()->execute_pass_mode_switching (); |
| return 0; |
| } |
| |
| namespace { |
| |
| const pass_data pass_data_insert_vzeroupper = |
| { |
| RTL_PASS, /* type */ |
| "vzeroupper", /* name */ |
| OPTGROUP_NONE, /* optinfo_flags */ |
| true, /* has_execute */ |
| TV_NONE, /* tv_id */ |
| 0, /* properties_required */ |
| 0, /* properties_provided */ |
| 0, /* properties_destroyed */ |
| 0, /* todo_flags_start */ |
| TODO_df_finish, /* todo_flags_finish */ |
| }; |
| |
| class pass_insert_vzeroupper : public rtl_opt_pass |
| { |
| public: |
| pass_insert_vzeroupper(gcc::context *ctxt) |
| : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt) |
| {} |
| |
| /* opt_pass methods: */ |
| virtual bool gate (function *) |
| { |
| return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER; |
| } |
| |
| virtual unsigned int execute (function *) |
| { |
| return rest_of_handle_insert_vzeroupper (); |
| } |
| |
| }; // class pass_insert_vzeroupper |
| |
| } // anon namespace |
| |
| rtl_opt_pass * |
| make_pass_insert_vzeroupper (gcc::context *ctxt) |
| { |
| return new pass_insert_vzeroupper (ctxt); |
| } |
| |
| /* Return true if a red-zone is in use. */ |
| |
| static inline bool |
| ix86_using_red_zone (void) |
| { |
| return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI; |
| } |
| |
| /* Return a string that documents the current -m options. The caller is |
| responsible for freeing the string. */ |
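| /* For -march=core2 -mtune=generic on a 64-bit target this might yield |
| something like "-march=core2 -mtune=generic -m64 -mssse3 -msse3 -msse2 |
| -msse -mmmx -mfxsr -mfpmath=sse" (illustrative only; the exact contents |
| depend on which ISA and flag bits are currently enabled). */ |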
| |
| static char * |
| ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch, |
| const char *tune, enum fpmath_unit fpmath, |
| bool add_nl_p) |
| { |
| struct ix86_target_opts |
| { |
| const char *option; /* option string */ |
| HOST_WIDE_INT mask; /* isa mask options */ |
| }; |
| |
| /* This table is ordered so that options like -msse4.2 that imply |
| other options are matched before the options they imply. */ |
| static struct ix86_target_opts isa_opts[] = |
| { |
| { "-mfma4", OPTION_MASK_ISA_FMA4 }, |
| { "-mfma", OPTION_MASK_ISA_FMA }, |
| { "-mxop", OPTION_MASK_ISA_XOP }, |
| { "-mlwp", OPTION_MASK_ISA_LWP }, |
| { "-mavx512f", OPTION_MASK_ISA_AVX512F }, |
| { "-mavx512er", OPTION_MASK_ISA_AVX512ER }, |
| { "-mavx512cd", OPTION_MASK_ISA_AVX512CD }, |
| { "-mavx512pf", OPTION_MASK_ISA_AVX512PF }, |
| { "-msse4a", OPTION_MASK_ISA_SSE4A }, |
| { "-msse4.2", OPTION_MASK_ISA_SSE4_2 }, |
| { "-msse4.1", OPTION_MASK_ISA_SSE4_1 }, |
| { "-mssse3", OPTION_MASK_ISA_SSSE3 }, |
| { "-msse3", OPTION_MASK_ISA_SSE3 }, |
| { "-msse2", OPTION_MASK_ISA_SSE2 }, |
| { "-msse", OPTION_MASK_ISA_SSE }, |
| { "-m3dnow", OPTION_MASK_ISA_3DNOW }, |
| { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A }, |
| { "-mmmx", OPTION_MASK_ISA_MMX }, |
| { "-mabm", OPTION_MASK_ISA_ABM }, |
| { "-mbmi", OPTION_MASK_ISA_BMI }, |
| { "-mbmi2", OPTION_MASK_ISA_BMI2 }, |
| { "-mlzcnt", OPTION_MASK_ISA_LZCNT }, |
| { "-mhle", OPTION_MASK_ISA_HLE }, |
| { "-mfxsr", OPTION_MASK_ISA_FXSR }, |
| { "-mrdseed", OPTION_MASK_ISA_RDSEED }, |
| { "-mprfchw", OPTION_MASK_ISA_PRFCHW }, |
| { "-madx", OPTION_MASK_ISA_ADX }, |
| { "-mtbm", OPTION_MASK_ISA_TBM }, |
| { "-mpopcnt", OPTION_MASK_ISA_POPCNT }, |
| { "-mmovbe", OPTION_MASK_ISA_MOVBE }, |
| { "-mcrc32", OPTION_MASK_ISA_CRC32 }, |
| { "-maes", OPTION_MASK_ISA_AES }, |
| { "-msha", OPTION_MASK_ISA_SHA }, |
| { "-mpclmul", OPTION_MASK_ISA_PCLMUL }, |
| { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE }, |
| { "-mrdrnd", OPTION_MASK_ISA_RDRND }, |
| { "-mf16c", OPTION_MASK_ISA_F16C }, |
| { "-mrtm", OPTION_MASK_ISA_RTM }, |
| { "-mxsave", OPTION_MASK_ISA_XSAVE }, |
| { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT }, |
| { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 }, |
| { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT }, |
| { "-mxsavec", OPTION_MASK_ISA_XSAVEC }, |
| { "-mxsaves", OPTION_MASK_ISA_XSAVES }, |
| }; |
| |
| /* Flag options. */ |
| static struct ix86_target_opts flag_opts[] = |
| { |
| { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE }, |
| { "-mlong-double-128", MASK_LONG_DOUBLE_128 }, |
| { "-mlong-double-64", MASK_LONG_DOUBLE_64 }, |
| { "-m80387", MASK_80387 }, |
| { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS }, |
| { "-malign-double", MASK_ALIGN_DOUBLE }, |
| { "-mcld", MASK_CLD }, |
| { "-mfp-ret-in-387", MASK_FLOAT_RETURNS }, |
| { "-mieee-fp", MASK_IEEE_FP }, |
| { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS }, |
| { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY }, |
| { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT }, |
| { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS }, |
| { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 }, |
| { "-mno-push-args", MASK_NO_PUSH_ARGS }, |
| { "-mno-red-zone", MASK_NO_RED_ZONE }, |
| { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER }, |
| { "-mrecip", MASK_RECIP }, |
| { "-mrtd", MASK_RTD }, |
| { "-msseregparm", MASK_SSEREGPARM }, |
| { "-mstack-arg-probe", MASK_STACK_PROBE }, |
| { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS }, |
| { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS }, |
| { "-m8bit-idiv", MASK_USE_8BIT_IDIV }, |
| { "-mvzeroupper", MASK_VZEROUPPER }, |
| { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD}, |
| { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE}, |
| { "-mprefer-avx128", MASK_PREFER_AVX128}, |
| }; |
| |
| const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2]; |
| |
| char isa_other[40]; |
| char target_other[40]; |
| unsigned num = 0; |
| unsigned i, j; |
| char *ret; |
| char *ptr; |
| size_t len; |
| size_t line_len; |
| size_t sep_len; |
| const char *abi; |
| |
| memset (opts, '\0', sizeof (opts)); |
| |
| /* Add -march= option. */ |
| if (arch) |
| { |
| opts[num][0] = "-march="; |
| opts[num++][1] = arch; |
| } |
| |
| /* Add -mtune= option. */ |
| if (tune) |
| { |
| opts[num][0] = "-mtune="; |
| opts[num++][1] = tune; |
| } |
| |
| /* Add -m32/-m64/-mx32. */ |
| if ((isa & OPTION_MASK_ISA_64BIT) != 0) |
| { |
| if ((isa & OPTION_MASK_ABI_64) != 0) |
| abi = "-m64"; |
| else |
| abi = "-mx32"; |
| isa &= ~ (OPTION_MASK_ISA_64BIT |
| | OPTION_MASK_ABI_64 |
| | OPTION_MASK_ABI_X32); |
| } |
| else |
| abi = "-m32"; |
| opts[num++][0] = abi; |
| |
| /* Pick out the options in isa options. */ |
| for (i = 0; i < ARRAY_SIZE (isa_opts); i++) |
| { |
| if ((isa & isa_opts[i].mask) != 0) |
| { |
| opts[num++][0] = isa_opts[i].option; |
| isa &= ~ isa_opts[i].mask; |
| } |
| } |
| |
| if (isa && add_nl_p) |
| { |
| opts[num++][0] = isa_other; |
| sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)", |
| isa); |
| } |
| |
| /* Add flag options. */ |
| for (i = 0; i < ARRAY_SIZE (flag_opts); i++) |
| { |
| if ((flags & flag_opts[i].mask) != 0) |
| { |
| opts[num++][0] = flag_opts[i].option; |
| flags &= ~ flag_opts[i].mask; |
| } |
| } |
| |
| if (flags && add_nl_p) |
| { |
| opts[num++][0] = target_other; |
| sprintf (target_other, "(other flags: %#x)", flags); |
| } |
| |
| /* Add -fpmath= option. */ |
| if (fpmath) |
| { |
| opts[num][0] = "-mfpmath="; |
| switch ((int) fpmath) |
| { |
| case FPMATH_387: |
| opts[num++][1] = "387"; |
| break; |
| |
| case FPMATH_SSE: |
| opts[num++][1] = "sse"; |
| break; |
| |
| case FPMATH_387 | FPMATH_SSE: |
| opts[num++][1] = "sse+387"; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* Any options? */ |
| if (num == 0) |
| return NULL; |
| |
| gcc_assert (num < ARRAY_SIZE (opts)); |
| |
| /* Size the string. */ |
| len = 0; |
| sep_len = (add_nl_p) ? 3 : 1; |
| for (i = 0; i < num; i++) |
| { |
| len += sep_len; |
| for (j = 0; j < 2; j++) |
| if (opts[i][j]) |
| len += strlen (opts[i][j]); |
| } |
| |
| /* Build the string. */ |
| ret = ptr = (char *) xmalloc (len); |
| line_len = 0; |
| |
| for (i = 0; i < num; i++) |
| { |
| size_t len2[2]; |
| |
| for (j = 0; j < 2; j++) |
| len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0; |
| |
| if (i != 0) |
| { |
| *ptr++ = ' '; |
| line_len++; |
| |
| if (add_nl_p && line_len + len2[0] + len2[1] > 70) |
| { |
| *ptr++ = '\\'; |
| *ptr++ = '\n'; |
| line_len = 0; |
| } |
| } |
| |
| for (j = 0; j < 2; j++) |
| if (opts[i][j]) |
| { |
| memcpy (ptr, opts[i][j], len2[j]); |
| ptr += len2[j]; |
| line_len += len2[j]; |
| } |
| } |
| |
| *ptr = '\0'; |
| gcc_assert (ret + len >= ptr); |
| |
| return ret; |
| } |
| |
| /* Return true if profiling code should be emitted before the |
| prologue, and false otherwise. On x86 this is the case exactly |
| when -mfentry is in effect. */ |
| static bool |
| ix86_profile_before_prologue (void) |
| { |
| return flag_fentry != 0; |
| } |
| |
| /* Function that is callable from the debugger to print the current |
| options. */ |
| void ATTRIBUTE_UNUSED |
| ix86_debug_options (void) |
| { |
| char *opts = ix86_target_string (ix86_isa_flags, target_flags, |
| ix86_arch_string, ix86_tune_string, |
| ix86_fpmath, true); |
| |
| if (opts) |
| { |
| fprintf (stderr, "%s\n\n", opts); |
| free (opts); |
| } |
| else |
| fputs ("<no options>\n\n", stderr); |
| |
| return; |
| } |
| |
| static const char *stringop_alg_names[] = { |
| #define DEF_ENUM |
| #define DEF_ALG(alg, name) #name, |
| #include "stringop.def" |
| #undef DEF_ENUM |
| #undef DEF_ALG |
| }; |
| |
| /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=. |
| The string is of the following form, or a comma-separated list thereof: |
| |
| strategy_alg:max_size:[align|noalign] |
| |
| where the full size range for the strategy is either [0, max_size] or |
| [min_size, max_size], in which min_size is the max_size + 1 of the |
| preceding range. The last size range must have max_size == -1. |
| |
| Examples: |
| |
| 1. |
| -mmemcpy-strategy=libcall:-1:noalign |
| |
| this is equivalent to (for known size memcpy) -mstringop-strategy=libcall |
| |
| |
| 2. |
| -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign |
| |
| This is to tell the compiler to use the following strategy for memset |
| 1) when the expected size is between [1, 16], use rep_8byte strategy; |
| 2) when the size is between [17, 2048], use vector_loop; |
| 3) when the size is > 2048, use libcall. */ |
| |
| struct stringop_size_range |
| { |
| int max; |
| stringop_alg alg; |
| bool noalign; |
| }; |
| |
| static void |
| ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset) |
| { |
| const struct stringop_algs *default_algs; |
| stringop_size_range input_ranges[MAX_STRINGOP_ALGS]; |
| char *curr_range_str, *next_range_str; |
| int i = 0, n = 0; |
| |
| if (is_memset) |
| default_algs = &ix86_cost->memset[TARGET_64BIT != 0]; |
| else |
| default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0]; |
| |
| curr_range_str = strategy_str; |
| |
| do |
| { |
| int maxs; |
| char alg_name[128]; |
| char align[16]; |
| next_range_str = strchr (curr_range_str, ','); |
| if (next_range_str) |
| *next_range_str++ = '\0'; |
| |
| if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s", |
| alg_name, &maxs, align)) |
| { |
| error ("wrong arg %s to option %s", curr_range_str, |
| is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy="); |
| return; |
| } |
| |
| if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1)) |
| { |
| error ("size ranges of option %s should be increasing", |
| is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy="); |
| return; |
| } |
| |
| for (i = 0; i < last_alg; i++) |
| if (!strcmp (alg_name, stringop_alg_names[i])) |
| break; |
| |
| if (i == last_alg) |
| { |
| error ("wrong stringop strategy name %s specified for option %s", |
| alg_name, |
| is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy="); |
| return; |
| } |
| |
| /* Guard against overflowing INPUT_RANGES before storing into it. */ |
| if (n >= MAX_STRINGOP_ALGS) |
| { |
| error ("too many size ranges specified in option %s", |
| is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy="); |
| return; |
| } |
| |
| input_ranges[n].max = maxs; |
| input_ranges[n].alg = (stringop_alg) i; |
| if (!strcmp (align, "align")) |
| input_ranges[n].noalign = false; |
| else if (!strcmp (align, "noalign")) |
| input_ranges[n].noalign = true; |
| else |
| { |
| error ("unknown alignment %s specified for option %s", |
| align, is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy="); |
| return; |
| } |
| n++; |
| curr_range_str = next_range_str; |
| } |
| while (curr_range_str); |
| |
| if (input_ranges[n - 1].max != -1) |
| { |
| error ("the max value for the last size range should be -1" |
| " for option %s", |
| is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy="); |
| return; |
| } |
| |
| if (n > MAX_STRINGOP_ALGS) |
| { |
| error ("too many size ranges specified in option %s", |
| is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy="); |
| return; |
| } |
| |
| /* Now override the default algs array. */ |
| for (i = 0; i < n; i++) |
| { |
| *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max; |
| *const_cast<stringop_alg *>(&default_algs->size[i].alg) |
| = input_ranges[i].alg; |
| *const_cast<int *>(&default_algs->size[i].noalign) |
| = input_ranges[i].noalign; |
| } |
| } |
| |
| |
| /* Parse the -mtune-ctrl= option. When DUMP is true, |
| print the features that are explicitly set. */ |
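| /* For example (with hypothetical feature names), -mtune-ctrl=foo,^bar |
| sets the tuning feature named "foo" and clears the one named "bar"; a |
| leading '^' requests clearing, as handled below. */ |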
| |
| static void |
| parse_mtune_ctrl_str (bool dump) |
| { |
| if (!ix86_tune_ctrl_string) |
| return; |
| |
| char *next_feature_string = NULL; |
| char *curr_feature_string = xstrdup (ix86_tune_ctrl_string); |
| char *orig = curr_feature_string; |
| int i; |
| do |
| { |
| bool clear = false; |
| |
| next_feature_string = strchr (curr_feature_string, ','); |
| if (next_feature_string) |
| *next_feature_string++ = '\0'; |
| if (*curr_feature_string == '^') |
| { |
| curr_feature_string++; |
| clear = true; |
| } |
| for (i = 0; i < X86_TUNE_LAST; i++) |
| { |
| if (!strcmp (curr_feature_string, ix86_tune_feature_names[i])) |
| { |
| ix86_tune_features[i] = !clear; |
| if (dump) |
| fprintf (stderr, "Explicitly %s feature %s\n", |
| clear ? "clear" : "set", ix86_tune_feature_names[i]); |
| break; |
| } |
| } |
| if (i == X86_TUNE_LAST) |
| error ("unknown parameter to option -mtune-ctrl: %s", |
| clear ? curr_feature_string - 1 : curr_feature_string); |
| curr_feature_string = next_feature_string; |
| } |
| while (curr_feature_string); |
| free (orig); |
| } |
| |
| /* Helper function to set ix86_tune_features. IX86_TUNE is the |
| processor type. */ |
| |
| static void |
| set_ix86_tune_features (enum processor_type ix86_tune, bool dump) |
| { |
| unsigned int ix86_tune_mask = 1u << ix86_tune; |
| int i; |
| |
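| /* Each initial_ix86_tune_features[] entry is a bitmask over processor |
|    types; testing bit IX86_TUNE tells whether the feature defaults to |
|    on for the processor we are tuning for. */ |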
| for (i = 0; i < X86_TUNE_LAST; ++i) |
| { |
| if (ix86_tune_no_default) |
| ix86_tune_features[i] = 0; |
| else |
| ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask); |
| } |
| |
| if (dump) |
| { |
| fprintf (stderr, "List of x86 specific tuning parameter names:\n"); |
| for (i = 0; i < X86_TUNE_LAST; i++) |
| fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i], |
| ix86_tune_features[i] ? "on" : "off"); |
| } |
| |
| parse_mtune_ctrl_str (dump); |
| } |
| |
| |
| /* Override various settings based on options. If MAIN_ARGS_P, the |
| options are from the command line, otherwise they are from |
| attributes. */ |
| |
| static void |
| ix86_option_override_internal (bool main_args_p, |
| struct gcc_options *opts, |
| struct gcc_options *opts_set) |
| { |
| int i; |
| unsigned int ix86_arch_mask; |
| const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL); |
| const char *prefix; |
| const char *suffix; |
| const char *sw; |
| |
| #define PTA_3DNOW (HOST_WIDE_INT_1 << 0) |
| #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1) |
| #define PTA_64BIT (HOST_WIDE_INT_1 << 2) |
| #define PTA_ABM (HOST_WIDE_INT_1 << 3) |
| #define PTA_AES (HOST_WIDE_INT_1 << 4) |
| #define PTA_AVX (HOST_WIDE_INT_1 << 5) |
| #define PTA_BMI (HOST_WIDE_INT_1 << 6) |
| #define PTA_CX16 (HOST_WIDE_INT_1 << 7) |
| #define PTA_F16C (HOST_WIDE_INT_1 << 8) |
| #define PTA_FMA (HOST_WIDE_INT_1 << 9) |
| #define PTA_FMA4 (HOST_WIDE_INT_1 << 10) |
| #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11) |
| #define PTA_LWP (HOST_WIDE_INT_1 << 12) |
| #define PTA_LZCNT (HOST_WIDE_INT_1 << 13) |
| #define PTA_MMX (HOST_WIDE_INT_1 << 14) |
| #define PTA_MOVBE (HOST_WIDE_INT_1 << 15) |
| #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16) |
| #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17) |
| #define PTA_POPCNT (HOST_WIDE_INT_1 << 18) |
| #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19) |
| #define PTA_RDRND (HOST_WIDE_INT_1 << 20) |
| #define PTA_SSE (HOST_WIDE_INT_1 << 21) |
| #define PTA_SSE2 (HOST_WIDE_INT_1 << 22) |
| #define PTA_SSE3 (HOST_WIDE_INT_1 << 23) |
| #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24) |
| #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25) |
| #define PTA_SSE4A (HOST_WIDE_INT_1 << 26) |
| #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27) |
| #define PTA_TBM (HOST_WIDE_INT_1 << 28) |
| #define PTA_XOP (HOST_WIDE_INT_1 << 29) |
| #define PTA_AVX2 (HOST_WIDE_INT_1 << 30) |
| #define PTA_BMI2 (HOST_WIDE_INT_1 << 31) |
| #define PTA_RTM (HOST_WIDE_INT_1 << 32) |
| #define PTA_HLE (HOST_WIDE_INT_1 << 33) |
| #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34) |
| #define PTA_RDSEED (HOST_WIDE_INT_1 << 35) |
| #define PTA_ADX (HOST_WIDE_INT_1 << 36) |
| #define PTA_FXSR (HOST_WIDE_INT_1 << 37) |
| #define PTA_XSAVE (HOST_WIDE_INT_1 << 38) |
| #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39) |
| #define PTA_AVX512F (HOST_WIDE_INT_1 << 40) |
| #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41) |
| #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42) |
| #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43) |
| #define PTA_SHA (HOST_WIDE_INT_1 << 45) |
| #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46) |
| #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47) |
| #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48) |
| #define PTA_XSAVES (HOST_WIDE_INT_1 << 49) |
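| /* The shifts of 32 and above rely on HOST_WIDE_INT_1 expanding to a |
|    HOST_WIDE_INT constant; a plain 1 would overflow int. */ |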
| |
| #define PTA_CORE2 \ |
| (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \ |
| | PTA_CX16 | PTA_FXSR) |
| #define PTA_NEHALEM \ |
| (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT) |
| #define PTA_WESTMERE \ |
| (PTA_NEHALEM | PTA_AES | PTA_PCLMUL) |
| #define PTA_SANDYBRIDGE \ |
| (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT) |
| #define PTA_IVYBRIDGE \ |
| (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C) |
| #define PTA_HASWELL \ |
| (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \ |
| | PTA_FMA | PTA_MOVBE | PTA_HLE) |
| #define PTA_BROADWELL \ |
| (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED) |
| #define PTA_BONNELL \ |
| (PTA_CORE2 | PTA_MOVBE) |
| #define PTA_SILVERMONT \ |
| (PTA_WESTMERE | PTA_MOVBE) |
| |
| /* If this reaches 64, we need to widen the flags field in struct pta below. */ |
| |
| static struct pta |
| { |
| const char *const name; /* processor name or nickname. */ |
| const enum processor_type processor; |
| const enum attr_cpu schedule; |
| const unsigned HOST_WIDE_INT flags; |
| } |
| const processor_alias_table[] = |
| { |
| {"i386", PROCESSOR_I386, CPU_NONE, 0}, |
| {"i486", PROCESSOR_I486, CPU_NONE, 0}, |
| {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0}, |
| {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0}, |
| {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX}, |
| {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX}, |
| {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW}, |
| {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW}, |
| {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, |
| PTA_MMX | PTA_SSE | PTA_FXSR}, |
| {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0}, |
| {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0}, |
| {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR}, |
| {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, |
| PTA_MMX | PTA_SSE | PTA_FXSR}, |
| {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, |
| PTA_MMX | PTA_SSE | PTA_FXSR}, |
| {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, |
| PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR}, |
| {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE, |
| PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR}, |
| {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE, |
| PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR}, |
| {"prescott", PROCESSOR_NOCONA, CPU_NONE, |
| PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR}, |
| {"nocona", PROCESSOR_NOCONA, CPU_NONE, |
| PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
| | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR}, |
| {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2}, |
| {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM}, |
| {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM}, |
| {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE}, |
| {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM, |
| PTA_SANDYBRIDGE}, |
| {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM, |
| PTA_SANDYBRIDGE}, |
| {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM, |
| PTA_IVYBRIDGE}, |
| {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM, |
| PTA_IVYBRIDGE}, |
| {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL}, |
| {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL}, |
| {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL}, |
| {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL}, |
| {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL}, |
| {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT}, |
| {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT}, |
| {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM}, |
| {"geode", PROCESSOR_GEODE, CPU_GEODE, |
| PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW}, |
| {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX}, |
| {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW}, |
| {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW}, |
| {"athlon", PROCESSOR_ATHLON, CPU_ATHLON, |
| PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW}, |
| {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON, |
| PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW}, |
| {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON, |
| PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR}, |
| {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON, |
| PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR}, |
| {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON, |
| PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR}, |
| {"x86-64", PROCESSOR_K8, CPU_K8, |
| PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR}, |
| {"k8", PROCESSOR_K8, CPU_K8, |
| PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE |
| | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR}, |
| {"k8-sse3", PROCESSOR_K8, CPU_K8, |
| PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE |
| | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR}, |
| {"opteron", PROCESSOR_K8, CPU_K8, |
| PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE |
| | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR}, |
| {"opteron-sse3", PROCESSOR_K8, CPU_K8, |
| PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE |
| | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR}, |
| {"athlon64", PROCESSOR_K8, CPU_K8, |
| PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE |
| | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR}, |
| {"athlon64-sse3", PROCESSOR_K8, CPU_K8, |
| PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE |
| | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR}, |
| {"athlon-fx", PROCESSOR_K8, CPU_K8, |
| PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE |
| | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR}, |
| {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10, |
| PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 |
| | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR}, |
| {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10, |
| PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 |
| | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR}, |
| {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1, |
| PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
| | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 |
| | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4 |
| | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE}, |
| {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2, |
| PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
| | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 |
| | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4 |
| | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C |
| | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE}, |
| {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3, |
| PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
| | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 |
| | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4 |
| | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C |
| | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE |
| | PTA_XSAVEOPT | PTA_FSGSBASE}, |
| {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4, |
| PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
| | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 |
| | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 |
| | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2 |
| | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR |
| | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE}, |
| {"btver1", PROCESSOR_BTVER1, CPU_GENERIC, |
| PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
| | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW |
| | PTA_FXSR | PTA_XSAVE}, |
| {"btver2", PROCESSOR_BTVER2, CPU_BTVER2, |
| PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
| | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1 |
| | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX |
| | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW |
| | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT}, |
| |
| {"generic", PROCESSOR_GENERIC, CPU_GENERIC, |
| PTA_64BIT |
| | PTA_HLE /* flags are only used for -march switch. */ }, |
| }; |
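| /* Note that "generic" and "intel" appear in this table only so that |
|    -mtune= accepts them; -march= rejects both below. */ |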
| |
| /* -mrecip options. */ |
| static struct |
| { |
| const char *string; /* option name */ |
| unsigned int mask; /* mask bits to set */ |
| } |
| const recip_options[] = |
| { |
| { "all", RECIP_MASK_ALL }, |
| { "none", RECIP_MASK_NONE }, |
| { "div", RECIP_MASK_DIV }, |
| { "sqrt", RECIP_MASK_SQRT }, |
| { "vec-div", RECIP_MASK_VEC_DIV }, |
| { "vec-sqrt", RECIP_MASK_VEC_SQRT }, |
| }; |
| |
| int const pta_size = ARRAY_SIZE (processor_alias_table); |
| |
| /* Set up prefix/suffix so the error messages refer to either the command |
| line argument, or the attribute(target). */ |
| if (main_args_p) |
| { |
| prefix = "-m"; |
| suffix = ""; |
| sw = "switch"; |
| } |
| else |
| { |
| prefix = "option(\""; |
| suffix = "\")"; |
| sw = "attribute"; |
| } |
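| /* With main_args_p the diagnostics below read e.g. "-mtune=... switch", |
|    whereas for attribute targets they read "option(\"tune=...\") attribute". */ |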
| |
| /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if |
| TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */ |
| if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags)) |
| opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32); |
| #ifdef TARGET_BI_ARCH |
| else |
| { |
| #if TARGET_BI_ARCH == 1 |
| /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64 |
| is on and OPTION_MASK_ABI_X32 is off. We turn off |
| OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by |
| -mx32. */ |
| if (TARGET_X32_P (opts->x_ix86_isa_flags)) |
| opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64; |
| #else |
| /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is |
| on and OPTION_MASK_ABI_64 is off. We turn off |
| OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by |
| -m64. */ |
| if (TARGET_LP64_P (opts->x_ix86_isa_flags)) |
| opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32; |
| #endif |
| } |
| #endif |
| |
| if (TARGET_X32_P (opts->x_ix86_isa_flags)) |
| { |
| /* Always turn on OPTION_MASK_ISA_64BIT and turn off |
| OPTION_MASK_ABI_64 for TARGET_X32. */ |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT; |
| opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64; |
| } |
| else if (TARGET_16BIT_P (opts->x_ix86_isa_flags)) |
| opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT |
| | OPTION_MASK_ABI_X32 |
| | OPTION_MASK_ABI_64); |
| else if (TARGET_LP64_P (opts->x_ix86_isa_flags)) |
| { |
| /* Always turn on OPTION_MASK_ISA_64BIT and turn off |
| OPTION_MASK_ABI_X32 for TARGET_LP64. */ |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT; |
| opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32; |
| } |
| |
| #ifdef SUBTARGET_OVERRIDE_OPTIONS |
| SUBTARGET_OVERRIDE_OPTIONS; |
| #endif |
| |
| #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS |
| SUBSUBTARGET_OVERRIDE_OPTIONS; |
| #endif |
| |
| /* -fPIC is the default for x86_64. */ |
| if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags)) |
| opts->x_flag_pic = 2; |
| |
| /* Need to check -mtune=generic first. */ |
| if (opts->x_ix86_tune_string) |
| { |
| /* As special support for cross compilers we read -mtune=native |
|    as -mtune=generic.  With native compilers we won't see |
|    -mtune=native, as the driver has already replaced it. */ |
| if (!strcmp (opts->x_ix86_tune_string, "native")) |
| { |
| opts->x_ix86_tune_string = "generic"; |
| } |
| else if (!strcmp (opts->x_ix86_tune_string, "x86-64")) |
| warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use " |
| "%stune=k8%s or %stune=generic%s instead as appropriate", |
| prefix, suffix, prefix, suffix, prefix, suffix); |
| } |
| else |
| { |
| if (opts->x_ix86_arch_string) |
| opts->x_ix86_tune_string = opts->x_ix86_arch_string; |
| if (!opts->x_ix86_tune_string) |
| { |
| opts->x_ix86_tune_string |
| = processor_target_table[TARGET_CPU_DEFAULT].name; |
| ix86_tune_defaulted = 1; |
| } |
| |
| /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string |
| or defaulted. We need to use a sensible tune option. */ |
| if (!strcmp (opts->x_ix86_tune_string, "x86-64")) |
| { |
| opts->x_ix86_tune_string = "generic"; |
| } |
| } |
| |
| if (opts->x_ix86_stringop_alg == rep_prefix_8_byte |
| && !TARGET_64BIT_P (opts->x_ix86_isa_flags)) |
| { |
| /* rep; movq isn't available in 32-bit code. */ |
| error ("-mstringop-strategy=rep_8byte not supported for 32-bit code"); |
| opts->x_ix86_stringop_alg = no_stringop; |
| } |
| |
| if (!opts->x_ix86_arch_string) |
| opts->x_ix86_arch_string |
| = TARGET_64BIT_P (opts->x_ix86_isa_flags) |
| ? "x86-64" : SUBTARGET32_DEFAULT_CPU; |
| else |
| ix86_arch_specified = 1; |
| |
| if (opts_set->x_ix86_pmode) |
| { |
| if ((TARGET_LP64_P (opts->x_ix86_isa_flags) |
| && opts->x_ix86_pmode == PMODE_SI) |
| || (!TARGET_64BIT_P (opts->x_ix86_isa_flags) |
| && opts->x_ix86_pmode == PMODE_DI)) |
| error ("address mode %qs not supported in the %s bit mode", |
| TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long", |
| TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32"); |
| } |
| else |
| opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags) |
| ? PMODE_DI : PMODE_SI; |
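| /* By default, pointers are DImode only for LP64; x32 and 32-bit code |
|    default to SImode. */ |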
| |
| if (!opts_set->x_ix86_abi) |
| opts->x_ix86_abi = DEFAULT_ABI; |
| |
| /* For targets using the MS ABI, enable MS extensions unless they |
|    were explicitly turned off.  For non-MS ABIs we turn this |
|    option off. */ |
| if (!opts_set->x_flag_ms_extensions) |
| opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI); |
| |
| if (opts_set->x_ix86_cmodel) |
| { |
| switch (opts->x_ix86_cmodel) |
| { |
| case CM_SMALL: |
| case CM_SMALL_PIC: |
| if (opts->x_flag_pic) |
| opts->x_ix86_cmodel = CM_SMALL_PIC; |
| if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) |
| error ("code model %qs not supported in the %s bit mode", |
| "small", "32"); |
| break; |
| |
| case CM_MEDIUM: |
| case CM_MEDIUM_PIC: |
| if (opts->x_flag_pic) |
| opts->x_ix86_cmodel = CM_MEDIUM_PIC; |
| if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) |
| error ("code model %qs not supported in the %s bit mode", |
| "medium", "32"); |
| else if (TARGET_X32_P (opts->x_ix86_isa_flags)) |
| error ("code model %qs not supported in x32 mode", |
| "medium"); |
| break; |
| |
| case CM_LARGE: |
| case CM_LARGE_PIC: |
| if (opts->x_flag_pic) |
| opts->x_ix86_cmodel = CM_LARGE_PIC; |
| if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) |
| error ("code model %qs not supported in the %s bit mode", |
| "large", "32"); |
| else if (TARGET_X32_P (opts->x_ix86_isa_flags)) |
| error ("code model %qs not supported in x32 mode", |
| "large"); |
| break; |
| |
| case CM_32: |
| if (opts->x_flag_pic) |
| error ("code model %s does not support PIC mode", "32"); |
| if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) |
| error ("code model %qs not supported in the %s bit mode", |
| "32", "64"); |
| break; |
| |
| case CM_KERNEL: |
| if (opts->x_flag_pic) |
| { |
| error ("code model %s does not support PIC mode", "kernel"); |
| opts->x_ix86_cmodel = CM_32; |
| } |
| if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) |
| error ("code model %qs not supported in the %s bit mode", |
| "kernel", "32"); |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| else |
| { |
| /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the |
| use of rip-relative addressing. This eliminates fixups that |
| would otherwise be needed if this object is to be placed in a |
| DLL, and is essentially just as efficient as direct addressing. */ |
| if (TARGET_64BIT_P (opts->x_ix86_isa_flags) |
| && (TARGET_RDOS || TARGET_PECOFF)) |
| opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1; |
| else if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) |
| opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL; |
| else |
| opts->x_ix86_cmodel = CM_32; |
| } |
| if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL) |
| { |
| error ("-masm=intel not supported in this configuration"); |
| opts->x_ix86_asm_dialect = ASM_ATT; |
| } |
| if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0) |
| != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0)) |
| sorry ("%i-bit mode not compiled in", |
| (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32); |
| |
| for (i = 0; i < pta_size; i++) |
| if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name)) |
| { |
| ix86_schedule = processor_alias_table[i].schedule; |
| ix86_arch = processor_alias_table[i].processor; |
| /* Default cpu tuning to the architecture. */ |
| ix86_tune = ix86_arch; |
| |
| if (TARGET_64BIT_P (opts->x_ix86_isa_flags) |
| && !(processor_alias_table[i].flags & PTA_64BIT)) |
| error ("CPU you selected does not support x86-64 " |
| "instruction set"); |
| |
| if (processor_alias_table[i].flags & PTA_MMX |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX; |
| if (processor_alias_table[i].flags & PTA_3DNOW |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW; |
| if (processor_alias_table[i].flags & PTA_3DNOW_A |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A; |
| if (processor_alias_table[i].flags & PTA_SSE |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE; |
| if (processor_alias_table[i].flags & PTA_SSE2 |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2; |
| if (processor_alias_table[i].flags & PTA_SSE3 |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3; |
| if (processor_alias_table[i].flags & PTA_SSSE3 |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3; |
| if (processor_alias_table[i].flags & PTA_SSE4_1 |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1; |
| if (processor_alias_table[i].flags & PTA_SSE4_2 |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2; |
| if (processor_alias_table[i].flags & PTA_AVX |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX; |
| if (processor_alias_table[i].flags & PTA_AVX2 |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2; |
| if (processor_alias_table[i].flags & PTA_FMA |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA; |
| if (processor_alias_table[i].flags & PTA_SSE4A |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A; |
| if (processor_alias_table[i].flags & PTA_FMA4 |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4; |
| if (processor_alias_table[i].flags & PTA_XOP |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP; |
| if (processor_alias_table[i].flags & PTA_LWP |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP; |
| if (processor_alias_table[i].flags & PTA_ABM |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM; |
| if (processor_alias_table[i].flags & PTA_BMI |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI; |
| if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM) |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT; |
| if (processor_alias_table[i].flags & PTA_TBM |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM; |
| if (processor_alias_table[i].flags & PTA_BMI2 |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2; |
| if (processor_alias_table[i].flags & PTA_CX16 |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16; |
| if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM) |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT; |
| if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags) |
| && (processor_alias_table[i].flags & PTA_NO_SAHF)) |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF; |
| if (processor_alias_table[i].flags & PTA_MOVBE |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE; |
| if (processor_alias_table[i].flags & PTA_AES |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES; |
| if (processor_alias_table[i].flags & PTA_SHA |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA; |
| if (processor_alias_table[i].flags & PTA_PCLMUL |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL; |
| if (processor_alias_table[i].flags & PTA_FSGSBASE |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE; |
| if (processor_alias_table[i].flags & PTA_RDRND |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND; |
| if (processor_alias_table[i].flags & PTA_F16C |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C; |
| if (processor_alias_table[i].flags & PTA_RTM |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM; |
| if (processor_alias_table[i].flags & PTA_HLE |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE; |
| if (processor_alias_table[i].flags & PTA_PRFCHW |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW; |
| if (processor_alias_table[i].flags & PTA_RDSEED |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED; |
| if (processor_alias_table[i].flags & PTA_ADX |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX; |
| if (processor_alias_table[i].flags & PTA_FXSR |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR; |
| if (processor_alias_table[i].flags & PTA_XSAVE |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE; |
| if (processor_alias_table[i].flags & PTA_XSAVEOPT |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT; |
| if (processor_alias_table[i].flags & PTA_AVX512F |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F; |
| if (processor_alias_table[i].flags & PTA_AVX512ER |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER; |
| if (processor_alias_table[i].flags & PTA_AVX512PF |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF; |
| if (processor_alias_table[i].flags & PTA_AVX512CD |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD; |
| if (processor_alias_table[i].flags & PTA_PREFETCHWT1 |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1; |
| if (processor_alias_table[i].flags & PTA_CLFLUSHOPT |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT; |
| if (processor_alias_table[i].flags & PTA_XSAVEC |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC; |
| if (processor_alias_table[i].flags & PTA_XSAVES |
| && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES)) |
| opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES; |
| if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)) |
| x86_prefetch_sse = true; |
| |
| break; |
| } |
| |
| if (!strcmp (opts->x_ix86_arch_string, "generic")) |
| error ("generic CPU can be used only for %stune=%s %s", |
| prefix, suffix, sw); |
| else if (!strcmp (opts->x_ix86_arch_string, "intel")) |
| error ("intel CPU can be used only for %stune=%s %s", |
| prefix, suffix, sw); |
| else if (i == pta_size) |
| error ("bad value (%s) for %sarch=%s %s", |
| opts->x_ix86_arch_string, prefix, suffix, sw); |
| |
| ix86_arch_mask = 1u << ix86_arch; |
| for (i = 0; i < X86_ARCH_LAST; ++i) |
| ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask); |
| |
| for (i = 0; i < pta_size; i++) |
| if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name)) |
| { |
| ix86_schedule = processor_alias_table[i].schedule; |
| ix86_tune = processor_alias_table[i].processor; |
| if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) |
| { |
| if (!(processor_alias_table[i].flags & PTA_64BIT)) |
| { |
| if (ix86_tune_defaulted) |
| { |
| opts->x_ix86_tune_string = "x86-64"; |
| for (i = 0; i < pta_size; i++) |
| if (! strcmp (opts->x_ix86_tune_string, |
| processor_alias_table[i].name)) |
| break; |
| ix86_schedule = processor_alias_table[i].schedule; |
| ix86_tune = processor_alias_table[i].processor; |
| } |
| else |
| error ("CPU you selected does not support x86-64 " |
| "instruction set"); |
| } |
| } |
| /* Intel CPUs have always interpreted SSE prefetch instructions as |
| NOPs; so, we can enable SSE prefetch instructions even when |
| -mtune (rather than -march) points us to a processor that has them. |
| However, the VIA C3 gives a SIGILL, so we only do that for i686 and |
| higher processors. */ |
| if (TARGET_CMOV |
| && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))) |
| x86_prefetch_sse = true; |
| break; |
| } |
| |
| if (ix86_tune_specified && i == pta_size) |
| error ("bad value (%s) for %stune=%s %s", |
| opts->x_ix86_tune_string, prefix, suffix, sw); |
| |
| set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes); |
| |
| #ifndef USE_IX86_FRAME_POINTER |
| #define USE_IX86_FRAME_POINTER 0 |
| #endif |
| |
| #ifndef USE_X86_64_FRAME_POINTER |
| #define USE_X86_64_FRAME_POINTER 0 |
| #endif |
| |
| /* Set the default values for switches whose default depends on TARGET_64BIT |
| in case they weren't overwritten by command line options. */ |
| if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) |
| { |
| if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer) |
| opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER; |
| if (opts->x_flag_asynchronous_unwind_tables |
| && !opts_set->x_flag_unwind_tables |
| && TARGET_64BIT_MS_ABI) |
| opts->x_flag_unwind_tables = 1; |
| if (opts->x_flag_asynchronous_unwind_tables == 2) |
| opts->x_flag_unwind_tables |
| = opts->x_flag_asynchronous_unwind_tables = 1; |
| if (opts->x_flag_pcc_struct_return == 2) |
| opts->x_flag_pcc_struct_return = 0; |
| } |
| else |
| { |
| if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer) |
| opts->x_flag_omit_frame_pointer |
| = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size); |
| if (opts->x_flag_asynchronous_unwind_tables == 2) |
| opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER; |
| if (opts->x_flag_pcc_struct_return == 2) |
| opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; |
| } |
| |
| ix86_tune_cost = processor_target_table[ix86_tune].cost; |
| if (opts->x_optimize_size) |
| ix86_cost = &ix86_size_cost; |
| else |
| ix86_cost = ix86_tune_cost; |
| |
| /* Arrange to set up i386_stack_locals for all functions. */ |
| init_machine_status = ix86_init_machine_status; |
| |
| /* Validate -mregparm= value. */ |
| if (opts_set->x_ix86_regparm) |
| { |
| if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) |
| warning (0, "-mregparm is ignored in 64-bit mode"); |
| if (opts->x_ix86_regparm > REGPARM_MAX) |
| { |
| error ("-mregparm=%d is not between 0 and %d", |
| opts->x_ix86_regparm, REGPARM_MAX); |
| opts->x_ix86_regparm = 0; |
| } |
| } |
| if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) |
| opts->x_ix86_regparm = REGPARM_MAX; |
| |
| /* Default align_* from the processor table. */ |
| if (opts->x_align_loops == 0) |
| { |
| opts->x_align_loops = processor_target_table[ix86_tune].align_loop; |
| align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip; |
| } |
| if (opts->x_align_jumps == 0) |
| { |
| opts->x_align_jumps = processor_target_table[ix86_tune].align_jump; |
| align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip; |
| } |
| if (opts->x_align_functions == 0) |
| { |
| opts->x_align_functions = processor_target_table[ix86_tune].align_func; |
| } |
| |
| /* Provide default for -mbranch-cost= value. */ |
| if (!opts_set->x_ix86_branch_cost) |
| opts->x_ix86_branch_cost = ix86_cost->branch_cost; |
| |
| if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) |
| { |
| opts->x_target_flags |
| |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags; |
| |
| /* Enable by default the SSE and MMX builtins. Do allow the user to |
| explicitly disable any of these. In particular, disabling SSE and |
| MMX for kernel code is extremely useful. */ |
| if (!ix86_arch_specified) |
| opts->x_ix86_isa_flags |
| |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX |
| | TARGET_SUBTARGET64_ISA_DEFAULT) |
| & ~opts->x_ix86_isa_flags_explicit); |
| |
| if (TARGET_RTD_P (opts->x_target_flags)) |
| warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix); |
| } |
| else |
| { |
| opts->x_target_flags |
| |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags; |
| |
| if (!ix86_arch_specified) |
| opts->x_ix86_isa_flags |
| |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit; |
| |
| /* The i386 ABI does not specify a red zone.  It still makes sense to |
|    use one when the programmer takes care to keep the stack from |
|    being destroyed. */ |
| if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE)) |
| opts->x_target_flags |= MASK_NO_RED_ZONE; |
| } |
| |
| /* Keep nonleaf frame pointers. */ |
| if (opts->x_flag_omit_frame_pointer) |
| opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; |
| else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags)) |
| opts->x_flag_omit_frame_pointer = 1; |
| |
| /* If we're doing fast math, we don't care about comparison order |
| wrt NaNs. This lets us use a shorter comparison sequence. */ |
| if (opts->x_flag_finite_math_only) |
| opts->x_target_flags &= ~MASK_IEEE_FP; |
| |
| /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, |
| since the insns won't need emulation. */ |
| if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387]) |
| opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387; |
| |
| /* Likewise, if the target doesn't have a 387, or we've specified |
| software floating point, don't use 387 inline intrinsics. */ |
| if (!TARGET_80387_P (opts->x_target_flags)) |
| opts->x_target_flags |= MASK_NO_FANCY_MATH_387; |
| |
| /* Turn on MMX builtins for -msse. */ |
| if (TARGET_SSE_P (opts->x_ix86_isa_flags)) |
| opts->x_ix86_isa_flags |
| |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit; |
| |
| /* Enable SSE prefetch. */ |
| if (TARGET_SSE_P (opts->x_ix86_isa_flags) |
| || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags))) |
| x86_prefetch_sse = true; |
| |
| /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */ |
| if (TARGET_3DNOW_P (opts->x_ix86_isa_flags) |
| || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags)) |
| opts->x_ix86_isa_flags |
| |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit; |
| |
| /* Enable popcnt instruction for -msse4.2 or -mabm. */ |
| if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags) |
| || TARGET_ABM_P (opts->x_ix86_isa_flags)) |
| opts->x_ix86_isa_flags |
| |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit; |
| |
| /* Enable lzcnt instruction for -mabm. */ |
| if (TARGET_ABM_P(opts->x_ix86_isa_flags)) |
| opts->x_ix86_isa_flags |
| |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit; |
| |
| /* Validate -mpreferred-stack-boundary= value or default it to |
| PREFERRED_STACK_BOUNDARY_DEFAULT. */ |
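| /* The argument is the log2 of the boundary in bytes, so e.g. an |
|    argument of 4 requests (1 << 4) * BITS_PER_UNIT = 128-bit alignment. */ |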
| ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT; |
| if (opts_set->x_ix86_preferred_stack_boundary_arg) |
| { |
| int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags) |
| ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2); |
| int max = (TARGET_SEH ? 4 : 12); |
| |
| if (opts->x_ix86_preferred_stack_boundary_arg < min |
| || opts->x_ix86_preferred_stack_boundary_arg > max) |
| { |
| if (min == max) |
| error ("-mpreferred-stack-boundary is not supported " |
| "for this target"); |
| else |
| error ("-mpreferred-stack-boundary=%d is not between %d and %d", |
| opts->x_ix86_preferred_stack_boundary_arg, min, max); |
| } |
| else |
| ix86_preferred_stack_boundary |
| = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT; |
| } |
| |
| /* Set the default value for -mstackrealign. */ |
| if (opts->x_ix86_force_align_arg_pointer == -1) |
| opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT; |
| |
| ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY; |
| |
| /* Validate -mincoming-stack-boundary= value or default it to |
| MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */ |
| ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary; |
| if (opts_set->x_ix86_incoming_stack_boundary_arg) |
| { |
| if (opts->x_ix86_incoming_stack_boundary_arg |
| < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2) |
| || opts->x_ix86_incoming_stack_boundary_arg > 12) |
| error ("-mincoming-stack-boundary=%d is not between %d and 12", |
| opts->x_ix86_incoming_stack_boundary_arg, |
| TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2); |
| else |
| { |
| ix86_user_incoming_stack_boundary |
| = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT; |
| ix86_incoming_stack_boundary |
| = ix86_user_incoming_stack_boundary; |
| } |
| } |
| |
| /* Accept -msseregparm only if at least SSE support is enabled. */ |
| if (TARGET_SSEREGPARM_P (opts->x_target_flags) |
| && ! TARGET_SSE_P (opts->x_ix86_isa_flags)) |
| error ("%ssseregparm%s used without SSE enabled", prefix, suffix); |
| |
| if (opts_set->x_ix86_fpmath) |
| { |
| if (opts->x_ix86_fpmath & FPMATH_SSE) |
| { |
| if (!TARGET_SSE_P (opts->x_ix86_isa_flags)) |
| { |
| warning (0, "SSE instruction set disabled, using 387 arithmetics"); |
| opts->x_ix86_fpmath = FPMATH_387; |
| } |
| else if ((opts->x_ix86_fpmath & FPMATH_387) |
| && !TARGET_80387_P (opts->x_target_flags)) |
| { |
| warning (0, "387 instruction set disabled, using SSE arithmetics"); |
| opts->x_ix86_fpmath = FPMATH_SSE; |
| } |
| } |
| } |
| /* For all chips supporting SSE2, -mfpmath=sse performs better than |
|    -mfpmath=387.  The latter is nevertheless the default on many targets, |
|    since the extra 80-bit precision of temporaries is considered part of |
|    the ABI.  Overwrite the default at least for -ffast-math. |
|    TODO: -mfpmath=both seems to produce similarly performing code with |
|    slightly smaller binaries.  It is however not clear whether register |
|    allocation is ready for this setting. |
|    Also, -mfpmath=387 is overall noticeably more compact (about 4-5%) than |
|    SSE codegen.  We may switch to 387 with -ffast-math for size-optimized |
|    functions. */ |
| else if (fast_math_flags_set_p (&global_options) |
| && TARGET_SSE2_P (opts->x_ix86_isa_flags)) |
| opts->x_ix86_fpmath = FPMATH_SSE; |
| else |
| opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags); |
| |
| /* If the i387 is disabled, then do not return values in it. */ |
| if (!TARGET_80387_P (opts->x_target_flags)) |
| opts->x_target_flags &= ~MASK_FLOAT_RETURNS; |
| |
| /* Use an external vectorized math library (SVML or ACML) when vectorizing calls to math intrinsics. */ |
| if (opts_set->x_ix86_veclibabi_type) |
| switch (opts->x_ix86_veclibabi_type) |
| { |
| case ix86_veclibabi_type_svml: |
| ix86_veclib_handler = ix86_veclibabi_svml; |
| break; |
| |
| case ix86_veclibabi_type_acml: |
| ix86_veclib_handler = ix86_veclibabi_acml; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS] |
| && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS) |
| && !opts->x_optimize_size) |
| opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; |
| |
| /* If stack probes are required, the space used for large function |
| arguments on the stack must also be probed, so enable |
| -maccumulate-outgoing-args so this happens in the prologue. */ |
| if (TARGET_STACK_PROBE_P (opts->x_target_flags) |
| && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) |
| { |
| if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS) |
| warning (0, "stack probing requires %saccumulate-outgoing-args%s " |
| "for correctness", prefix, suffix); |
| opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; |
| } |
| |
| /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ |
| { |
| char *p; |
| ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); |
| p = strchr (internal_label_prefix, 'X'); |
| internal_label_prefix_len = p - internal_label_prefix; |
| *p = '\0'; |
| } |
| |
| /* When no scheduling description is available, disable the scheduler |
|    pass so it won't slow down compilation and make x87 code slower. */ |
| if (!TARGET_SCHEDULE) |
| opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0; |
| |
| maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, |
| ix86_tune_cost->simultaneous_prefetches, |
| opts->x_param_values, |
| opts_set->x_param_values); |
| maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, |
| ix86_tune_cost->prefetch_block, |
| opts->x_param_values, |
| opts_set->x_param_values); |
| maybe_set_param_value (PARAM_L1_CACHE_SIZE, |
| ix86_tune_cost->l1_cache_size, |
| opts->x_param_values, |
| opts_set->x_param_values); |
| maybe_set_param_value (PARAM_L2_CACHE_SIZE, |
| ix86_tune_cost->l2_cache_size, |
| opts->x_param_values, |
| opts_set->x_param_values); |
| |
| /* Enable software prefetching at -O3 for CPUs where prefetching is beneficial. */ |
| if (opts->x_flag_prefetch_loop_arrays < 0 |
| && HAVE_prefetch |
| && (opts->x_optimize >= 3 || opts->x_flag_profile_use) |
| && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL) |
| opts->x_flag_prefetch_loop_arrays = 1; |
| |
| /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0) |
|    can be optimized to ap = __builtin_next_arg (0). */ |
| if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack) |
| targetm.expand_builtin_va_start = NULL; |
| |
| if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) |
| { |
| ix86_gen_leave = gen_leave_rex64; |
| if (Pmode == DImode) |
| { |
| ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di; |
| ix86_gen_tls_local_dynamic_base_64 |
| = gen_tls_local_dynamic_base_64_di; |
| } |
| else |
| { |
| ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si; |
| ix86_gen_tls_local_dynamic_base_64 |
| = gen_tls_local_dynamic_base_64_si; |
| } |
| } |
| else |
| ix86_gen_leave = gen_leave; |
| |
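| /* Pick the Pmode-sized variants of the common generator hooks so that |
|    later code can emit pointer-width arithmetic without re-checking Pmode. */ |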
| if (Pmode == DImode) |
| { |
| ix86_gen_add3 = gen_adddi3; |
| ix86_gen_sub3 = gen_subdi3; |
| ix86_gen_sub3_carry = gen_subdi3_carry; |
| ix86_gen_one_cmpl2 = gen_one_cmpldi2; |
| ix86_gen_andsp = gen_anddi3; |
| ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di; |
| ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi; |
| ix86_gen_probe_stack_range = gen_probe_stack_rangedi; |
| ix86_gen_monitor = gen_sse3_monitor_di; |
| } |
| else |
| { |
| ix86_gen_add3 = gen_addsi3; |
| ix86_gen_sub3 = gen_subsi3; |
| ix86_gen_sub3_carry = gen_subsi3_carry; |
| ix86_gen_one_cmpl2 = gen_one_cmplsi2; |
| ix86_gen_andsp = gen_andsi3; |
| ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si; |
| ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi; |
| ix86_gen_probe_stack_range = gen_probe_stack_rangesi; |
| ix86_gen_monitor = gen_sse3_monitor_si; |
| } |
| |
| #ifdef USE_IX86_CLD |
| /* Use -mcld by default for 32-bit code if configured with --enable-cld. */ |
| if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) |
| opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags; |
| #endif |
| |
| if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic) |
| { |
| if (opts->x_flag_fentry > 0) |
| sorry ("-mfentry isn%'t supported for 32-bit in combination " |
| "with -fpic"); |
| opts->x_flag_fentry = 0; |
| } |
| else if (TARGET_SEH) |
| { |
| if (opts->x_flag_fentry == 0) |
| sorry ("-mno-fentry isn%'t compatible with SEH"); |
| opts->x_flag_fentry = 1; |
| } |
| else if (opts->x_flag_fentry < 0) |
| { |
| #if defined(PROFILE_BEFORE_PROLOGUE) |
| opts->x_flag_fentry = 1; |
| #else |
| opts->x_flag_fentry = 0; |
| #endif |
| } |
| |
| /* When not optimizing for size, enable the vzeroupper optimization for |
|    TARGET_AVX with -fexpensive-optimizations and split 32-byte |
|    AVX unaligned loads/stores. */ |
| if (!opts->x_optimize_size) |
| { |
| if (flag_expensive_optimizations |
| && !(opts_set->x_target_flags & MASK_VZEROUPPER)) |
| opts->x_target_flags |= MASK_VZEROUPPER; |
| if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL] |
| && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD)) |
| opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD; |
| if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL] |
| && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE)) |
| opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE; |
| /* Enable 128-bit AVX instruction generation |
| for the auto-vectorizer. */ |
| if (TARGET_AVX128_OPTIMAL |
| && !(opts_set->x_target_flags & MASK_PREFER_AVX128)) |
| opts->x_target_flags |= MASK_PREFER_AVX128; |
| } |
| |
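| /* The -mrecip= value is a comma-separated list of the option names |
|    above, each optionally prefixed with '!' to disable it; "default" |
|    stands for RECIP_MASK_ALL.  E.g. (illustratively) "all,!sqrt" |
|    enables every approximation except the scalar sqrt one. */ |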
| if (opts->x_ix86_recip_name) |
| { |
| char *p = ASTRDUP (opts->x_ix86_recip_name); |
| char *q; |
| unsigned int mask, i; |
| bool invert; |
| |
| while ((q = strtok (p, ",")) != NULL) |
| { |
| p = NULL; |
| if (*q == '!') |
| { |
| invert = true; |
| q++; |
| } |
| else |
| invert = false; |
| |
| if (!strcmp (q, "default")) |
| mask = RECIP_MASK_ALL; |
| else |
| { |
| for (i = 0; i < ARRAY_SIZE (recip_options); i++) |
| if (!strcmp (q, recip_options[i].string)) |
| { |
| mask = recip_options[i].mask; |
| break; |
| } |
| |
| if (i == ARRAY_SIZE (recip_options)) |
| { |
| error ("unknown option for -mrecip=%s", q); |
| invert = false; |
| mask = RECIP_MASK_NONE; |
| } |
| } |
| |
| opts->x_recip_mask_explicit |= mask; |
| if (invert) |
| opts->x_recip_mask &= ~mask; |
| else |
| opts->x_recip_mask |= mask; |
| } |
| } |
| |
| if (TARGET_RECIP_P (opts->x_target_flags)) |
| opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit; |
| else if (opts_set->x_target_flags & MASK_RECIP) |
| opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit); |
| |
| /* Default long double to 64-bit for 32-bit Bionic and to __float128 |
| for 64-bit Bionic. */ |
| if (TARGET_HAS_BIONIC |
| && !(opts_set->x_target_flags |
| & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128))) |
| opts->x_target_flags |= (TARGET_64BIT |
| ? MASK_LONG_DOUBLE_128 |
| : MASK_LONG_DOUBLE_64); |
| |
| /* Only one of them can be active. */ |
| gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0 |
| || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0); |
| |
| /* Save the initial options in case the user uses function-specific |
|    options later. */ |
| if (main_args_p) |
| target_option_default_node = target_option_current_node |
| = build_target_option_node (opts); |
| |
| /* Handle stack protector */ |
| if (!opts_set->x_ix86_stack_protector_guard) |
| opts->x_ix86_stack_protector_guard |
| = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS; |
| |
| /* Handle -mmemcpy-strategy= and -mmemset-strategy= */ |
| if (opts->x_ix86_tune_memcpy_strategy) |
| { |
| char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy); |
| ix86_parse_stringop_strategy_string (str, false); |
| free (str); |
| } |
| |
| if (opts->x_ix86_tune_memset_strategy) |
| { |
| char *str = xstrdup (opts->x_ix86_tune_memset_strategy); |
| ix86_parse_stringop_strategy_string (str, true); |
| free (str); |
| } |
| } |
| |
| /* Implement the TARGET_OPTION_OVERRIDE hook. */ |
| |
| static void |
| ix86_option_override (void) |
| { |
| opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g); |
| static struct register_pass_info insert_vzeroupper_info |
| = { pass_insert_vzeroupper, "reload", |
| 1, PASS_POS_INSERT_AFTER |
| }; |
| |
| ix86_option_override_internal (true, &global_options, &global_options_set); |
| |
| |
| /* This needs to be done at start up. It's convenient to do it here. */ |
| register_pass (&insert_vzeroupper_info); |
| } |
| |
| /* Update register usage after having seen the compiler flags. */ |
| |
| static void |
| ix86_conditional_register_usage (void) |
| { |
| int i, c_mask; |
| unsigned int j; |
| |
| /* The PIC register, if it exists, is fixed. */ |
| j = PIC_OFFSET_TABLE_REGNUM; |
| if (j != INVALID_REGNUM) |
| fixed_regs[j] = call_used_regs[j] = 1; |
| |
| /* For 32-bit targets, squash the REX registers. */ |
| if (! TARGET_64BIT) |
| { |
| for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++) |
| fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; |
| for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) |
| fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; |
| for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) |
| fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; |
| } |
| |
| /* See the definition of CALL_USED_REGISTERS in i386.h. */ |
| c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3) |
| : TARGET_64BIT ? (1 << 2) |
| : (1 << 1)); |
| |
| CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]); |
| |
| for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
| { |
| /* Set/reset conditionally defined registers from |
| CALL_USED_REGISTERS initializer. */ |
| if (call_used_regs[i] > 1) |
| call_used_regs[i] = !!(call_used_regs[i] & c_mask); |
| |
| /* Add to the CLOBBERED_REGS register set every GENERAL_REGS |
|    register that is call used. */ |
| if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i) |
| && call_used_regs[i]) |
| SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i); |
| } |
| |
| /* If MMX is disabled, squash the registers. */ |
| if (! TARGET_MMX) |
| for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
| if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i)) |
| fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; |
| |
| /* If SSE is disabled, squash the registers. */ |
| if (! TARGET_SSE) |
| for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
| if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i)) |
| fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; |
| |
| /* If the FPU is disabled, squash the registers. */ |
| if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387)) |
| for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
| if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i)) |
| fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; |
| |
| /* If AVX512F is disabled, squash the registers. */ |
| if (! TARGET_AVX512F) |
| { |
| for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) |
| fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; |
| |
| for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++) |
| fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; |
| } |
| } |
| |
| |
| /* Save the current options */ |
| |
| static void |
| ix86_function_specific_save (struct cl_target_option *ptr, |
| struct gcc_options *opts) |
| { |
| ptr->arch = ix86_arch; |
| ptr->schedule = ix86_schedule; |
| ptr->tune = ix86_tune; |
| ptr->branch_cost = ix86_branch_cost; |
| ptr->tune_defaulted = ix86_tune_defaulted; |
| ptr->arch_specified = ix86_arch_specified; |
| ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit; |
| ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit; |
| ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit; |
| ptr->x_ix86_arch_string = opts->x_ix86_arch_string; |
| ptr->x_ix86_tune_string = opts->x_ix86_tune_string; |
| ptr->x_ix86_cmodel = opts->x_ix86_cmodel; |
| ptr->x_ix86_abi = opts->x_ix86_abi; |
| ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect; |
| ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost; |
| ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes; |
| ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer; |
| ptr->x_ix86_force_drap = opts->x_ix86_force_drap; |
| ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg; |
| ptr->x_ix86_pmode = opts->x_ix86_pmode; |
| ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg; |
| ptr->x_ix86_recip_name = opts->x_ix86_recip_name; |
| ptr->x_ix86_regparm = opts->x_ix86_regparm; |
| ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold; |
| ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx; |
| ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard; |
| ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg; |
| ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect; |
| ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string; |
| ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy; |
| ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy; |
| ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default; |
| ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type; |
| |
| /* The fields are char but the variables are not; make sure the |
| values fit in the fields. */ |
| gcc_assert (ptr->arch == ix86_arch); |
| gcc_assert (ptr->schedule == ix86_schedule); |
| gcc_assert (ptr->tune == ix86_tune); |
| gcc_assert (ptr->branch_cost == ix86_branch_cost); |
| } |
| |
| /* Restore the current options */ |
| |
| static void |
| ix86_function_specific_restore (struct gcc_options *opts, |
| struct cl_target_option *ptr) |
| { |
| enum processor_type old_tune = ix86_tune; |
| enum processor_type old_arch = ix86_arch; |
| unsigned int ix86_arch_mask; |
| int i; |
| |
| /* We don't change -fPIC. */ |
| opts->x_flag_pic = flag_pic; |
| |
| ix86_arch = (enum processor_type) ptr->arch; |
| ix86_schedule = (enum attr_cpu) ptr->schedule; |
| ix86_tune = (enum processor_type) ptr->tune; |
| opts->x_ix86_branch_cost = ptr->branch_cost; |
| ix86_tune_defaulted = ptr->tune_defaulted; |
| ix86_arch_specified = ptr->arch_specified; |
| opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit; |
| opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit; |
| opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit; |
| opts->x_ix86_arch_string = ptr->x_ix86_arch_string; |
| opts->x_ix86_tune_string = ptr->x_ix86_tune_string; |
| opts->x_ix86_cmodel = ptr->x_ix86_cmodel; |
| opts->x_ix86_abi = ptr->x_ix86_abi; |
| opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect; |
| opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost; |
| opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes; |
| opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer; |
| opts->x_ix86_force_drap = ptr->x_ix86_force_drap; |
| opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg; |
| opts->x_ix86_pmode = ptr->x_ix86_pmode; |
| opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg; |
| opts->x_ix86_recip_name = ptr->x_ix86_recip_name; |
| opts->x_ix86_regparm = ptr->x_ix86_regparm; |
| opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold; |
| opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx; |
| opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard; |
| opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg; |
| opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect; |
| opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string; |
| opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy; |
| opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy; |
| opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default; |
| opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type; |
| |
| /* Recreate the arch feature tests if the arch changed */ |
| if (old_arch != ix86_arch) |
| { |
| ix86_arch_mask = 1u << ix86_arch; |
| for (i = 0; i < X86_ARCH_LAST; ++i) |
| ix86_arch_features[i] |
| = !!(initial_ix86_arch_features[i] & ix86_arch_mask); |
| } |
| |
| /* Recreate the tune optimization tests */ |
| if (old_tune != ix86_tune) |
| set_ix86_tune_features (ix86_tune, false); |
| } |
| |
| /* Print the current options */ |
| |
| static void |
| ix86_function_specific_print (FILE *file, int indent, |
| struct cl_target_option *ptr) |
| { |
| char *target_string |
| = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags, |
| NULL, NULL, ptr->x_ix86_fpmath, false); |
| |
| gcc_assert (ptr->arch < PROCESSOR_max); |
| fprintf (file, "%*sarch = %d (%s)\n", |
| indent, "", |
| ptr->arch, processor_target_table[ptr->arch].name); |
| |
| gcc_assert (ptr->tune < PROCESSOR_max); |
| fprintf (file, "%*stune = %d (%s)\n", |
| indent, "", |
| ptr->tune, processor_target_table[ptr->tune].name); |
| |
| fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost); |
| |
| if (target_string) |
| { |
| fprintf (file, "%*s%s\n", indent, "", target_string); |
| free (target_string); |
| } |
| } |
| |
| |
| /* Inner function to process the attribute((target(...))), take an argument and |
| set the current options from the argument. If we have a list, recursively go |
| over the list. */ |
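| /* For example, attribute((target("avx2,no-sse4a,arch=core2"))) is |
| split on commas below: "avx2" turns an ISA flag on, "no-sse4a" turns |
| one off via the "no-" prefix, and "arch=core2" is saved as a string |
| option (an illustrative attribute string; any supported combination |
| is handled the same way). */ |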
| |
| static bool |
| ix86_valid_target_attribute_inner_p (tree args, char *p_strings[], |
| struct gcc_options *opts, |
| struct gcc_options *opts_set, |
| struct gcc_options *enum_opts_set) |
| { |
| char *next_optstr; |
| bool ret = true; |
| |
| #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 } |
| #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 } |
| #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 } |
| #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M } |
| #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M } |
| |
| enum ix86_opt_type |
| { |
| ix86_opt_unknown, |
| ix86_opt_yes, |
| ix86_opt_no, |
| ix86_opt_str, |
| ix86_opt_enum, |
| ix86_opt_isa |
| }; |
| |
| static const struct |
| { |
| const char *string; |
| size_t len; |
| enum ix86_opt_type type; |
| int opt; |
| int mask; |
| } attrs[] = { |
| /* isa options */ |
| IX86_ATTR_ISA ("3dnow", OPT_m3dnow), |
| IX86_ATTR_ISA ("abm", OPT_mabm), |
| IX86_ATTR_ISA ("bmi", OPT_mbmi), |
| IX86_ATTR_ISA ("bmi2", OPT_mbmi2), |
| IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt), |
| IX86_ATTR_ISA ("tbm", OPT_mtbm), |
| IX86_ATTR_ISA ("aes", OPT_maes), |
| IX86_ATTR_ISA ("sha", OPT_msha), |
| IX86_ATTR_ISA ("avx", OPT_mavx), |
| IX86_ATTR_ISA ("avx2", OPT_mavx2), |
| IX86_ATTR_ISA ("avx512f", OPT_mavx512f), |
| IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf), |
| IX86_ATTR_ISA ("avx512er", OPT_mavx512er), |
| IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd), |
| IX86_ATTR_ISA ("mmx", OPT_mmmx), |
| IX86_ATTR_ISA ("pclmul", OPT_mpclmul), |
| IX86_ATTR_ISA ("popcnt", OPT_mpopcnt), |
| IX86_ATTR_ISA ("sse", OPT_msse), |
| IX86_ATTR_ISA ("sse2", OPT_msse2), |
| IX86_ATTR_ISA ("sse3", OPT_msse3), |
| IX86_ATTR_ISA ("sse4", OPT_msse4), |
| IX86_ATTR_ISA ("sse4.1", OPT_msse4_1), |
| IX86_ATTR_ISA ("sse4.2", OPT_msse4_2), |
| IX86_ATTR_ISA ("sse4a", OPT_msse4a), |
| IX86_ATTR_ISA ("ssse3", OPT_mssse3), |
| IX86_ATTR_ISA ("fma4", OPT_mfma4), |
| IX86_ATTR_ISA ("fma", OPT_mfma), |
| IX86_ATTR_ISA ("xop", OPT_mxop), |
| IX86_ATTR_ISA ("lwp", OPT_mlwp), |
| IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase), |
| IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd), |
| IX86_ATTR_ISA ("f16c", OPT_mf16c), |
| IX86_ATTR_ISA ("rtm", OPT_mrtm), |
| IX86_ATTR_ISA ("hle", OPT_mhle), |
| IX86_ATTR_ISA ("prfchw", OPT_mprfchw), |
| IX86_ATTR_ISA ("rdseed", OPT_mrdseed), |
| IX86_ATTR_ISA ("adx", OPT_madx), |
| IX86_ATTR_ISA ("fxsr", OPT_mfxsr), |
| IX86_ATTR_ISA ("xsave", OPT_mxsave), |
| IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt), |
| IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1), |
| IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt), |
| IX86_ATTR_ISA ("xsavec", OPT_mxsavec), |
| IX86_ATTR_ISA ("xsaves", OPT_mxsaves), |
| |
| /* enum options */ |
| IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), |
| |
| /* string options */ |
| IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH), |
| IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE), |
| |
| /* flag options */ |
| IX86_ATTR_YES ("cld", |
| OPT_mcld, |
| MASK_CLD), |
| |
| IX86_ATTR_NO ("fancy-math-387", |
| OPT_mfancy_math_387, |
| MASK_NO_FANCY_MATH_387), |
| |
| IX86_ATTR_YES ("ieee-fp", |
| OPT_mieee_fp, |
| MASK_IEEE_FP), |
| |
| IX86_ATTR_YES ("inline-all-stringops", |
| OPT_minline_all_stringops, |
| MASK_INLINE_ALL_STRINGOPS), |
| |
| IX86_ATTR_YES ("inline-stringops-dynamically", |
| OPT_minline_stringops_dynamically, |
| MASK_INLINE_STRINGOPS_DYNAMICALLY), |
| |
| IX86_ATTR_NO ("align-stringops", |
| OPT_mno_align_stringops, |
| MASK_NO_ALIGN_STRINGOPS), |
| |
| IX86_ATTR_YES ("recip", |
| OPT_mrecip, |
| MASK_RECIP), |
| |
| }; |
| |
| /* If this is a list, recurse to get the options. */ |
| if (TREE_CODE (args) == TREE_LIST) |
| { |
| bool ret = true; |
| |
| for (; args; args = TREE_CHAIN (args)) |
| if (TREE_VALUE (args) |
| && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), |
| p_strings, opts, opts_set, |
| enum_opts_set)) |
| ret = false; |
| |
| return ret; |
| } |
| |
| else if (TREE_CODE (args) != STRING_CST) |
| { |
| error ("attribute %<target%> argument not a string"); |
| return false; |
| } |
| |
| /* Handle multiple arguments separated by commas. */ |
| next_optstr = ASTRDUP (TREE_STRING_POINTER (args)); |
| |
| while (next_optstr && *next_optstr != '\0') |
| { |
| char *p = next_optstr; |
| char *orig_p = p; |
| char *comma = strchr (next_optstr, ','); |
| const char *opt_string; |
| size_t len, opt_len; |
| int opt; |
| bool opt_set_p; |
| char ch; |
| unsigned i; |
| enum ix86_opt_type type = ix86_opt_unknown; |
| int mask = 0; |
| |
| if (comma) |
| { |
| *comma = '\0'; |
| len = comma - next_optstr; |
| next_optstr = comma + 1; |
| } |
| else |
| { |
| len = strlen (p); |
| next_optstr = NULL; |
| } |
| |
| /* Recognize no-xxx. */ |
| if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-') |
| { |
| opt_set_p = false; |
| p += 3; |
| len -= 3; |
| } |
| else |
| opt_set_p = true; |
| |
| /* Find the option. */ |
| ch = *p; |
| opt = N_OPTS; |
| for (i = 0; i < ARRAY_SIZE (attrs); i++) |
| { |
| type = attrs[i].type; |
| opt_len = attrs[i].len; |
| if (ch == attrs[i].string[0] |
| && ((type != ix86_opt_str && type != ix86_opt_enum) |
| ? len == opt_len |
| : len > opt_len) |
| && memcmp (p, attrs[i].string, opt_len) == 0) |
| { |
| opt = attrs[i].opt; |
| mask = attrs[i].mask; |
| opt_string = attrs[i].string; |
| break; |
| } |
| } |
| |
| /* Process the option. */ |
| if (opt == N_OPTS) |
| { |
| error ("attribute(target(\"%s\")) is unknown", orig_p); |
| ret = false; |
| } |
| |
| else if (type == ix86_opt_isa) |
| { |
| struct cl_decoded_option decoded; |
| |
| generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded); |
| ix86_handle_option (opts, opts_set, |
| &decoded, input_location); |
| } |
| |
| else if (type == ix86_opt_yes || type == ix86_opt_no) |
| { |
| if (type == ix86_opt_no) |
| opt_set_p = !opt_set_p; |
| |
| if (opt_set_p) |
| opts->x_target_flags |= mask; |
| else |
| opts->x_target_flags &= ~mask; |
| } |
| |
| else if (type == ix86_opt_str) |
| { |
| if (p_strings[opt]) |
| { |
| error ("option(\"%s\") was already specified", opt_string); |
| ret = false; |
| } |
| else |
| p_strings[opt] = xstrdup (p + opt_len); |
| } |
| |
| else if (type == ix86_opt_enum) |
| { |
| bool arg_ok; |
| int value; |
| |
| arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET); |
| if (arg_ok) |
| set_option (opts, enum_opts_set, opt, value, |
| p + opt_len, DK_UNSPECIFIED, input_location, |
| global_dc); |
| else |
| { |
| error ("attribute(target(\"%s\")) is unknown", orig_p); |
| ret = false; |
| } |
| } |
| |
| else |
| gcc_unreachable (); |
| } |
| |
| return ret; |
| } |
| |
| /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */ |
| |
| tree |
| ix86_valid_target_attribute_tree (tree args, |
| struct gcc_options *opts, |
| struct gcc_options *opts_set) |
| { |
| const char *orig_arch_string = opts->x_ix86_arch_string; |
| const char *orig_tune_string = opts->x_ix86_tune_string; |
| enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath; |
| int orig_tune_defaulted = ix86_tune_defaulted; |
| int orig_arch_specified = ix86_arch_specified; |
| char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL }; |
| tree t = NULL_TREE; |
| int i; |
| struct cl_target_option *def |
| = TREE_TARGET_OPTION (target_option_default_node); |
| struct gcc_options enum_opts_set; |
| |
| memset (&enum_opts_set, 0, sizeof (enum_opts_set)); |
| |
| /* Process each of the options on the chain. */ |
| if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts, |
| opts_set, &enum_opts_set)) |
| return error_mark_node; |
| |
| /* If the changed options are different from the default, rerun |
| ix86_option_override_internal, and then save the options away. |
| The string options are attribute options, and will be undone |
| when we copy the save structure. */ |
| if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags |
| || opts->x_target_flags != def->x_target_flags |
| || option_strings[IX86_FUNCTION_SPECIFIC_ARCH] |
| || option_strings[IX86_FUNCTION_SPECIFIC_TUNE] |
| || enum_opts_set.x_ix86_fpmath) |
| { |
| /* If we are using the default tune= or arch=, undo the string assigned, |
| and use the default. */ |
| if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]) |
| opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH]; |
| else if (!orig_arch_specified) |
| opts->x_ix86_arch_string = NULL; |
| |
| if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]) |
| opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE]; |
| else if (orig_tune_defaulted) |
| opts->x_ix86_tune_string = NULL; |
| |
| /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ |
| if (enum_opts_set.x_ix86_fpmath) |
| opts_set->x_ix86_fpmath = (enum fpmath_unit) 1; |
| else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) |
| && TARGET_SSE_P (opts->x_ix86_isa_flags)) |
| { |
| opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387); |
| opts_set->x_ix86_fpmath = (enum fpmath_unit) 1; |
| } |
| |
| /* Do any overrides, such as arch=xxx, or tune=xxx support. */ |
| ix86_option_override_internal (false, opts, opts_set); |
| |
| /* Add any builtin functions with the new isa if any. */ |
| ix86_add_new_builtins (opts->x_ix86_isa_flags); |
| |
| /* Save the current options unless we are validating options for |
| #pragma. */ |
| t = build_target_option_node (opts); |
| |
| opts->x_ix86_arch_string = orig_arch_string; |
| opts->x_ix86_tune_string = orig_tune_string; |
| opts_set->x_ix86_fpmath = orig_fpmath_set; |
| |
| /* Free up memory allocated to hold the strings */ |
| for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++) |
| free (option_strings[i]); |
| } |
| |
| return t; |
| } |
| |
| /* Hook to validate attribute((target("string"))). */ |
| |
| static bool |
| ix86_valid_target_attribute_p (tree fndecl, |
| tree ARG_UNUSED (name), |
| tree args, |
| int ARG_UNUSED (flags)) |
| { |
| struct gcc_options func_options; |
| tree new_target, new_optimize; |
| bool ret = true; |
| |
| /* attribute((target("default"))) does nothing, beyond |
| affecting multi-versioning. */ |
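| /* E.g. with function multi-versioning one definition carries |
| attribute((target("default"))) while its siblings carry specific ISAs |
| such as attribute((target("avx2"))); only the latter change any |
| options here (illustrative usage). */ |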
| if (TREE_VALUE (args) |
| && TREE_CODE (TREE_VALUE (args)) == STRING_CST |
| && TREE_CHAIN (args) == NULL_TREE |
| && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0) |
| return true; |
| |
| tree old_optimize = build_optimization_node (&global_options); |
| |
| /* Get the optimization options of the current function. */ |
| tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); |
| |
| if (!func_optimize) |
| func_optimize = old_optimize; |
| |
| /* Init func_options. */ |
| memset (&func_options, 0, sizeof (func_options)); |
| init_options_struct (&func_options, NULL); |
| lang_hooks.init_options_struct (&func_options); |
| |
| cl_optimization_restore (&func_options, |
| TREE_OPTIMIZATION (func_optimize)); |
| |
| /* Initialize func_options to the default before its target options can |
| be set. */ |
| cl_target_option_restore (&func_options, |
| TREE_TARGET_OPTION (target_option_default_node)); |
| |
| new_target = ix86_valid_target_attribute_tree (args, &func_options, |
| &global_options_set); |
| |
| new_optimize = build_optimization_node (&func_options); |
| |
| if (new_target == error_mark_node) |
| ret = false; |
| |
| else if (fndecl && new_target) |
| { |
| DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target; |
| |
| if (old_optimize != new_optimize) |
| DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; |
| } |
| |
| return ret; |
| } |
| |
| |
| /* Hook to determine if one function can safely inline another. */ |
| |
| static bool |
| ix86_can_inline_p (tree caller, tree callee) |
| { |
| bool ret = false; |
| tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); |
| tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); |
| |
| /* If callee has no option attributes, then it is ok to inline. */ |
| if (!callee_tree) |
| ret = true; |
| |
| /* If caller has no option attributes, but callee does then it is not ok to |
| inline. */ |
| else if (!caller_tree) |
| ret = false; |
| |
| else |
| { |
| struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); |
| struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); |
| |
| /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 |
| function can inline an SSE2 function but an SSE2 function can't inline |
| an SSE4 function. */ |
| if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags) |
| != callee_opts->x_ix86_isa_flags) |
| ret = false; |
| |
| /* See if we have the same non-isa options. */ |
| else if (caller_opts->x_target_flags != callee_opts->x_target_flags) |
| ret = false; |
| |
| /* See if arch, tune, etc. are the same. */ |
| else if (caller_opts->arch != callee_opts->arch) |
| ret = false; |
| |
| else if (caller_opts->tune != callee_opts->tune) |
| ret = false; |
| |
| else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath) |
| ret = false; |
| |
| else if (caller_opts->branch_cost != callee_opts->branch_cost) |
| ret = false; |
| |
| else |
| ret = true; |
| } |
| |
| return ret; |
| } |
| |
| |
| /* Remember the last target of ix86_set_current_function. */ |
| static GTY(()) tree ix86_previous_fndecl; |
| |
| /* Invalidate ix86_previous_fndecl cache. */ |
| void |
| ix86_reset_previous_fndecl (void) |
| { |
| ix86_previous_fndecl = NULL_TREE; |
| } |
| |
| /* Establish appropriate back-end context for processing the function |
| FNDECL. The argument might be NULL to indicate processing at top |
| level, outside of any function scope. */ |
| static void |
| ix86_set_current_function (tree fndecl) |
| { |
| /* Only change the context if the function changes. This hook is called |
| several times in the course of compiling a function, and we don't want to |
| slow things down too much or call target_reinit when it isn't safe. */ |
| if (fndecl && fndecl != ix86_previous_fndecl) |
| { |
| tree old_tree = (ix86_previous_fndecl |
| ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl) |
| : NULL_TREE); |
| |
| tree new_tree = (fndecl |
| ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl) |
| : NULL_TREE); |
| |
| ix86_previous_fndecl = fndecl; |
| if (old_tree == new_tree) |
| ; |
| |
| else if (new_tree) |
| { |
| cl_target_option_restore (&global_options, |
| TREE_TARGET_OPTION (new_tree)); |
| if (TREE_TARGET_GLOBALS (new_tree)) |
| restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); |
| else |
| TREE_TARGET_GLOBALS (new_tree) |
| = save_target_globals_default_opts (); |
| } |
| |
| else if (old_tree) |
| { |
| new_tree = target_option_current_node; |
| cl_target_option_restore (&global_options, |
| TREE_TARGET_OPTION (new_tree)); |
| if (TREE_TARGET_GLOBALS (new_tree)) |
| restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); |
| else if (new_tree == target_option_default_node) |
| restore_target_globals (&default_target_globals); |
| else |
| TREE_TARGET_GLOBALS (new_tree) |
| = save_target_globals_default_opts (); |
| } |
| } |
| } |
| |
| |
| /* Return true if this goes in large data/bss. */ |
| |
| static bool |
| ix86_in_large_data_p (tree exp) |
| { |
| if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC) |
| return false; |
| |
| /* Functions are never large data. */ |
| if (TREE_CODE (exp) == FUNCTION_DECL) |
| return false; |
| |
| if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) |
| { |
| const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); |
| if (strcmp (section, ".ldata") == 0 |
| || strcmp (section, ".lbss") == 0) |
| return true; |
| return false; |
| } |
| else |
| { |
| HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); |
| |
| /* If this is an incomplete type with size 0, then we can't put it |
| in data because it might be too big when completed. */ |
| if (!size || size > ix86_section_threshold) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* Switch to the appropriate section for output of DECL. |
| DECL is either a `VAR_DECL' node or a constant of some sort. |
| RELOC indicates whether forming the initial value of DECL requires |
| link-time relocations. */ |
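| /* For instance, under -mcmodel=medium a writable initialized object |
| larger than the -mlarge-data-threshold limit lands in ".ldata" rather |
| than ".data" (illustrative; the exact name follows the SECCAT_* |
| category computed below). */ |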
| |
| ATTRIBUTE_UNUSED static section * |
| x86_64_elf_select_section (tree decl, int reloc, |
| unsigned HOST_WIDE_INT align) |
| { |
| if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) |
| && ix86_in_large_data_p (decl)) |
| { |
| const char *sname = NULL; |
| unsigned int flags = SECTION_WRITE; |
| switch (categorize_decl_for_section (decl, reloc)) |
| { |
| case SECCAT_DATA: |
| sname = ".ldata"; |
| break; |
| case SECCAT_DATA_REL: |
| sname = ".ldata.rel"; |
| break; |
| case SECCAT_DATA_REL_LOCAL: |
| sname = ".ldata.rel.local"; |
| break; |
| case SECCAT_DATA_REL_RO: |
| sname = ".ldata.rel.ro"; |
| break; |
| case SECCAT_DATA_REL_RO_LOCAL: |
| sname = ".ldata.rel.ro.local"; |
| break; |
| case SECCAT_BSS: |
| sname = ".lbss"; |
| flags |= SECTION_BSS; |
| break; |
| case SECCAT_RODATA: |
| case SECCAT_RODATA_MERGE_STR: |
| case SECCAT_RODATA_MERGE_STR_INIT: |
| case SECCAT_RODATA_MERGE_CONST: |
| sname = ".lrodata"; |
| flags = 0; |
| break; |
| case SECCAT_SRODATA: |
| case SECCAT_SDATA: |
| case SECCAT_SBSS: |
| gcc_unreachable (); |
| case SECCAT_TEXT: |
| case SECCAT_TDATA: |
| case SECCAT_TBSS: |
| /* We don't split these for medium model. Place them into |
| default sections and hope for the best. */ |
| break; |
| } |
| if (sname) |
| { |
| /* We might get called with string constants, but get_named_section |
| doesn't like them as they are not DECLs. Also, we need to set |
| flags in that case. */ |
| if (!DECL_P (decl)) |
| return get_section (sname, flags, NULL); |
| return get_named_section (decl, sname, reloc); |
| } |
| } |
| return default_elf_select_section (decl, reloc, align); |
| } |
| |
| /* Select a set of attributes for section NAME based on the properties |
| of DECL and whether or not RELOC indicates that DECL's initializer |
| might contain runtime relocations. */ |
| |
| static unsigned int ATTRIBUTE_UNUSED |
| x86_64_elf_section_type_flags (tree decl, const char *name, int reloc) |
| { |
| unsigned int flags = default_section_type_flags (decl, name, reloc); |
| |
| if (decl == NULL_TREE |
| && (strcmp (name, ".ldata.rel.ro") == 0 |
| || strcmp (name, ".ldata.rel.ro.local") == 0)) |
| flags |= SECTION_RELRO; |
| |
| if (strcmp (name, ".lbss") == 0 |
| || strncmp (name, ".lbss.", 6) == 0 |
| || strncmp (name, ".gnu.linkonce.lb.", 17) == 0) |
| flags |= SECTION_BSS; |
| |
| return flags; |
| } |
| |
| /* Build up a unique section name, expressed as a |
| STRING_CST node, and assign it to DECL_SECTION_NAME (decl). |
| RELOC indicates whether the initial value of EXP requires |
| link-time relocations. */ |
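| /* E.g. a one-only large-data object gets a section named |
| ".gnu.linkonce.ld.<symbol>", while an ordinary one gets |
| ".ldata.<symbol>" (names follow the prefix table below). */ |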
| |
| static void ATTRIBUTE_UNUSED |
| x86_64_elf_unique_section (tree decl, int reloc) |
| { |
| if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) |
| && ix86_in_large_data_p (decl)) |
| { |
| const char *prefix = NULL; |
| /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */ |
| bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP; |
| |
| switch (categorize_decl_for_section (decl, reloc)) |
| { |
| case SECCAT_DATA: |
| case SECCAT_DATA_REL: |
| case SECCAT_DATA_REL_LOCAL: |
| case SECCAT_DATA_REL_RO: |
| case SECCAT_DATA_REL_RO_LOCAL: |
| prefix = one_only ? ".ld" : ".ldata"; |
| break; |
| case SECCAT_BSS: |
| prefix = one_only ? ".lb" : ".lbss"; |
| break; |
| case SECCAT_RODATA: |
| case SECCAT_RODATA_MERGE_STR: |
| case SECCAT_RODATA_MERGE_STR_INIT: |
| case SECCAT_RODATA_MERGE_CONST: |
| prefix = one_only ? ".lr" : ".lrodata"; |
| break; |
| case SECCAT_SRODATA: |
| case SECCAT_SDATA: |
| case SECCAT_SBSS: |
| gcc_unreachable (); |
| case SECCAT_TEXT: |
| case SECCAT_TDATA: |
| case SECCAT_TBSS: |
| /* We don't split these for medium model. Place them into |
| default sections and hope for the best. */ |
| break; |
| } |
| if (prefix) |
| { |
| const char *name, *linkonce; |
| char *string; |
| |
| name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); |
| name = targetm.strip_name_encoding (name); |
| |
| /* If we're using one_only, then there needs to be a .gnu.linkonce |
| prefix to the section name. */ |
| linkonce = one_only ? ".gnu.linkonce" : ""; |
| |
| string = ACONCAT ((linkonce, prefix, ".", name, NULL)); |
| |
| DECL_SECTION_NAME (decl) = build_string (strlen (string), string); |
| return; |
| } |
| } |
| default_unique_section (decl, reloc); |
| } |
| |
| #ifdef COMMON_ASM_OP |
| /* This says how to output assembler code to declare an |
| uninitialized external linkage data object. |
| |
| For medium model x86-64 we need to use .largecomm opcode for |
| large objects. */ |
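| /* A sketch of the output for a large object under -mcmodel=medium: |
| ".largecomm buf,100000,32" instead of the usual COMMON_ASM_OP form |
| (name, size and byte alignment are illustrative). */ |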
| void |
| x86_elf_aligned_common (FILE *file, |
| const char *name, unsigned HOST_WIDE_INT size, |
| int align) |
| { |
| if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) |
| && size > (unsigned int)ix86_section_threshold) |
| fputs (".largecomm\t", file); |
| else |
| fputs (COMMON_ASM_OP, file); |
| assemble_name (file, name); |
| fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n", |
| size, align / BITS_PER_UNIT); |
| } |
| #endif |
| |
| /* Utility function for targets to use in implementing |
| ASM_OUTPUT_ALIGNED_BSS. */ |
| |
| void |
| x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED, |
| const char *name, unsigned HOST_WIDE_INT size, |
| int align) |
| { |
| if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) |
| && size > (unsigned int)ix86_section_threshold) |
| switch_to_section (get_named_section (decl, ".lbss", 0)); |
| else |
| switch_to_section (bss_section); |
| ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT)); |
| #ifdef ASM_DECLARE_OBJECT_NAME |
| last_assemble_variable_decl = decl; |
| ASM_DECLARE_OBJECT_NAME (file, name, decl); |
| #else |
| /* The standard thing is to just output a label for the object. */ |
| ASM_OUTPUT_LABEL (file, name); |
| #endif /* ASM_DECLARE_OBJECT_NAME */ |
| ASM_OUTPUT_SKIP (file, size ? size : 1); |
| } |
| |
| /* Decide whether we must probe the stack before any space allocation |
| on this target. It's essentially TARGET_STACK_PROBE except when |
| -fstack-check causes the stack to be already probed differently. */ |
| |
| bool |
| ix86_target_stack_probe (void) |
| { |
| /* Do not probe the stack twice if static stack checking is enabled. */ |
| if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) |
| return false; |
| |
| return TARGET_STACK_PROBE; |
| } |
| |
| /* Decide whether we can make a sibling call to a function. DECL is the |
| declaration of the function being targeted by the call and EXP is the |
| CALL_EXPR representing the call. */ |
| |
| static bool |
| ix86_function_ok_for_sibcall (tree decl, tree exp) |
| { |
| tree type, decl_or_type; |
| rtx a, b; |
| |
| /* If we are generating position-independent code, we cannot sibcall |
| optimize any indirect call, or a direct call to a global function, |
| as the PLT requires %ebx be live. (Darwin does not have a PLT.) */ |
| if (!TARGET_MACHO |
| && !TARGET_64BIT |
| && flag_pic |
| && (!decl || !targetm.binds_local_p (decl))) |
| return false; |
| |
| /* If we need to align the outgoing stack, then sibcalling would |
| unalign the stack, which may break the called function. */ |
| if (ix86_minimum_incoming_stack_boundary (true) |
| < PREFERRED_STACK_BOUNDARY) |
| return false; |
| |
| if (decl) |
| { |
| decl_or_type = decl; |
| type = TREE_TYPE (decl); |
| } |
| else |
| { |
| /* We're looking at the CALL_EXPR, we need the type of the function. */ |
| type = CALL_EXPR_FN (exp); /* pointer expression */ |
| type = TREE_TYPE (type); /* pointer type */ |
| type = TREE_TYPE (type); /* function type */ |
| decl_or_type = type; |
| } |
| |
| /* Check that the return value locations are the same. For instance, |
| if we are returning floats on the 80387 register stack, we cannot |
| make a sibcall from a function that doesn't return a float to a |
| function that does or, conversely, from a function that does return |
| a float to a function that doesn't; the necessary stack adjustment |
| would not be executed. This is also the place we notice |
| differences in the return value ABI. Note that it is ok for one |
| of the functions to have void return type as long as the return |
| value of the other is passed in a register. */ |
| a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false); |
| b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), |
| cfun->decl, false); |
| if (STACK_REG_P (a) || STACK_REG_P (b)) |
| { |
| if (!rtx_equal_p (a, b)) |
| return false; |
| } |
| else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) |
| ; |
| else if (!rtx_equal_p (a, b)) |
| return false; |
| |
| if (TARGET_64BIT) |
| { |
| /* The SYSV ABI has more call-clobbered registers; |
| disallow sibcalls from MS to SYSV. */ |
| if (cfun->machine->call_abi == MS_ABI |
| && ix86_function_type_abi (type) == SYSV_ABI) |
| return false; |
| } |
| else |
| { |
| /* If this call is indirect, we'll need to be able to use a |
| call-clobbered register for the address of the target function. |
| Make sure that all such registers are not used for passing |
| parameters. Note that DLLIMPORT functions are indirect. */ |
| if (!decl |
| || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))) |
| { |
| if (ix86_function_regparm (type, NULL) >= 3) |
| { |
| /* ??? Need to count the actual number of registers to be used, |
| not the possible number of registers. Fix later. */ |
| return false; |
| } |
| } |
| } |
| |
| /* Otherwise okay. That also includes certain types of indirect calls. */ |
| return true; |
| } |
| |
| /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall", |
| and "sseregparm" calling convention attributes; |
| arguments as in struct attribute_spec.handler. */ |
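| /* E.g. void f (int) __attribute__((fastcall, regparm (2))); is |
| rejected below, since fastcall already fixes the argument registers |
| (an illustrative declaration). */ |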
| |
| static tree |
| ix86_handle_cconv_attribute (tree *node, tree name, |
| tree args, |
| int flags ATTRIBUTE_UNUSED, |
| bool *no_add_attrs) |
| { |
| if (TREE_CODE (*node) != FUNCTION_TYPE |
| && TREE_CODE (*node) != METHOD_TYPE |
| && TREE_CODE (*node) != FIELD_DECL |
| && TREE_CODE (*node) != TYPE_DECL) |
| { |
| warning (OPT_Wattributes, "%qE attribute only applies to functions", |
| name); |
| *no_add_attrs = true; |
| return NULL_TREE; |
| } |
| |
| /* Can combine regparm with all attributes but fastcall and thiscall. */ |
| if (is_attribute_p ("regparm", name)) |
| { |
| tree cst; |
| |
| if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("fastcall and regparm attributes are not compatible"); |
| } |
| |
| if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("regparam and thiscall attributes are not compatible"); |
| } |
| |
| cst = TREE_VALUE (args); |
| if (TREE_CODE (cst) != INTEGER_CST) |
| { |
| warning (OPT_Wattributes, |
| "%qE attribute requires an integer constant argument", |
| name); |
| *no_add_attrs = true; |
| } |
| else if (compare_tree_int (cst, REGPARM_MAX) > 0) |
| { |
| warning (OPT_Wattributes, "argument to %qE attribute larger than %d", |
| name, REGPARM_MAX); |
| *no_add_attrs = true; |
| } |
| |
| return NULL_TREE; |
| } |
| |
| if (TARGET_64BIT) |
| { |
| /* Do not warn when emulating the MS ABI. */ |
| if ((TREE_CODE (*node) != FUNCTION_TYPE |
| && TREE_CODE (*node) != METHOD_TYPE) |
| || ix86_function_type_abi (*node) != MS_ABI) |
| warning (OPT_Wattributes, "%qE attribute ignored", |
| name); |
| *no_add_attrs = true; |
| return NULL_TREE; |
| } |
| |
| /* Can combine fastcall only with sseregparm. */ |
| if (is_attribute_p ("fastcall", name)) |
| { |
| if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("fastcall and cdecl attributes are not compatible"); |
| } |
| if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("fastcall and stdcall attributes are not compatible"); |
| } |
| if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("fastcall and regparm attributes are not compatible"); |
| } |
| if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("fastcall and thiscall attributes are not compatible"); |
| } |
| } |
| |
| /* Can combine stdcall with regparm and sseregparm. */ |
| else if (is_attribute_p ("stdcall", name)) |
| { |
| if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("stdcall and cdecl attributes are not compatible"); |
| } |
| if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("stdcall and fastcall attributes are not compatible"); |
| } |
| if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("stdcall and thiscall attributes are not compatible"); |
| } |
| } |
| |
| /* Can combine cdecl with regparm and sseregparm. */ |
| else if (is_attribute_p ("cdecl", name)) |
| { |
| if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("stdcall and cdecl attributes are not compatible"); |
| } |
| if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("fastcall and cdecl attributes are not compatible"); |
| } |
| if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("cdecl and thiscall attributes are not compatible"); |
| } |
| } |
| else if (is_attribute_p ("thiscall", name)) |
| { |
| if (TREE_CODE (*node) != METHOD_TYPE && pedantic) |
| warning (OPT_Wattributes, "%qE attribute is used for non-class method", |
| name); |
| if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("stdcall and thiscall attributes are not compatible"); |
| } |
| if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("fastcall and thiscall attributes are not compatible"); |
| } |
| if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) |
| { |
| error ("cdecl and thiscall attributes are not compatible"); |
| } |
| } |
| |
| /* Can combine sseregparm with all attributes. */ |
| |
| return NULL_TREE; |
| } |
| |
| /* The transactional memory builtins are implicitly regparm or fastcall |
| depending on the ABI. Override the generic do-nothing attribute that |
| these builtins were declared with, and replace it with one of the two |
| attributes that we expect elsewhere. */ |
| |
| static tree |
| ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED, |
| tree args ATTRIBUTE_UNUSED, |
| int flags, bool *no_add_attrs) |
| { |
| tree alt; |
| |
| /* In no case do we want to add the placeholder attribute. */ |
| *no_add_attrs = true; |
| |
| /* The 64-bit ABI is unchanged for transactional memory. */ |
| if (TARGET_64BIT) |
| return NULL_TREE; |
| |
| /* ??? Is there a better way to validate 32-bit windows? We have |
| cfun->machine->call_abi, but that seems to be set only for 64-bit. */ |
| if (CHECK_STACK_LIMIT > 0) |
| alt = tree_cons (get_identifier ("fastcall"), NULL, NULL); |
| else |
| { |
| alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL); |
| alt = tree_cons (get_identifier ("regparm"), alt, NULL); |
| } |
| decl_attributes (node, alt, flags); |
| |
| return NULL_TREE; |
| } |
| |
| /* This function determines from TYPE the calling-convention. */ |
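| /* E.g. a 32-bit function type carrying both stdcall and sseregparm |
| attributes yields IX86_CALLCVT_STDCALL | IX86_CALLCVT_SSEREGPARM, |
| while on 64-bit targets the result is always IX86_CALLCVT_CDECL |
| (an illustrative combination). */ |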
| |
| unsigned int |
| ix86_get_callcvt (const_tree type) |
| { |
| unsigned int ret = 0; |
| bool is_stdarg; |
| tree attrs; |
| |
| if (TARGET_64BIT) |
| return IX86_CALLCVT_CDECL; |
| |
| attrs = TYPE_ATTRIBUTES (type); |
| if (attrs != NULL_TREE) |
| { |
| if (lookup_attribute ("cdecl", attrs)) |
| ret |= IX86_CALLCVT_CDECL; |
| else if (lookup_attribute ("stdcall", attrs)) |
| ret |= IX86_CALLCVT_STDCALL; |
| else if (lookup_attribute ("fastcall", attrs)) |
| ret |= IX86_CALLCVT_FASTCALL; |
| else if (lookup_attribute ("thiscall", attrs)) |
| ret |= IX86_CALLCVT_THISCALL; |
| |
| /* Regparm isn't allowed for thiscall and fastcall. */ |
| if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0) |
| { |
| if (lookup_attribute ("regparm", attrs)) |
| ret |= IX86_CALLCVT_REGPARM; |
| if (lookup_attribute ("sseregparm", attrs)) |
| ret |= IX86_CALLCVT_SSEREGPARM; |
| } |
| |
| if (IX86_BASE_CALLCVT(ret) != 0) |
| return ret; |
| } |
| |
| is_stdarg = stdarg_p (type); |
| if (TARGET_RTD && !is_stdarg) |
| return IX86_CALLCVT_STDCALL | ret; |
| |
| if (ret != 0 |
| || is_stdarg |
| || TREE_CODE (type) != METHOD_TYPE |
| || ix86_function_type_abi (type) != MS_ABI) |
| return IX86_CALLCVT_CDECL | ret; |
| |
| return IX86_CALLCVT_THISCALL; |
| } |
| |
| /* Return 0 if the attributes for two types are incompatible, 1 if they |
| are compatible, and 2 if they are nearly compatible (which causes a |
| warning to be generated). */ |
| |
| static int |
| ix86_comp_type_attributes (const_tree type1, const_tree type2) |
| { |
| unsigned int ccvt1, ccvt2; |
| |
| if (TREE_CODE (type1) != FUNCTION_TYPE |
| && TREE_CODE (type1) != METHOD_TYPE) |
| return 1; |
| |
| ccvt1 = ix86_get_callcvt (type1); |
| ccvt2 = ix86_get_callcvt (type2); |
| if (ccvt1 != ccvt2) |
| return 0; |
| if (ix86_function_regparm (type1, NULL) |
| != ix86_function_regparm (type2, NULL)) |
| return 0; |
| |
| return 1; |
| } |
| |
| /* Return the regparm value for a function with the indicated TYPE and DECL. |
| DECL may be NULL when calling function indirectly |
| or considering a libcall. */ |
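| /* For instance, attribute((regparm(3))) makes the first three integer |
| arguments arrive in EAX, EDX and ECX; fastcall and thiscall below |
| imply 2 and 1 argument registers respectively (register assignment |
| per the ia32 regparm convention). */ |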
| |
| static int |
| ix86_function_regparm (const_tree type, const_tree decl) |
| { |
| tree attr; |
| int regparm; |
| unsigned int ccvt; |
| |
| if (TARGET_64BIT) |
| return (ix86_function_type_abi (type) == SYSV_ABI |
| ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX); |
| ccvt = ix86_get_callcvt (type); |
| regparm = ix86_regparm; |
| |
| if ((ccvt & IX86_CALLCVT_REGPARM) != 0) |
| { |
| attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type)); |
| if (attr) |
| { |
| regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); |
| return regparm; |
| } |
| } |
| else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) |
| return 2; |
| else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) |
| return 1; |
| |
| /* Use register calling convention for local functions when possible. */ |
| if (decl |
| && TREE_CODE (decl) == FUNCTION_DECL |
| /* Caller and callee must agree on the calling convention, so |
| checking just the global optimize flag here would mean that with |
| __attribute__((optimize (...))) the caller could use the regparm |
| convention and the callee not, or vice versa. Instead look at |
| whether the callee itself is optimized or not. */ |
| && opt_for_fn (decl, optimize) |
| && !(profile_flag && !flag_fentry)) |
| { |
| /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */ |
| struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl)); |
| if (i && i->local && i->can_change_signature) |
| { |
| int local_regparm, globals = 0, regno; |
| |
| /* Make sure no regparm register is taken by a |
| fixed register variable. */ |
| for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++) |
| if (fixed_regs[local_regparm]) |
| break; |
| |
| /* We don't want to use regparm(3) for nested functions as |
| these use a static chain pointer in the third argument. */ |
| if (local_regparm == 3 && DECL_STATIC_CHAIN (decl)) |
| local_regparm = 2; |
| |
| /* In 32-bit mode save a register for the split stack. */ |
| if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack) |
| local_regparm = 2; |
| |
| /* Each fixed register usage increases register pressure, |
| so fewer registers should be used for argument passing. |
| This functionality can be overridden by an explicit |
| regparm value. */ |
| for (regno = AX_REG; regno <= DI_REG; regno++) |
| if (fixed_regs[regno]) |
| globals++; |
| |
| local_regparm |
| = globals < local_regparm ? local_regparm - globals : 0; |
| |
| if (local_regparm > regparm) |
| regparm = local_regparm; |
| } |
| } |
| |
| return regparm; |
| } |
| |
| /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and |
| DFmode (2) arguments in SSE registers for a function with the |
| indicated TYPE and DECL. DECL may be NULL when calling function |
| indirectly or considering a libcall. Otherwise return 0. */ |
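| /* E.g. a 32-bit function declared with attribute((sseregparm)) and |
| compiled with -msse2 gets 2 here, so both SFmode and DFmode |
| arguments travel in SSE registers (an illustrative case). */ |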
| |
| static int |
| ix86_function_sseregparm (const_tree type, const_tree decl, bool warn) |
| { |
| gcc_assert (!TARGET_64BIT); |
| |
| /* Use SSE registers to pass SFmode and DFmode arguments if requested |
| by the sseregparm attribute. */ |
| if (TARGET_SSEREGPARM |
| || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type)))) |
| { |
| if (!TARGET_SSE) |
| { |
| if (warn) |
| { |
| if (decl) |
| error ("calling %qD with attribute sseregparm without " |
| "SSE/SSE2 enabled", decl); |
| else |
| error ("calling %qT with attribute sseregparm without " |
| "SSE/SSE2 enabled", type); |
| } |
| return 0; |
| } |
| |
| return 2; |
| } |
| |
| /* For local functions, pass up to SSE_REGPARM_MAX SFmode |
| (and DFmode for SSE2) arguments in SSE registers. */ |
| if (decl && TARGET_SSE_MATH && optimize |
| && !(profile_flag && !flag_fentry)) |
| { |
| /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */ |
| struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl)); |
| if (i && i->local && i->can_change_signature) |
| return TARGET_SSE2 ? 2 : 1; |
| } |
| |
| return 0; |
| } |
| |
| /* Return true if EAX is live at the start of the function. Used by |
| ix86_expand_prologue to determine if we need special help before |
| calling allocate_stack_worker. */ |
| |
| static bool |
| ix86_eax_live_at_start_p (void) |
| { |
| /* Cheat. Don't bother working forward from ix86_function_regparm |
| to the function type to whether an actual argument is located in |
| eax. Instead just look at cfg info, which is still close enough |
| to correct at this point. This gives false positives for broken |
| functions that might use uninitialized data that happens to be |
| allocated in eax, but who cares? */ |
| return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0); |
| } |
| |
| static bool |
| ix86_keep_aggregate_return_pointer (tree fntype) |
| { |
| tree attr; |
| |
| if (!TARGET_64BIT) |
| { |
| attr = lookup_attribute ("callee_pop_aggregate_return", |
| TYPE_ATTRIBUTES (fntype)); |
| if (attr) |
| return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0); |
| |
| /* For 32-bit MS-ABI the default is to keep aggregate |
| return pointer. */ |
| if (ix86_function_type_abi (fntype) == MS_ABI) |
| return true; |
| } |
| return KEEP_AGGREGATE_RETURN_POINTER != 0; |
| } |
| |
| /* Value is the number of bytes of arguments automatically |
| popped when returning from a subroutine call. |
| FUNDECL is the declaration node of the function (as a tree), |
| FUNTYPE is the data type of the function (as a tree), |
| or for a library call it is an identifier node for the subroutine name. |
| SIZE is the number of bytes of arguments passed on the stack. |
| |
| On the 80386, the RTD insn may be used to pop them if the number |
| of args is fixed, but if the number is variable then the caller |
| must pop them all. RTD can't be used for library calls now |
| because the library is compiled with the Unix compiler. |
| Use of RTD is a selectable option, since it is incompatible with |
| standard Unix calling sequences. If the option is not selected, |
| the caller must always pop the args. |
| |
| The attribute stdcall is equivalent to RTD on a per module basis. */ |
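| /* For example, a non-variadic stdcall function taking two ints returns |
| 8 here and is emitted with "ret $8", whereas the equivalent cdecl |
| function returns 0 and its caller pops the arguments (sizes assume |
| 4-byte ints). */ |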
| |
| static int |
| ix86_return_pops_args (tree fundecl, tree funtype, int size) |
| { |
| unsigned int ccvt; |
| |
| /* None of the 64-bit ABIs pop arguments. */ |
| if (TARGET_64BIT) |
| return 0; |
| |
| ccvt = ix86_get_callcvt (funtype); |
| |
| if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL |
| | IX86_CALLCVT_THISCALL)) != 0 |
| && ! stdarg_p (funtype)) |
| return size; |
| |
| /* Lose any fake structure return argument if it is passed on the stack. */ |
| if (aggregate_value_p (TREE_TYPE (funtype), fundecl) |
| && !ix86_keep_aggregate_return_pointer (funtype)) |
| { |
| int nregs = ix86_function_regparm (funtype, fundecl); |
| if (nregs == 0) |
| return GET_MODE_SIZE (Pmode); |
| } |
| |
| return 0; |
| } |
| |
| /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */ |
| |
| static bool |
| ix86_legitimate_combined_insn (rtx insn) |
| { |
| /* Check operand constraints in case hard registers were propagated |
| into insn pattern. This check prevents combine pass from |
| generating insn patterns with invalid hard register operands. |
| These invalid insns can eventually confuse reload to error out |
| with a spill failure. See also PRs 46829 and 46843. */ |
| if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0) |
| { |
| int i; |
| |
| extract_insn (insn); |
| preprocess_constraints (); |
| |
| for (i = 0; i < recog_data.n_operands; i++) |
| { |
| rtx op = recog_data.operand[i]; |
| enum machine_mode mode = GET_MODE (op); |
| struct operand_alternative *op_alt; |
| int offset = 0; |
| bool win; |
| int j; |
| |
| /* For pre-AVX disallow unaligned loads/stores where the |
| instructions don't support it. */ |
| if (!TARGET_AVX |
| && VECTOR_MODE_P (GET_MODE (op)) |
| && misaligned_operand (op, GET_MODE (op))) |
| { |
| int min_align = get_attr_ssememalign (insn); |
| if (min_align == 0) |
| return false; |
| } |
| |
| /* A unary operator may be accepted by the predicate, but it |
| is irrelevant for matching constraints. */ |
| if (UNARY_P (op)) |
| op = XEXP (op, 0); |
| |
| if (GET_CODE (op) == SUBREG) |
| { |
| if (REG_P (SUBREG_REG (op)) |
| && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER) |
| offset = subreg_regno_offset (REGNO (SUBREG_REG (op)), |
| GET_MODE (SUBREG_REG (op)), |
| SUBREG_BYTE (op), |
| GET_MODE (op)); |
| op = SUBREG_REG (op); |
| } |
| |
| if (!(REG_P (op) && HARD_REGISTER_P (op))) |
| continue; |
| |
| op_alt = recog_op_alt[i]; |
| |
| /* Operand has no constraints, anything is OK. */ |
| win = !recog_data.n_alternatives; |
| |
| for (j = 0; j < recog_data.n_alternatives; j++) |
| { |
| if (op_alt[j].anything_ok |
| || (op_alt[j].matches != -1 |
| && operands_match_p |
| (recog_data.operand[i], |
| recog_data.operand[op_alt[j].matches])) |
| || reg_fits_class_p (op, op_alt[j].cl, offset, mode)) |
| { |
| win = true; |
| break; |
| } |
| } |
| |
| if (!win) |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ |
| |
| static unsigned HOST_WIDE_INT |
| ix86_asan_shadow_offset (void) |
| { |
| return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44) |
| : HOST_WIDE_INT_C (0x7fff8000)) |
| : (HOST_WIDE_INT_1 << 29); |
| } |
| |
| /* Argument support functions. */ |
| |
| /* Return true when REGNO may be used to pass function parameters. */ |
| bool |
| ix86_function_arg_regno_p (int regno) |
| { |
| int i; |
| const int *parm_regs; |
| |
| if (!TARGET_64BIT) |
| { |
| if (TARGET_MACHO) |
| return (regno < REGPARM_MAX |
| || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno])); |
| else |
| return (regno < REGPARM_MAX |
| || (TARGET_MMX && MMX_REGNO_P (regno) |
| && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX)) |
| || (TARGET_SSE && SSE_REGNO_P (regno) |
| && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))); |
| } |
| |
| if (TARGET_SSE && SSE_REGNO_P (regno) |
| && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)) |
| return true; |
| |
| /* TODO: The function should depend on current function ABI but |
| builtins.c would need updating then. Therefore we use the |
| default ABI. */ |
| |
| /* RAX is used as hidden argument to va_arg functions. */ |
| if (ix86_abi == SYSV_ABI && regno == AX_REG) |
| return true; |
| |
| if (ix86_abi == MS_ABI) |
| parm_regs = x86_64_ms_abi_int_parameter_registers; |
| else |
| parm_regs = x86_64_int_parameter_registers; |
| for (i = 0; i < (ix86_abi == MS_ABI |
| ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++) |
| if (regno == parm_regs[i]) |
| return true; |
| return false; |
| } |
| |
| /* Return true if we do not know how to pass TYPE solely in registers. */ |
| |
| static bool |
| ix86_must_pass_in_stack (enum machine_mode mode, const_tree type) |
| { |
| if (must_pass_in_stack_var_size_or_pad (mode, type)) |
| return true; |
| |
| /* For 32-bit, we want TImode aggregates to go on the stack. But watch out! |
| The layout_type routine is crafty and tries to trick us into passing |
| currently unsupported vector types on the stack by using TImode. */ |
| return (!TARGET_64BIT && mode == TImode |
| && type && TREE_CODE (type) != VECTOR_TYPE); |
| } |
| |
| /* Return the size, in bytes, of the area reserved for arguments passed |
| in registers for the function represented by FNDECL, depending on the |
| ABI it uses. */ |
| int |
| ix86_reg_parm_stack_space (const_tree fndecl) |
| { |
| enum calling_abi call_abi = SYSV_ABI; |
| if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL) |
| call_abi = ix86_function_abi (fndecl); |
| else |
| call_abi = ix86_function_type_abi (fndecl); |
| if (TARGET_64BIT && call_abi == MS_ABI) |
| return 32; |
| return 0; |
| } |
| |
| /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the |
| call abi used. */ |
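| /* E.g. on a SYSV default target a type declared with |
| attribute((ms_abi)) is reported as MS_ABI here, and conversely for |
| attribute((sysv_abi)) on an MS default target (illustrative |
| attribute usage). */ |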
| enum calling_abi |
| ix86_function_type_abi (const_tree fntype) |
| { |
| if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE) |
| { |
| enum calling_abi abi = ix86_abi; |
| if (abi == SYSV_ABI) |
| { |
| if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype))) |
| abi = MS_ABI; |
| } |
| else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype))) |
| abi = SYSV_ABI; |
| return abi; |
| } |
| return ix86_abi; |
| } |
| |
| /* We add this as a workaround in order to use libc_has_function |
| hook in i386.md. */ |
| bool |
| ix86_libc_has_function (enum function_class fn_class) |
| { |
| return targetm.libc_has_function (fn_class); |
| } |
| |
| static bool |
| ix86_function_ms_hook_prologue (const_tree fn) |
| { |
| if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn))) |
| { |
| if (decl_function_context (fn) != NULL_TREE) |
| error_at (DECL_SOURCE_LOCATION (fn), |
| "ms_hook_prologue is not compatible with nested function"); |
| else |
| return true; |
| } |
| return false; |
| } |
| |
| static enum calling_abi |
| ix86_function_abi (const_tree fndecl) |
| { |
| if (! fndecl) |
| return ix86_abi; |
| return ix86_function_type_abi (TREE_TYPE (fndecl)); |
| } |
| |
| /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the |
| call abi used. */ |
| enum calling_abi |
| ix86_cfun_abi (void) |
| { |
| if (! cfun) |
| return ix86_abi; |
| return cfun->machine->call_abi; |
| } |
| |
| /* Write the extra assembler code needed to declare a function properly. */ |
| |
| void |
| ix86_asm_output_function_label (FILE *asm_out_file, const char *fname, |
| tree decl) |
| { |
| bool is_ms_hook = ix86_function_ms_hook_prologue (decl); |
| |
| if (is_ms_hook) |
| { |
| int i, filler_count = (TARGET_64BIT ? 32 : 16); |
| unsigned int filler_cc = 0xcccccccc; |
| |
| for (i = 0; i < filler_count; i += 4) |
| fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc); |
| } |
| |
| #ifdef SUBTARGET_ASM_UNWIND_INIT |
| SUBTARGET_ASM_UNWIND_INIT (asm_out_file); |
| #endif |
| |
| ASM_OUTPUT_LABEL (asm_out_file, fname); |
| |
| /* Output magic byte marker, if hot-patch attribute is set. */ |
| if (is_ms_hook) |
| { |
| if (TARGET_64BIT) |
| { |
| /* leaq [%rsp + 0], %rsp */ |
| asm_fprintf (asm_out_file, ASM_BYTE |
| "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n"); |
| } |
| else |
| { |
| /* movl.s %edi, %edi |
| push %ebp |
| movl.s %esp, %ebp */ |
| asm_fprintf (asm_out_file, ASM_BYTE |
| "0x8b, 0xff, 0x55, 0x8b, 0xec\n"); |
| } |
| } |
| } |
| |
| /* regclass.c */ |
| extern void init_regs (void); |
| |
| /* Implementation of the call ABI switching target hook. Set the call |
| register sets specific to FNDECL. See also |
| ix86_conditional_register_usage for more details. */ |
| void |
| ix86_call_abi_override (const_tree fndecl) |
| { |
| if (fndecl == NULL_TREE) |
| cfun->machine->call_abi = ix86_abi; |
| else |
| cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl)); |
| } |
| |
| /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. |
| Avoid expensive re-initialization of init_regs each time we switch |
| function context, since this is needed only during RTL expansion. */ |
| static void |
| ix86_maybe_switch_abi (void) |
| { |
| if (TARGET_64BIT |
| && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI)) |
| reinit_regs (); |
| } |
| |
| /* Initialize a variable CUM of type CUMULATIVE_ARGS |
| for a call to a function whose data type is FNTYPE. |
| For a library call, FNTYPE is 0. */ |
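| /* E.g. for a non-variadic 32-bit fastcall FNTYPE this sets cum->nregs |
| to 2 with cum->fastcall set, while for the 64-bit SYSV ABI nregs |
| becomes X86_64_REGPARM_MAX (6 integer registers); the SSE counts |
| below follow the same pattern (illustrative cases). */ |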
| |
| void |
| init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ |
| tree fntype, /* tree ptr for function decl */ |
| rtx libname, /* SYMBOL_REF of library name or 0 */ |
| tree fndecl, |
| int caller) |
| { |
| struct cgraph_local_info *i; |
| |
| memset (cum, 0, sizeof (*cum)); |
| |
| if (fndecl) |
| { |
| i = cgraph_local_info (fndecl); |
| cum->call_abi = ix86_function_abi (fndecl); |
| } |
| else |
| { |
| i = NULL; |
| cum->call_abi = ix86_function_type_abi (fntype); |
| } |
| |
| cum->caller = caller; |
| |
| /* Set up the number of registers to use for passing arguments. */ |
| cum->nregs = ix86_regparm; |
| if (TARGET_64BIT) |
| { |
| cum->nregs = (cum->call_abi == SYSV_ABI |
| ? X86_64_REGPARM_MAX |
| : X86_64_MS_REGPARM_MAX); |
| } |
| if (TARGET_SSE) |
| { |
| cum->sse_nregs = SSE_REGPARM_MAX; |
| if (TARGET_64BIT) |
| { |
| cum->sse_nregs = (cum->call_abi == SYSV_ABI |
| ? X86_64_SSE_REGPARM_MAX |
| : X86_64_MS_SSE_REGPARM_MAX); |
| } |
| } |
| if (TARGET_MMX) |
| cum->mmx_nregs = MMX_REGPARM_MAX; |
| cum->warn_avx512f = true; |
| cum->warn_avx = true; |
| cum->warn_sse = true; |
| cum->warn_mmx = true; |
| |
| /* Because the type might mismatch between caller and callee, we need to |
| use the actual type of the function for local calls. |
| FIXME: cgraph_analyze can be told to actually record if the function uses |
| va_start, so for local functions maybe_vaarg can be made more aggressive, |
| helping K&R code. |
| FIXME: once the type system is fixed, we won't need this code anymore. */ |
| if (i && i->local && i->can_change_signature) |
| fntype = TREE_TYPE (fndecl); |
| cum->maybe_vaarg = (fntype |
| ? (!prototype_p (fntype) || stdarg_p (fntype)) |
| : !libname); |
| |
| if (!TARGET_64BIT) |
| { |
| /* If there are variable arguments, then we won't pass anything |
| in registers in 32-bit mode. */ |
| if (stdarg_p (fntype)) |
| { |
| cum->nregs = 0; |
| cum->sse_nregs = 0; |
| cum->mmx_nregs = 0; |
| cum->warn_avx512f = false; |
| cum->warn_avx = false; |
| cum->warn_sse = false; |
| cum->warn_mmx = false; |
| return; |
| } |
| |
| /* Use ecx and edx registers if function has fastcall attribute, |
| else look for regparm information. */ |
| if (fntype) |
| { |
| unsigned int ccvt = ix86_get_callcvt (fntype); |
| if ((ccvt & IX86_CALLCVT_THISCALL) != 0) |
| { |
| cum->nregs = 1; |
| cum->fastcall = 1; /* Same first register as in fastcall. */ |
| } |
| else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) |
| { |
| cum->nregs = 2; |
| cum->fastcall = 1; |
| } |
| else |
| cum->nregs = ix86_function_regparm (fntype, fndecl); |
| } |
| |
| /* Set up the number of SSE registers used for passing SFmode |
| and DFmode arguments. Warn for mismatching ABI. */ |
| cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true); |
| } |
| } |
| |
| /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE. |
| But in the case of vector types, it is some vector mode. |
| |
| When we have only some of our vector isa extensions enabled, then there |
| are some modes for which vector_mode_supported_p is false. For these |
| modes, the generic vector support in gcc will choose some non-vector mode |
| in order to implement the type. By computing the natural mode, we'll |
| select the proper ABI location for the operand and not depend on whatever |
| the middle-end decides to do with these vector types. |
| |
| The middle-end can't deal with vector types > 16 bytes. In this |
| case, we return the original mode and warn about the ABI change if |
| CUM isn't NULL. |
| |
| If IN_RETURN is true, warn about the ABI change if the vector mode |
| isn't available for the function return value. */ |
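| /* A sketch: a 32-byte vector argument such as |
| typedef float v8sf __attribute__((vector_size (32))); |
| passed without -mavx keeps its TYPE_MODE and triggers the -Wpsabi |
| warning below (the typedef name v8sf is illustrative). */ |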
| |
| static enum machine_mode |
| type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum, |
| bool in_return) |
| { |
| enum machine_mode mode = TYPE_MODE (type); |
| |
| if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode)) |
| { |
| HOST_WIDE_INT size = int_size_in_bytes (type); |
| if ((size == 8 || size == 16 || size == 32 || size == 64) |
| /* ??? Generic code allows us to create width 1 vectors. Ignore. */ |
| && TYPE_VECTOR_SUBPARTS (type) > 1) |
| { |
| enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type)); |
| |
| if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) |
| mode = MIN_MODE_VECTOR_FLOAT; |
| else |
| mode = MIN_MODE_VECTOR_INT; |
| |
| /* Get the mode which has this inner mode and number of units. */ |
| for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) |
| if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type) |
| && GET_MODE_INNER (mode) == innermode) |
| { |
| if (size == 64 && !TARGET_AVX512F) |
| { |
| static bool warnedavx512f; |
| static bool warnedavx512f_ret; |
| |
| if (cum && cum->warn_avx512f && !warnedavx512f) |
| { |
| if (warning (OPT_Wpsabi, "AVX512F vector argument " |
| "without AVX512F enabled changes the ABI")) |
| warnedavx512f = true; |
| } |
| else if (in_return && !warnedavx512f_ret) |
| { |
| if (warning (OPT_Wpsabi, "AVX512F vector return " |
| "without AVX512F enabled changes the ABI")) |
| warnedavx512f_ret = true; |
| } |
| |
| return TYPE_MODE (type); |
| } |
| else if (size == 32 && !TARGET_AVX) |
| { |
| static bool warnedavx; |
| static bool warnedavx_ret; |
| |
| if (cum && cum->warn_avx && !warnedavx) |
| { |
| if (warning (OPT_Wpsabi, "AVX vector argument " |
| "without AVX enabled changes the ABI")) |
| warnedavx = true; |
| } |
| else if (in_return && !warnedavx_ret) |
| { |
| if (warning (OPT_Wpsabi, "AVX vector return " |
| "without AVX enabled changes the ABI")) |
| warnedavx_ret = true; |
| } |
| |
| return TYPE_MODE (type); |
| } |
| else if (((size == 8 && TARGET_64BIT) || size == 16) |
| && !TARGET_SSE) |
| { |
| static bool warnedsse; |
| static bool warnedsse_ret; |
| |
| if (cum && cum->warn_sse && !warnedsse) |
| { |
| if (warning (OPT_Wpsabi, "SSE vector argument " |
| "without SSE enabled changes the ABI")) |
| warnedsse = true; |
| } |
| else if (!TARGET_64BIT && in_return && !warnedsse_ret) |
| { |
| if (warning (OPT_Wpsabi, "SSE vector return " |
| "without SSE enabled changes the ABI")) |
| warnedsse_ret = true; |
| } |
| } |
| else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX) |
| { |
| static bool warnedmmx; |
| static bool warnedmmx_ret; |
| |
| if (cum && cum->warn_mmx && !warnedmmx) |
| { |
| if (warning (OPT_Wpsabi, "MMX vector argument " |
| "without MMX enabled changes the ABI")) |
| warnedmmx = true; |
| } |
| else if (in_return && !warnedmmx_ret) |
| { |
| if (warning (OPT_Wpsabi, "MMX vector return " |
| "without MMX enabled changes the ABI")) |
| warnedmmx_ret = true; |
| } |
| } |
| return mode; |
| } |
| |
| gcc_unreachable (); |
| } |
| } |
| |
| return mode; |
| } |
| |
| /* We want to pass a value in REGNO whose "natural" mode is MODE. However, |
| this may not agree with the mode that the type system has chosen for the |
| register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can |
| go ahead and use it. Otherwise we have to build a PARALLEL instead. */ |
| |
| static rtx |
| gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode, |
| unsigned int regno) |
| { |
| rtx tmp; |
| |
| if (orig_mode != BLKmode) |
| tmp = gen_rtx_REG (orig_mode, regno); |
| else |
| { |
| tmp = gen_rtx_REG (mode, regno); |
| tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx); |
| tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp)); |
| } |
| |
| return tmp; |
| } |
| |
| /* x86-64 register passing implementation. See the x86-64 ABI for details. |
| The goal of this code is to classify each eightbyte of an incoming argument |
| by register class and assign registers accordingly. */ |
| |
| /* Return the union class of CLASS1 and CLASS2. |
| See the x86-64 PS ABI for details. */ |
| |
| static enum x86_64_reg_class |
| merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) |
| { |
| /* Rule #1: If both classes are equal, this is the resulting class. */ |
| if (class1 == class2) |
| return class1; |
| |
| /* Rule #2: If one of the classes is NO_CLASS, the resulting class is |
| the other class. */ |
| if (class1 == X86_64_NO_CLASS) |
| return class2; |
| if (class2 == X86_64_NO_CLASS) |
| return class1; |
| |
| /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ |
| if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) |
| return X86_64_MEMORY_CLASS; |
| |
| /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ |
| if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) |
| || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) |
| return X86_64_INTEGERSI_CLASS; |
| if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS |
| || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) |
| return X86_64_INTEGER_CLASS; |
| |
| /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, |
| MEMORY is used. */ |
| if (class1 == X86_64_X87_CLASS |
| || class1 == X86_64_X87UP_CLASS |
| || class1 == X86_64_COMPLEX_X87_CLASS |
| || class2 == X86_64_X87_CLASS |
| || class2 == X86_64_X87UP_CLASS |
| || class2 == X86_64_COMPLEX_X87_CLASS) |
| return X86_64_MEMORY_CLASS; |
| |
| /* Rule #6: Otherwise class SSE is used. */ |
| return X86_64_SSE_CLASS; |
| } |
| |
| /* Classify the argument of type TYPE and mode MODE. |
| CLASSES will be filled by the register class used to pass each word |
| of the operand. The number of words is returned. In case the parameter |
| should be passed in memory, 0 is returned. As a special case for zero |
| sized containers, classes[0] will be NO_CLASS and 1 is returned. |
| |
   BIT_OFFSET is used internally for handling records and specifies the
   bit offset of the operand within the enclosing argument, modulo 512,
   to avoid overflow cases.
| |
| See the x86-64 PS ABI for details. |
| */ |
| |
| static int |
| classify_argument (enum machine_mode mode, const_tree type, |
| enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) |
| { |
| HOST_WIDE_INT bytes = |
| (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); |
| int words |
| = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD; |
| |
| /* Variable sized entities are always passed/returned in memory. */ |
| if (bytes < 0) |
| return 0; |
| |
| if (mode != VOIDmode |
| && targetm.calls.must_pass_in_stack (mode, type)) |
| return 0; |
| |
| if (type && AGGREGATE_TYPE_P (type)) |
| { |
| int i; |
| tree field; |
| enum x86_64_reg_class subclasses[MAX_CLASSES]; |
| |
| /* On x86-64 we pass structures larger than 64 bytes on the stack. */ |
| if (bytes > 64) |
| return 0; |
| |
| for (i = 0; i < words; i++) |
| classes[i] = X86_64_NO_CLASS; |
| |
      /* Zero sized arrays or structures are NO_CLASS.  Since 0 is returned
	 to signal the memory class, handle them as a special case.  */
| if (!words) |
| { |
| classes[0] = X86_64_NO_CLASS; |
| return 1; |
| } |
| |
| /* Classify each field of record and merge classes. */ |
| switch (TREE_CODE (type)) |
| { |
| case RECORD_TYPE: |
| /* And now merge the fields of structure. */ |
| for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
| { |
| if (TREE_CODE (field) == FIELD_DECL) |
| { |
| int num; |
| |
| if (TREE_TYPE (field) == error_mark_node) |
| continue; |
| |
| /* Bitfields are always classified as integer. Handle them |
| early, since later code would consider them to be |
| misaligned integers. */ |
| if (DECL_BIT_FIELD (field)) |
| { |
| for (i = (int_bit_position (field) |
| + (bit_offset % 64)) / 8 / 8; |
| i < ((int_bit_position (field) + (bit_offset % 64)) |
| + tree_to_shwi (DECL_SIZE (field)) |
| + 63) / 8 / 8; i++) |
| classes[i] = |
| merge_classes (X86_64_INTEGER_CLASS, |
| classes[i]); |
| } |
| else |
| { |
| int pos; |
| |
| type = TREE_TYPE (field); |
| |
| /* Flexible array member is ignored. */ |
| if (TYPE_MODE (type) == BLKmode |
| && TREE_CODE (type) == ARRAY_TYPE |
| && TYPE_SIZE (type) == NULL_TREE |
| && TYPE_DOMAIN (type) != NULL_TREE |
| && (TYPE_MAX_VALUE (TYPE_DOMAIN (type)) |
| == NULL_TREE)) |
| { |
| static bool warned; |
| |
| if (!warned && warn_psabi) |
| { |
| warned = true; |
| inform (input_location, |
| "the ABI of passing struct with" |
| " a flexible array member has" |
| " changed in GCC 4.4"); |
| } |
| continue; |
| } |
| num = classify_argument (TYPE_MODE (type), type, |
| subclasses, |
| (int_bit_position (field) |
| + bit_offset) % 512); |
| if (!num) |
| return 0; |
| pos = (int_bit_position (field) |
| + (bit_offset % 64)) / 8 / 8; |
| for (i = 0; i < num && (i + pos) < words; i++) |
| classes[i + pos] = |
| merge_classes (subclasses[i], classes[i + pos]); |
| } |
| } |
| } |
| break; |
| |
| case ARRAY_TYPE: |
| /* Arrays are handled as small records. */ |
| { |
| int num; |
| num = classify_argument (TYPE_MODE (TREE_TYPE (type)), |
| TREE_TYPE (type), subclasses, bit_offset); |
| if (!num) |
| return 0; |
| |
| /* The partial classes are now full classes. */ |
| if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) |
| subclasses[0] = X86_64_SSE_CLASS; |
| if (subclasses[0] == X86_64_INTEGERSI_CLASS |
| && !((bit_offset % 64) == 0 && bytes == 4)) |
| subclasses[0] = X86_64_INTEGER_CLASS; |
| |
| for (i = 0; i < words; i++) |
| classes[i] = subclasses[i % num]; |
| |
| break; |
| } |
| case UNION_TYPE: |
| case QUAL_UNION_TYPE: |
	  /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
| for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
| { |
| if (TREE_CODE (field) == FIELD_DECL) |
| { |
| int num; |
| |
| if (TREE_TYPE (field) == error_mark_node) |
| continue; |
| |
| num = classify_argument (TYPE_MODE (TREE_TYPE (field)), |
| TREE_TYPE (field), subclasses, |
| bit_offset); |
| if (!num) |
| return 0; |
| for (i = 0; i < num; i++) |
| classes[i] = merge_classes (subclasses[i], classes[i]); |
| } |
| } |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| if (words > 2) |
| { |
	  /* When the size exceeds 16 bytes, everything is passed in memory
	     unless the first eightbyte is X86_64_SSE_CLASS and all the
	     remaining eightbytes are X86_64_SSEUP_CLASS.  */
| if (classes[0] != X86_64_SSE_CLASS) |
| return 0; |
| |
| for (i = 1; i < words; i++) |
| if (classes[i] != X86_64_SSEUP_CLASS) |
| return 0; |
| } |
| |
| /* Final merger cleanup. */ |
| for (i = 0; i < words; i++) |
| { |
| /* If one class is MEMORY, everything should be passed in |
| memory. */ |
| if (classes[i] == X86_64_MEMORY_CLASS) |
| return 0; |
| |
	  /* X86_64_SSEUP_CLASS should always be preceded by
	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
| if (classes[i] == X86_64_SSEUP_CLASS |
| && classes[i - 1] != X86_64_SSE_CLASS |
| && classes[i - 1] != X86_64_SSEUP_CLASS) |
| { |
| /* The first one should never be X86_64_SSEUP_CLASS. */ |
| gcc_assert (i != 0); |
| classes[i] = X86_64_SSE_CLASS; |
| } |
| |
| /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, |
| everything should be passed in memory. */ |
| if (classes[i] == X86_64_X87UP_CLASS |
| && (classes[i - 1] != X86_64_X87_CLASS)) |
| { |
| static bool warned; |
| |
| /* The first one should never be X86_64_X87UP_CLASS. */ |
| gcc_assert (i != 0); |
| if (!warned && warn_psabi) |
| { |
| warned = true; |
| inform (input_location, |
| "the ABI of passing union with long double" |
| " has changed in GCC 4.4"); |
| } |
| return 0; |
| } |
| } |
| return words; |
| } |
| |
  /* Compute the alignment needed.  We align all types to their natural
     boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
| if (mode != VOIDmode && mode != BLKmode) |
| { |
| int mode_alignment = GET_MODE_BITSIZE (mode); |
| |
| if (mode == XFmode) |
| mode_alignment = 128; |
| else if (mode == XCmode) |
| mode_alignment = 256; |
| if (COMPLEX_MODE_P (mode)) |
| mode_alignment /= 2; |
| /* Misaligned fields are always returned in memory. */ |
| if (bit_offset % mode_alignment) |
| return 0; |
| } |
| |
  /* For V1xx modes, just use the base mode.  */
| if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode |
| && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes) |
| mode = GET_MODE_INNER (mode); |
| |
| /* Classification of atomic types. */ |
| switch (mode) |
| { |
| case SDmode: |
| case DDmode: |
| classes[0] = X86_64_SSE_CLASS; |
| return 1; |
| case TDmode: |
| classes[0] = X86_64_SSE_CLASS; |
| classes[1] = X86_64_SSEUP_CLASS; |
| return 2; |
| case DImode: |
| case SImode: |
| case HImode: |
| case QImode: |
| case CSImode: |
| case CHImode: |
| case CQImode: |
| { |
| int size = bit_offset + (int) GET_MODE_BITSIZE (mode); |
| |
| /* Analyze last 128 bits only. */ |
| size = (size - 1) & 0x7f; |
| |
| if (size < 32) |
| { |
| classes[0] = X86_64_INTEGERSI_CLASS; |
| return 1; |
| } |
| else if (size < 64) |
| { |
| classes[0] = X86_64_INTEGER_CLASS; |
| return 1; |
| } |
| else if (size < 64+32) |
| { |
| classes[0] = X86_64_INTEGER_CLASS; |
| classes[1] = X86_64_INTEGERSI_CLASS; |
| return 2; |
| } |
| else if (size < 64+64) |
| { |
| classes[0] = classes[1] = X86_64_INTEGER_CLASS; |
| return 2; |
| } |
| else |
| gcc_unreachable (); |
| } |
| case CDImode: |
| case TImode: |
| classes[0] = classes[1] = X86_64_INTEGER_CLASS; |
| return 2; |
| case COImode: |
| case OImode: |
| /* OImode shouldn't be used directly. */ |
| gcc_unreachable (); |
| case CTImode: |
| return 0; |
| case SFmode: |
| if (!(bit_offset % 64)) |
| classes[0] = X86_64_SSESF_CLASS; |
| else |
| classes[0] = X86_64_SSE_CLASS; |
| return 1; |
| case DFmode: |
| classes[0] = X86_64_SSEDF_CLASS; |
| return 1; |
| case XFmode: |
| classes[0] = X86_64_X87_CLASS; |
| classes[1] = X86_64_X87UP_CLASS; |
| return 2; |
| case TFmode: |
| classes[0] = X86_64_SSE_CLASS; |
| classes[1] = X86_64_SSEUP_CLASS; |
| return 2; |
| case SCmode: |
| classes[0] = X86_64_SSE_CLASS; |
| if (!(bit_offset % 64)) |
| return 1; |
| else |
| { |
| static bool warned; |
| |
| if (!warned && warn_psabi) |
| { |
| warned = true; |
| inform (input_location, |
| "the ABI of passing structure with complex float" |
| " member has changed in GCC 4.4"); |
| } |
| classes[1] = X86_64_SSESF_CLASS; |
| return 2; |
| } |
| case DCmode: |
| classes[0] = X86_64_SSEDF_CLASS; |
| classes[1] = X86_64_SSEDF_CLASS; |
| return 2; |
| case XCmode: |
| classes[0] = X86_64_COMPLEX_X87_CLASS; |
| return 1; |
| case TCmode: |
      /* This mode is larger than 16 bytes.  */
| return 0; |
| case V8SFmode: |
| case V8SImode: |
| case V32QImode: |
| case V16HImode: |
| case V4DFmode: |
| case V4DImode: |
| classes[0] = X86_64_SSE_CLASS; |
| classes[1] = X86_64_SSEUP_CLASS; |
| classes[2] = X86_64_SSEUP_CLASS; |
| classes[3] = X86_64_SSEUP_CLASS; |
| return 4; |
| case V8DFmode: |
| case V16SFmode: |
| case V8DImode: |
| case V16SImode: |
| case V32HImode: |
| case V64QImode: |
| classes[0] = X86_64_SSE_CLASS; |
| classes[1] = X86_64_SSEUP_CLASS; |
| classes[2] = X86_64_SSEUP_CLASS; |
| classes[3] = X86_64_SSEUP_CLASS; |
| classes[4] = X86_64_SSEUP_CLASS; |
| classes[5] = X86_64_SSEUP_CLASS; |
| classes[6] = X86_64_SSEUP_CLASS; |
| classes[7] = X86_64_SSEUP_CLASS; |
| return 8; |
| case V4SFmode: |
| case V4SImode: |
| case V16QImode: |
| case V8HImode: |
| case V2DFmode: |
| case V2DImode: |
| classes[0] = X86_64_SSE_CLASS; |
| classes[1] = X86_64_SSEUP_CLASS; |
| return 2; |
| case V1TImode: |
| case V1DImode: |
| case V2SFmode: |
| case V2SImode: |
| case V4HImode: |
| case V8QImode: |
| classes[0] = X86_64_SSE_CLASS; |
| return 1; |
| case BLKmode: |
| case VOIDmode: |
| return 0; |
| default: |
| gcc_assert (VECTOR_MODE_P (mode)); |
| |
| if (bytes > 16) |
| return 0; |
| |
| gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT); |
| |
| if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) |
| classes[0] = X86_64_INTEGERSI_CLASS; |
| else |
| classes[0] = X86_64_INTEGER_CLASS; |
| classes[1] = X86_64_INTEGER_CLASS; |
| return 1 + (bytes > 8); |
| } |
| } |
| |
/* Examine the argument and set the number of registers required in each
   class.  Return true iff the parameter should be passed in memory.  */
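
/* For instance (illustrative only), a TImode __int128 argument is
   classified as two X86_64_INTEGER_CLASS eightbytes, so *INT_NREGS is
   set to 2 and *SSE_NREGS to 0 and false is returned, whereas an XFmode
   long double argument (X87/X87UP classes) makes this return true when
   it is not a return value.  */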
| |
| static bool |
| examine_argument (enum machine_mode mode, const_tree type, int in_return, |
| int *int_nregs, int *sse_nregs) |
| { |
| enum x86_64_reg_class regclass[MAX_CLASSES]; |
| int n = classify_argument (mode, type, regclass, 0); |
| |
| *int_nregs = 0; |
| *sse_nregs = 0; |
| |
| if (!n) |
| return true; |
| for (n--; n >= 0; n--) |
| switch (regclass[n]) |
| { |
| case X86_64_INTEGER_CLASS: |
| case X86_64_INTEGERSI_CLASS: |
| (*int_nregs)++; |
| break; |
| case X86_64_SSE_CLASS: |
| case X86_64_SSESF_CLASS: |
| case X86_64_SSEDF_CLASS: |
| (*sse_nregs)++; |
| break; |
| case X86_64_NO_CLASS: |
| case X86_64_SSEUP_CLASS: |
| break; |
| case X86_64_X87_CLASS: |
| case X86_64_X87UP_CLASS: |
| case X86_64_COMPLEX_X87_CLASS: |
| if (!in_return) |
| return true; |
| break; |
| case X86_64_MEMORY_CLASS: |
| gcc_unreachable (); |
| } |
| |
| return false; |
| } |
| |
| /* Construct container for the argument used by GCC interface. See |
| FUNCTION_ARG for the detailed description. */ |
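/* For the illustrative structure { long l; double d; } mentioned above,
   the container built here is, schematically,

     (parallel [(expr_list (reg:DI di) (const_int 0))
		(expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the first eightbyte in a general purpose register and the second
   in an SSE register (sketch only; the exact registers depend on how
   many argument registers are already in use).  */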
| |
| static rtx |
| construct_container (enum machine_mode mode, enum machine_mode orig_mode, |
| const_tree type, int in_return, int nintregs, int nsseregs, |
| const int *intreg, int sse_regno) |
| { |
| /* The following variables hold the static issued_error state. */ |
| static bool issued_sse_arg_error; |
| static bool issued_sse_ret_error; |
| static bool issued_x87_ret_error; |
| |
| enum machine_mode tmpmode; |
| int bytes = |
| (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); |
| enum x86_64_reg_class regclass[MAX_CLASSES]; |
| int n; |
| int i; |
| int nexps = 0; |
| int needed_sseregs, needed_intregs; |
| rtx exp[MAX_CLASSES]; |
| rtx ret; |
| |
| n = classify_argument (mode, type, regclass, 0); |
| if (!n) |
| return NULL; |
| if (examine_argument (mode, type, in_return, &needed_intregs, |
| &needed_sseregs)) |
| return NULL; |
| if (needed_intregs > nintregs || needed_sseregs > nsseregs) |
| return NULL; |
| |
| /* We allowed the user to turn off SSE for kernel mode. Don't crash if |
| some less clueful developer tries to use floating-point anyway. */ |
| if (needed_sseregs && !TARGET_SSE) |
| { |
| if (in_return) |
| { |
| if (!issued_sse_ret_error) |
| { |
| error ("SSE register return with SSE disabled"); |
| issued_sse_ret_error = true; |
| } |
| } |
| else if (!issued_sse_arg_error) |
| { |
| error ("SSE register argument with SSE disabled"); |
| issued_sse_arg_error = true; |
| } |
| return NULL; |
| } |
| |
| /* Likewise, error if the ABI requires us to return values in the |
| x87 registers and the user specified -mno-80387. */ |
| if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return) |
| for (i = 0; i < n; i++) |
| if (regclass[i] == X86_64_X87_CLASS |
| || regclass[i] == X86_64_X87UP_CLASS |
| || regclass[i] == X86_64_COMPLEX_X87_CLASS) |
| { |
| if (!issued_x87_ret_error) |
| { |
| error ("x87 register return with x87 disabled"); |
| issued_x87_ret_error = true; |
| } |
| return NULL; |
| } |
| |
| /* First construct simple cases. Avoid SCmode, since we want to use |
| single register to pass this type. */ |
| if (n == 1 && mode != SCmode) |
| switch (regclass[0]) |
| { |
| case X86_64_INTEGER_CLASS: |
| case X86_64_INTEGERSI_CLASS: |
| return gen_rtx_REG (mode, intreg[0]); |
| case X86_64_SSE_CLASS: |
| case X86_64_SSESF_CLASS: |
| case X86_64_SSEDF_CLASS: |
| if (mode != BLKmode) |
| return gen_reg_or_parallel (mode, orig_mode, |
| SSE_REGNO (sse_regno)); |
| break; |
| case X86_64_X87_CLASS: |
| case X86_64_COMPLEX_X87_CLASS: |
| return gen_rtx_REG (mode, FIRST_STACK_REG); |
| case X86_64_NO_CLASS: |
| /* Zero sized array, struct or class. */ |
| return NULL; |
| default: |
| gcc_unreachable (); |
| } |
| if (n == 2 |
| && regclass[0] == X86_64_SSE_CLASS |
| && regclass[1] == X86_64_SSEUP_CLASS |
| && mode != BLKmode) |
| return gen_reg_or_parallel (mode, orig_mode, |
| SSE_REGNO (sse_regno)); |
| if (n == 4 |
| && regclass[0] == X86_64_SSE_CLASS |
| && regclass[1] == X86_64_SSEUP_CLASS |
| && regclass[2] == X86_64_SSEUP_CLASS |
| && regclass[3] == X86_64_SSEUP_CLASS |
| && mode != BLKmode) |
| return gen_reg_or_parallel (mode, orig_mode, |
| SSE_REGNO (sse_regno)); |
| if (n == 8 |
| && regclass[0] == X86_64_SSE_CLASS |
| && regclass[1] == X86_64_SSEUP_CLASS |
| && regclass[2] == X86_64_SSEUP_CLASS |
| && regclass[3] == X86_64_SSEUP_CLASS |
| && regclass[4] == X86_64_SSEUP_CLASS |
| && regclass[5] == X86_64_SSEUP_CLASS |
| && regclass[6] == X86_64_SSEUP_CLASS |
| && regclass[7] == X86_64_SSEUP_CLASS |
| && mode != BLKmode) |
| return gen_reg_or_parallel (mode, orig_mode, |
| SSE_REGNO (sse_regno)); |
| if (n == 2 |
| && regclass[0] == X86_64_X87_CLASS |
| && regclass[1] == X86_64_X87UP_CLASS) |
| return gen_rtx_REG (XFmode, FIRST_STACK_REG); |
| |
| if (n == 2 |
| && regclass[0] == X86_64_INTEGER_CLASS |
| && regclass[1] == X86_64_INTEGER_CLASS |
| && (mode == CDImode || mode == TImode) |
| && intreg[0] + 1 == intreg[1]) |
| return gen_rtx_REG (mode, intreg[0]); |
| |
| /* Otherwise figure out the entries of the PARALLEL. */ |
| for (i = 0; i < n; i++) |
| { |
| int pos; |
| |
| switch (regclass[i]) |
| { |
| case X86_64_NO_CLASS: |
| break; |
| case X86_64_INTEGER_CLASS: |
| case X86_64_INTEGERSI_CLASS: |
| /* Merge TImodes on aligned occasions here too. */ |
| if (i * 8 + 8 > bytes) |
| tmpmode |
| = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0); |
| else if (regclass[i] == X86_64_INTEGERSI_CLASS) |
| tmpmode = SImode; |
| else |
| tmpmode = DImode; |
	  /* We've requested 24 bytes for which we don't have a mode.
	     Use DImode instead.  */
| if (tmpmode == BLKmode) |
| tmpmode = DImode; |
| exp [nexps++] |
| = gen_rtx_EXPR_LIST (VOIDmode, |
| gen_rtx_REG (tmpmode, *intreg), |
| GEN_INT (i*8)); |
| intreg++; |
| break; |
| case X86_64_SSESF_CLASS: |
| exp [nexps++] |
| = gen_rtx_EXPR_LIST (VOIDmode, |
| gen_rtx_REG (SFmode, |
| SSE_REGNO (sse_regno)), |
| GEN_INT (i*8)); |
| sse_regno++; |
| break; |
| case X86_64_SSEDF_CLASS: |
| exp [nexps++] |
| = gen_rtx_EXPR_LIST (VOIDmode, |
| gen_rtx_REG (DFmode, |
| SSE_REGNO (sse_regno)), |
| GEN_INT (i*8)); |
| sse_regno++; |
| break; |
| case X86_64_SSE_CLASS: |
| pos = i; |
| switch (n) |
| { |
| case 1: |
| tmpmode = DImode; |
| break; |
| case 2: |
| if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS) |
| { |
| tmpmode = TImode; |
| i++; |
| } |
| else |
| tmpmode = DImode; |
| break; |
| case 4: |
| gcc_assert (i == 0 |
| && regclass[1] == X86_64_SSEUP_CLASS |
| && regclass[2] == X86_64_SSEUP_CLASS |
| && regclass[3] == X86_64_SSEUP_CLASS); |
| tmpmode = OImode; |
| i += 3; |
| break; |
| case 8: |
| gcc_assert (i == 0 |
| && regclass[1] == X86_64_SSEUP_CLASS |
| && regclass[2] == X86_64_SSEUP_CLASS |
| && regclass[3] == X86_64_SSEUP_CLASS |
| && regclass[4] == X86_64_SSEUP_CLASS |
| && regclass[5] == X86_64_SSEUP_CLASS |
| && regclass[6] == X86_64_SSEUP_CLASS |
| && regclass[7] == X86_64_SSEUP_CLASS); |
| tmpmode = XImode; |
| i += 7; |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| exp [nexps++] |
| = gen_rtx_EXPR_LIST (VOIDmode, |
| gen_rtx_REG (tmpmode, |
| SSE_REGNO (sse_regno)), |
| GEN_INT (pos*8)); |
| sse_regno++; |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* Empty aligned struct, union or class. */ |
| if (nexps == 0) |
| return NULL; |
| |
| ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps)); |
| for (i = 0; i < nexps; i++) |
| XVECEXP (ret, 0, i) = exp [i]; |
| return ret; |
| } |
| |
| /* Update the data in CUM to advance over an argument of mode MODE |
| and data type TYPE. (TYPE is null for libcalls where that information |
| may not be available.) */ |
| |
| static void |
| function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, |
| const_tree type, HOST_WIDE_INT bytes, |
| HOST_WIDE_INT words) |
| { |
| switch (mode) |
| { |
| default: |
| break; |
| |
| case BLKmode: |
| if (bytes < 0) |
| break; |
| /* FALLTHRU */ |
| |
| case DImode: |
| case SImode: |
| case HImode: |
| case QImode: |
| cum->words += words; |
| cum->nregs -= words; |
| cum->regno += words; |
| |
| if (cum->nregs <= 0) |
| { |
| cum->nregs = 0; |
| cum->regno = 0; |
| } |
| break; |
| |
| case OImode: |
| /* OImode shouldn't be used directly. */ |
| gcc_unreachable (); |
| |
| case DFmode: |
| if (cum->float_in_sse < 2) |
| break; |
| case SFmode: |
| if (cum->float_in_sse < 1) |
| break; |
| /* FALLTHRU */ |
| |
| case V8SFmode: |
| case V8SImode: |
| case V64QImode: |
| case V32HImode: |
| case V16SImode: |
| case V8DImode: |
| case V16SFmode: |
| case V8DFmode: |
| case V32QImode: |
| case V16HImode: |
| case V4DFmode: |
| case V4DImode: |
| case TImode: |
| case V16QImode: |
| case V8HImode: |
| case V4SImode: |
| case V2DImode: |
| case V4SFmode: |
| case V2DFmode: |
| if (!type || !AGGREGATE_TYPE_P (type)) |
| { |
| cum->sse_words += words; |
| cum->sse_nregs -= 1; |
| cum->sse_regno += 1; |
| if (cum->sse_nregs <= 0) |
| { |
| cum->sse_nregs = 0; |
| cum->sse_regno = 0; |
| } |
| } |
| break; |
| |
| case V8QImode: |
| case V4HImode: |
| case V2SImode: |
| case V2SFmode: |
| case V1TImode: |
| case V1DImode: |
| if (!type || !AGGREGATE_TYPE_P (type)) |
| { |
| cum->mmx_words += words; |
| cum->mmx_nregs -= 1; |
| cum->mmx_regno += 1; |
| if (cum->mmx_nregs <= 0) |
| { |
| cum->mmx_nregs = 0; |
| cum->mmx_regno = 0; |
| } |
| } |
| break; |
| } |
| } |
| |
| static void |
| function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, |
| const_tree type, HOST_WIDE_INT words, bool named) |
| { |
| int int_nregs, sse_nregs; |
| |
  /* Unnamed 256 and 512 bit vector mode parameters are passed on
     the stack.  */
| if (!named && (VALID_AVX512F_REG_MODE (mode) |
| || VALID_AVX256_REG_MODE (mode))) |
| return; |
| |
| if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs) |
| && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) |
| { |
| cum->nregs -= int_nregs; |
| cum->sse_nregs -= sse_nregs; |
| cum->regno += int_nregs; |
| cum->sse_regno += sse_nregs; |
| } |
| else |
| { |
| int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD; |
| cum->words = (cum->words + align - 1) & ~(align - 1); |
| cum->words += words; |
| } |
| } |
| |
| static void |
| function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, |
| HOST_WIDE_INT words) |
| { |
  /* Anything other than 1, 2, 4 or 8 bytes should have been passed
     indirectly, by reference.  */
| gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8); |
| |
| cum->words += words; |
| if (cum->nregs > 0) |
| { |
| cum->nregs -= 1; |
| cum->regno += 1; |
| } |
| } |
| |
| /* Update the data in CUM to advance over an argument of mode MODE and |
| data type TYPE. (TYPE is null for libcalls where that information |
| may not be available.) */ |
| |
| static void |
| ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, |
| const_tree type, bool named) |
| { |
| CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); |
| HOST_WIDE_INT bytes, words; |
| |
| if (mode == BLKmode) |
| bytes = int_size_in_bytes (type); |
| else |
| bytes = GET_MODE_SIZE (mode); |
| words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; |
| |
| if (type) |
| mode = type_natural_mode (type, NULL, false); |
| |
| if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI) |
| function_arg_advance_ms_64 (cum, bytes, words); |
| else if (TARGET_64BIT) |
| function_arg_advance_64 (cum, mode, type, words, named); |
| else |
| function_arg_advance_32 (cum, mode, type, bytes, words); |
| } |
| |
| /* Define where to put the arguments to a function. |
| Value is zero to push the argument on the stack, |
| or a hard register in which to store the argument. |
| |
| MODE is the argument's machine mode. |
| TYPE is the data type of the argument (as a tree). |
| This is null for libcalls where that information may |
| not be available. |
| CUM is a variable of type CUMULATIVE_ARGS which gives info about |
| the preceding args and about the function being called. |
| NAMED is nonzero if this argument is a named parameter |
| (otherwise it is an extra parameter matching an ellipsis). */ |
| |
| static rtx |
| function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode, |
| enum machine_mode orig_mode, const_tree type, |
| HOST_WIDE_INT bytes, HOST_WIDE_INT words) |
| { |
| /* Avoid the AL settings for the Unix64 ABI. */ |
| if (mode == VOIDmode) |
| return constm1_rtx; |
| |
| switch (mode) |
| { |
| default: |
| break; |
| |
| case BLKmode: |
| if (bytes < 0) |
| break; |
| /* FALLTHRU */ |
| case DImode: |
| case SImode: |
| case HImode: |
| case QImode: |
| if (words <= cum->nregs) |
| { |
| int regno = cum->regno; |
| |
	  /* Fastcall allocates the first two DWORD (SImode) or smaller
	     arguments to ECX and EDX, unless the argument is an
	     aggregate type.  */
| if (cum->fastcall) |
| { |
| if (mode == BLKmode |
| || mode == DImode |
| || (type && AGGREGATE_TYPE_P (type))) |
| break; |
| |
	      /* ECX, not EAX, is the first allocated register.  */
| if (regno == AX_REG) |
| regno = CX_REG; |
| } |
| return gen_rtx_REG (mode, regno); |
| } |
| break; |
| |
| case DFmode: |
| if (cum->float_in_sse < 2) |
| break; |
| case SFmode: |
| if (cum->float_in_sse < 1) |
| break; |
| /* FALLTHRU */ |
| case TImode: |
| /* In 32bit, we pass TImode in xmm registers. */ |
| case V16QImode: |
| case V8HImode: |
| case V4SImode: |
| case V2DImode: |
| case V4SFmode: |
| case V2DFmode: |
| if (!type || !AGGREGATE_TYPE_P (type)) |
| { |
| if (cum->sse_nregs) |
| return gen_reg_or_parallel (mode, orig_mode, |
| cum->sse_regno + FIRST_SSE_REG); |
| } |
| break; |
| |
| case OImode: |
| case XImode: |
| /* OImode and XImode shouldn't be used directly. */ |
| gcc_unreachable (); |
| |
| case V64QImode: |
| case V32HImode: |
| case V16SImode: |
| case V8DImode: |
| case V16SFmode: |
| case V8DFmode: |
| case V8SFmode: |
| case V8SImode: |
| case V32QImode: |
| case V16HImode: |
| case V4DFmode: |
| case V4DImode: |
| if (!type || !AGGREGATE_TYPE_P (type)) |
| { |
| if (cum->sse_nregs) |
| return gen_reg_or_parallel (mode, orig_mode, |
| cum->sse_regno + FIRST_SSE_REG); |
| } |
| break; |
| |
| case V8QImode: |
| case V4HImode: |
| case V2SImode: |
| case V2SFmode: |
| case V1TImode: |
| case V1DImode: |
| if (!type || !AGGREGATE_TYPE_P (type)) |
| { |
| if (cum->mmx_nregs) |
| return gen_reg_or_parallel (mode, orig_mode, |
| cum->mmx_regno + FIRST_MMX_REG); |
| } |
| break; |
| } |
| |
| return NULL_RTX; |
| } |
| |
| static rtx |
| function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode, |
| enum machine_mode orig_mode, const_tree type, bool named) |
| { |
  /* Handle a hidden AL argument containing the number of SSE registers
     for varargs x86-64 functions.  */
| if (mode == VOIDmode) |
| return GEN_INT (cum->maybe_vaarg |
| ? (cum->sse_nregs < 0 |
| ? X86_64_SSE_REGPARM_MAX |
| : cum->sse_regno) |
| : -1); |
| |
| switch (mode) |
| { |
| default: |
| break; |
| |
| case V8SFmode: |
| case V8SImode: |
| case V32QImode: |
| case V16HImode: |
| case V4DFmode: |
| case V4DImode: |
| case V16SFmode: |
| case V16SImode: |
| case V64QImode: |
| case V32HImode: |
| case V8DFmode: |
| case V8DImode: |
      /* Unnamed 256 and 512 bit vector mode parameters are passed on
	 the stack.  */
| if (!named) |
| return NULL; |
| break; |
| } |
| |
| return construct_container (mode, orig_mode, type, 0, cum->nregs, |
| cum->sse_nregs, |
| &x86_64_int_parameter_registers [cum->regno], |
| cum->sse_regno); |
| } |
| |
| static rtx |
| function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode, |
| enum machine_mode orig_mode, bool named, |
| HOST_WIDE_INT bytes) |
| { |
| unsigned int regno; |
| |
  /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use the value -2 to specify that the current function call uses
     the MS ABI.  */
| if (mode == VOIDmode) |
| return GEN_INT (-2); |
| |
| /* If we've run out of registers, it goes on the stack. */ |
| if (cum->nregs == 0) |
| return NULL_RTX; |
| |
| regno = x86_64_ms_abi_int_parameter_registers[cum->regno]; |
| |
| /* Only floating point modes are passed in anything but integer regs. */ |
| if (TARGET_SSE && (mode == SFmode || mode == DFmode)) |
| { |
| if (named) |
| regno = cum->regno + FIRST_SSE_REG; |
| else |
| { |
| rtx t1, t2; |
| |
| /* Unnamed floating parameters are passed in both the |
| SSE and integer registers. */ |
| t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG); |
| t2 = gen_rtx_REG (mode, regno); |
| t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx); |
| t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx); |
| return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2)); |
| } |
| } |
  /* Handle aggregate types passed in a register.  */
| if (orig_mode == BLKmode) |
| { |
| if (bytes > 0 && bytes <= 8) |
| mode = (bytes > 4 ? DImode : SImode); |
| if (mode == BLKmode) |
| mode = DImode; |
| } |
| |
| return gen_reg_or_parallel (mode, orig_mode, regno); |
| } |
| |
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register
   in which to store the argument.
| |
| MODE is the argument's machine mode. TYPE is the data type of the |
| argument. It is null for libcalls where that information may not be |
| available. CUM gives information about the preceding args and about |
| the function being called. NAMED is nonzero if this argument is a |
| named parameter (otherwise it is an extra parameter matching an |
| ellipsis). */ |
| |
| static rtx |
| ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode, |
| const_tree type, bool named) |
| { |
| CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); |
| enum machine_mode mode = omode; |
| HOST_WIDE_INT bytes, words; |
| rtx arg; |
| |
| if (mode == BLKmode) |
| bytes = int_size_in_bytes (type); |
| else |
| bytes = GET_MODE_SIZE (mode); |
| words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; |
| |
| /* To simplify the code below, represent vector types with a vector mode |
| even if MMX/SSE are not active. */ |
| if (type && TREE_CODE (type) == VECTOR_TYPE) |
| mode = type_natural_mode (type, cum, false); |
| |
| if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI) |
| arg = function_arg_ms_64 (cum, mode, omode, named, bytes); |
| else if (TARGET_64BIT) |
| arg = function_arg_64 (cum, mode, omode, type, named); |
| else |
| arg = function_arg_32 (cum, mode, omode, type, bytes, words); |
| |
| return arg; |
| } |
| |
| /* A C expression that indicates when an argument must be passed by |
| reference. If nonzero for an argument, a copy of that argument is |
| made in memory and a pointer to the argument is passed instead of |
| the argument itself. The pointer is passed in whatever way is |
| appropriate for passing a pointer to that type. */ |
| |
| static bool |
| ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode, |
| const_tree type, bool named ATTRIBUTE_UNUSED) |
| { |
| CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); |
| |
| /* See Windows x64 Software Convention. */ |
| if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI) |
| { |
| int msize = (int) GET_MODE_SIZE (mode); |
| if (type) |
| { |
| /* Arrays are passed by reference. */ |
| if (TREE_CODE (type) == ARRAY_TYPE) |
| return true; |
| |
| if (AGGREGATE_TYPE_P (type)) |
| { |
| /* Structs/unions of sizes other than 8, 16, 32, or 64 bits |
| are passed by reference. */ |
| msize = int_size_in_bytes (type); |
| } |
| } |
| |
| /* __m128 is passed by reference. */ |
| switch (msize) { |
| case 1: case 2: case 4: case 8: |
| break; |
| default: |
| return true; |
| } |
| } |
  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;

  return false;
| } |
| |
| /* Return true when TYPE should be 128bit aligned for 32bit argument |
| passing ABI. XXX: This function is obsolete and is only used for |
| checking psABI compatibility with previous versions of GCC. */ |
| |
| static bool |
| ix86_compat_aligned_value_p (const_tree type) |
| { |
| enum machine_mode mode = TYPE_MODE (type); |
| if (((TARGET_SSE && SSE_REG_MODE_P (mode)) |
| || mode == TDmode |
| || mode == TFmode |
| || mode == TCmode) |
| && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128)) |
| return true; |
| if (TYPE_ALIGN (type) < 128) |
| return false; |
| |
| if (AGGREGATE_TYPE_P (type)) |
| { |
| /* Walk the aggregates recursively. */ |
| switch (TREE_CODE (type)) |
| { |
| case RECORD_TYPE: |
| case UNION_TYPE: |
| case QUAL_UNION_TYPE: |
| { |
| tree field; |
| |
| /* Walk all the structure fields. */ |
| for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
| { |
| if (TREE_CODE (field) == FIELD_DECL |
| && ix86_compat_aligned_value_p (TREE_TYPE (field))) |
| return true; |
| } |
| break; |
| } |
| |
| case ARRAY_TYPE: |
	  /* Just in case some language passes arrays by value.  */
| if (ix86_compat_aligned_value_p (TREE_TYPE (type))) |
| return true; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| return false; |
| } |
| |
| /* Return the alignment boundary for MODE and TYPE with alignment ALIGN. |
| XXX: This function is obsolete and is only used for checking psABI |
| compatibility with previous versions of GCC. */ |
| |
| static unsigned int |
| ix86_compat_function_arg_boundary (enum machine_mode mode, |
| const_tree type, unsigned int align) |
| { |
| /* In 32bit, only _Decimal128 and __float128 are aligned to their |
| natural boundaries. */ |
| if (!TARGET_64BIT && mode != TDmode && mode != TFmode) |
| { |
| /* i386 ABI defines all arguments to be 4 byte aligned. We have to |
| make an exception for SSE modes since these require 128bit |
| alignment. |
| |
| The handling here differs from field_alignment. ICC aligns MMX |
| arguments to 4 byte boundaries, while structure fields are aligned |
| to 8 byte boundaries. */ |
| if (!type) |
| { |
| if (!(TARGET_SSE && SSE_REG_MODE_P (mode))) |
| align = PARM_BOUNDARY; |
| } |
| else |
| { |
| if (!ix86_compat_aligned_value_p (type)) |
| align = PARM_BOUNDARY; |
| } |
| } |
| if (align > BIGGEST_ALIGNMENT) |
| align = BIGGEST_ALIGNMENT; |
| return align; |
| } |
| |
| /* Return true when TYPE should be 128bit aligned for 32bit argument |
| passing ABI. */ |
| |
| static bool |
| ix86_contains_aligned_value_p (const_tree type) |
| { |
| enum machine_mode mode = TYPE_MODE (type); |
| |
| if (mode == XFmode || mode == XCmode) |
| return false; |
| |
| if (TYPE_ALIGN (type) < 128) |
| return false; |
| |
| if (AGGREGATE_TYPE_P (type)) |
| { |
| /* Walk the aggregates recursively. */ |
| switch (TREE_CODE (type)) |
| { |
| case RECORD_TYPE: |
| case UNION_TYPE: |
| case QUAL_UNION_TYPE: |
| { |
| tree field; |
| |
| /* Walk all the structure fields. */ |
| for (field = TYPE_FIELDS (type); |
| field; |
| field = DECL_CHAIN (field)) |
| { |
| if (TREE_CODE (field) == FIELD_DECL |
| && ix86_contains_aligned_value_p (TREE_TYPE (field))) |
| return true; |
| } |
| break; |
| } |
| |
| case ARRAY_TYPE: |
	  /* Just in case some language passes arrays by value.  */
| if (ix86_contains_aligned_value_p (TREE_TYPE (type))) |
| return true; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| else |
| return TYPE_ALIGN (type) >= 128; |
| |
| return false; |
| } |
| |
| /* Gives the alignment boundary, in bits, of an argument with the |
| specified mode and type. */ |
| |
| static unsigned int |
| ix86_function_arg_boundary (enum machine_mode mode, const_tree type) |
| { |
| unsigned int align; |
| if (type) |
| { |
      /* Since the main variant type is used for the call, convert TYPE
	 to its main variant.  */
| type = TYPE_MAIN_VARIANT (type); |
| align = TYPE_ALIGN (type); |
| } |
| else |
| align = GET_MODE_ALIGNMENT (mode); |
| if (align < PARM_BOUNDARY) |
| align = PARM_BOUNDARY; |
| else |
| { |
| static bool warned; |
| unsigned int saved_align = align; |
| |
| if (!TARGET_64BIT) |
| { |
| /* i386 ABI defines XFmode arguments to be 4 byte aligned. */ |
| if (!type) |
| { |
| if (mode == XFmode || mode == XCmode) |
| align = PARM_BOUNDARY; |
| } |
| else if (!ix86_contains_aligned_value_p (type)) |
| align = PARM_BOUNDARY; |
| |
| if (align < 128) |
| align = PARM_BOUNDARY; |
| } |
| |
| if (warn_psabi |
| && !warned |
| && align != ix86_compat_function_arg_boundary (mode, type, |
| saved_align)) |
| { |
| warned = true; |
| inform (input_location, |
| "The ABI for passing parameters with %d-byte" |
| " alignment has changed in GCC 4.6", |
| align / BITS_PER_UNIT); |
| } |
| } |
| |
| return align; |
| } |
| |
| /* Return true if N is a possible register number of function value. */ |
| |
| static bool |
| ix86_function_value_regno_p (const unsigned int regno) |
| { |
| switch (regno) |
| { |
| case AX_REG: |
| case DX_REG: |
| return true; |
| case DI_REG: |
| case SI_REG: |
| return TARGET_64BIT && ix86_abi != MS_ABI; |
| |
| /* Complex values are returned in %st(0)/%st(1) pair. */ |
| case ST0_REG: |
| case ST1_REG: |
| /* TODO: The function should depend on current function ABI but |
| builtins.c would need updating then. Therefore we use the |
| default ABI. */ |
| if (TARGET_64BIT && ix86_abi == MS_ABI) |
| return false; |
| return TARGET_FLOAT_RETURNS_IN_80387; |
| |
| /* Complex values are returned in %xmm0/%xmm1 pair. */ |
| case XMM0_REG: |
| case XMM1_REG: |
| return TARGET_SSE; |
| |
| case MM0_REG: |
| if (TARGET_MACHO || TARGET_64BIT) |
| return false; |
| return TARGET_MMX; |
| } |
| |
| return false; |
| } |
| |
| /* Define how to find the value returned by a function. |
| VALTYPE is the data type of the value (as a tree). |
| If the precise function being called is known, FUNC is its FUNCTION_DECL; |
| otherwise, FUNC is 0. */ |
| |
| static rtx |
| function_value_32 (enum machine_mode orig_mode, enum machine_mode mode, |
| const_tree fntype, const_tree fn) |
| { |
| unsigned int regno; |
| |
| /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where |
| we normally prevent this case when mmx is not available. However |
| some ABIs may require the result to be returned like DImode. */ |
| if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8) |
| regno = FIRST_MMX_REG; |
| |
| /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where |
| we prevent this case when sse is not available. However some ABIs |
| may require the result to be returned like integer TImode. */ |
| else if (mode == TImode |
| || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) |
| regno = FIRST_SSE_REG; |
| |
| /* 32-byte vector modes in %ymm0. */ |
| else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32) |
| regno = FIRST_SSE_REG; |
| |
| /* 64-byte vector modes in %zmm0. */ |
| else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64) |
| regno = FIRST_SSE_REG; |
| |
| /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */ |
| else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387) |
| regno = FIRST_FLOAT_REG; |
| else |
| /* Most things go in %eax. */ |
| regno = AX_REG; |
| |
| /* Override FP return register with %xmm0 for local functions when |
| SSE math is enabled or for functions with sseregparm attribute. */ |
| if ((fn || fntype) && (mode == SFmode || mode == DFmode)) |
| { |
| int sse_level = ix86_function_sseregparm (fntype, fn, false); |
| if ((sse_level >= 1 && mode == SFmode) |
| || (sse_level == 2 && mode == DFmode)) |
| regno = FIRST_SSE_REG; |
| } |
| |
| /* OImode shouldn't be used directly. */ |
| gcc_assert (mode != OImode); |
| |
| return gen_rtx_REG (orig_mode, regno); |
| } |
| |
| static rtx |
| function_value_64 (enum machine_mode orig_mode, enum machine_mode mode, |
| const_tree valtype) |
| { |
| rtx ret; |
| |
| /* Handle libcalls, which don't provide a type node. */ |
| if (valtype == NULL) |
| { |
| unsigned int regno; |
| |
| switch (mode) |
| { |
| case SFmode: |
| case SCmode: |
| case DFmode: |
| case DCmode: |
| case TFmode: |
| case SDmode: |
| case DDmode: |
| case TDmode: |
| regno = FIRST_SSE_REG; |
| break; |
| case XFmode: |
| case XCmode: |
| regno = FIRST_FLOAT_REG; |
| break; |
| case TCmode: |
| return NULL; |
| default: |
| regno = AX_REG; |
| } |
| |
| return gen_rtx_REG (mode, regno); |
| } |
| else if (POINTER_TYPE_P (valtype)) |
| { |
| /* Pointers are always returned in word_mode. */ |
| mode = word_mode; |
| } |
| |
| ret = construct_container (mode, orig_mode, valtype, 1, |
| X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX, |
| x86_64_int_return_registers, 0); |
| |
  /* For zero sized structures, construct_container returns NULL, but we
     need to keep the rest of the compiler happy by returning a meaningful
     value.  */
| if (!ret) |
| ret = gen_rtx_REG (orig_mode, AX_REG); |
| |
| return ret; |
| } |
| |
| static rtx |
| function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode, |
| const_tree valtype) |
| { |
| unsigned int regno = AX_REG; |
| |
| if (TARGET_SSE) |
| { |
| switch (GET_MODE_SIZE (mode)) |
| { |
| case 16: |
	  if (valtype != NULL_TREE
	      && !VECTOR_INTEGER_TYPE_P (valtype)
	      && !INTEGRAL_TYPE_P (valtype)
	      && !VECTOR_FLOAT_TYPE_P (valtype))
| break; |
| if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) |
| && !COMPLEX_MODE_P (mode)) |
| regno = FIRST_SSE_REG; |
| break; |
| case 8: |
| case 4: |
| if (mode == SFmode || mode == DFmode) |
| regno = FIRST_SSE_REG; |
| break; |
| default: |
| break; |
| } |
| } |
| return gen_rtx_REG (orig_mode, regno); |
| } |
| |
| static rtx |
| ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl, |
| enum machine_mode orig_mode, enum machine_mode mode) |
| { |
| const_tree fn, fntype; |
| |
| fn = NULL_TREE; |
| if (fntype_or_decl && DECL_P (fntype_or_decl)) |
| fn = fntype_or_decl; |
| fntype = fn ? TREE_TYPE (fn) : fntype_or_decl; |
| |
| if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI) |
| return function_value_ms_64 (orig_mode, mode, valtype); |
| else if (TARGET_64BIT) |
| return function_value_64 (orig_mode, mode, valtype); |
| else |
| return function_value_32 (orig_mode, mode, fntype, fn); |
| } |
| |
| static rtx |
| ix86_function_value (const_tree valtype, const_tree fntype_or_decl, |
| bool outgoing ATTRIBUTE_UNUSED) |
| { |
| enum machine_mode mode, orig_mode; |
| |
| orig_mode = TYPE_MODE (valtype); |
| mode = type_natural_mode (valtype, NULL, true); |
| return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode); |
| } |
| |
| /* Pointer function arguments and return values are promoted to |
| word_mode. */ |
| |
| static enum machine_mode |
| ix86_promote_function_mode (const_tree type, enum machine_mode mode, |
| int *punsignedp, const_tree fntype, |
| int for_return) |
| { |
| if (type != NULL_TREE && POINTER_TYPE_P (type)) |
| { |
| *punsignedp = POINTERS_EXTEND_UNSIGNED; |
| return word_mode; |
| } |
| return default_promote_function_mode (type, mode, punsignedp, fntype, |
| for_return); |
| } |
| |
| /* Return true if a structure, union or array with MODE containing FIELD |
| should be accessed using BLKmode. */ |
| |
| static bool |
| ix86_member_type_forces_blk (const_tree field, enum machine_mode mode) |
| { |
| /* Union with XFmode must be in BLKmode. */ |
| return (mode == XFmode |
| && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE |
| || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE)); |
| } |
| |
| rtx |
| ix86_libcall_value (enum machine_mode mode) |
| { |
| return ix86_function_value_1 (NULL, NULL, mode, mode); |
| } |
| |
| /* Return true iff type is returned in memory. */ |
| |
| static bool |
| ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) |
| { |
| #ifdef SUBTARGET_RETURN_IN_MEMORY |
| return SUBTARGET_RETURN_IN_MEMORY (type, fntype); |
| #else |
| const enum machine_mode mode = type_natural_mode (type, NULL, true); |
| HOST_WIDE_INT size; |
| |
| if (TARGET_64BIT) |
| { |
| if (ix86_function_type_abi (fntype) == MS_ABI) |
| { |
| size = int_size_in_bytes (type); |
| |
| /* __m128 is returned in xmm0. */ |
| if ((!type || VECTOR_INTEGER_TYPE_P (type) |
| || INTEGRAL_TYPE_P (type) |
| || VECTOR_FLOAT_TYPE_P (type)) |
| && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) |
| && !COMPLEX_MODE_P (mode) |
| && (GET_MODE_SIZE (mode) == 16 || size == 16)) |
| return false; |
| |
	  /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes.  */
| return size != 1 && size != 2 && size != 4 && size != 8; |
| } |
| else |
| { |
| int needed_intregs, needed_sseregs; |
| |
| return examine_argument (mode, type, 1, |
| &needed_intregs, &needed_sseregs); |
| } |
| } |
| else |
| { |
| if (mode == BLKmode) |
| return true; |
| |
| size = int_size_in_bytes (type); |
| |
| if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8) |
| return false; |
| |
| if (VECTOR_MODE_P (mode) || mode == TImode) |
| { |
| /* User-created vectors small enough to fit in EAX. */ |
| if (size < 8) |
| return false; |
| |
	  /* Unless the ABI prescribes otherwise,
	     MMX/3dNow values are returned in MM0 if available.  */
| |
| if (size == 8) |
| return TARGET_VECT8_RETURNS || !TARGET_MMX; |
| |
| /* SSE values are returned in XMM0 if available. */ |
| if (size == 16) |
| return !TARGET_SSE; |
| |
| /* AVX values are returned in YMM0 if available. */ |
| if (size == 32) |
| return !TARGET_AVX; |
| |
| /* AVX512F values are returned in ZMM0 if available. */ |
| if (size == 64) |
| return !TARGET_AVX512F; |
| } |
| |
| if (mode == XFmode) |
| return false; |
| |
| if (size > 12) |
| return true; |
| |
| /* OImode shouldn't be used directly. */ |
| gcc_assert (mode != OImode); |
| |
| return false; |
| } |
| #endif |
| } |
| |
| |
| /* Create the va_list data type. */ |
| |
/* Return the calling convention specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */
| |
| static tree |
| ix86_build_builtin_va_list_abi (enum calling_abi abi) |
| { |
| tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; |
| |
  /* For i386 we use a plain pointer to the argument area.  */
| if (!TARGET_64BIT || abi == MS_ABI) |
| return build_pointer_type (char_type_node); |
| |
| record = lang_hooks.types.make_type (RECORD_TYPE); |
| type_decl = build_decl (BUILTINS_LOCATION, |
| TYPE_DECL, get_identifier ("__va_list_tag"), record); |
| |
| f_gpr = build_decl (BUILTINS_LOCATION, |
| FIELD_DECL, get_identifier ("gp_offset"), |
| unsigned_type_node); |
| f_fpr = build_decl (BUILTINS_LOCATION, |
| FIELD_DECL, get_identifier ("fp_offset"), |
| unsigned_type_node); |
| f_ovf = build_decl (BUILTINS_LOCATION, |
| FIELD_DECL, get_identifier ("overflow_arg_area"), |
| ptr_type_node); |
| f_sav = build_decl (BUILTINS_LOCATION, |
| FIELD_DECL, get_identifier ("reg_save_area"), |
| ptr_type_node); |
| |
| va_list_gpr_counter_field = f_gpr; |
| va_list_fpr_counter_field = f_fpr; |
| |
| DECL_FIELD_CONTEXT (f_gpr) = record; |
| DECL_FIELD_CONTEXT (f_fpr) = record; |
| DECL_FIELD_CONTEXT (f_ovf) = record; |
| DECL_FIELD_CONTEXT (f_sav) = record; |
| |
| TYPE_STUB_DECL (record) = type_decl; |
| TYPE_NAME (record) = type_decl; |
| TYPE_FIELDS (record) = f_gpr; |
| DECL_CHAIN (f_gpr) = f_fpr; |
| DECL_CHAIN (f_fpr) = f_ovf; |
| DECL_CHAIN (f_ovf) = f_sav; |
| |
| layout_type (record); |
| |
| /* The correct type is an array type of one element. */ |
| return build_array_type (record, build_index_type (size_zero_node)); |
| } |
| |
/* Set up the builtin va_list data type and, for 64-bit, the additional
   calling convention specific va_list data types.  */
| |
| static tree |
| ix86_build_builtin_va_list (void) |
| { |
| tree ret = ix86_build_builtin_va_list_abi (ix86_abi); |
| |
| /* Initialize abi specific va_list builtin types. */ |
| if (TARGET_64BIT) |
| { |
| tree t; |
| if (ix86_abi == MS_ABI) |
| { |
| t = ix86_build_builtin_va_list_abi (SYSV_ABI); |
| if (TREE_CODE (t) != RECORD_TYPE) |
| t = build_variant_type_copy (t); |
| sysv_va_list_type_node = t; |
| } |
| else |
| { |
| t = ret; |
| if (TREE_CODE (t) != RECORD_TYPE) |
| t = build_variant_type_copy (t); |
| sysv_va_list_type_node = t; |
| } |
| if (ix86_abi != MS_ABI) |
| { |
| t = ix86_build_builtin_va_list_abi (MS_ABI); |
| if (TREE_CODE (t) != RECORD_TYPE) |
| t = build_variant_type_copy (t); |
| ms_va_list_type_node = t; |
| } |
| else |
| { |
| t = ret; |
| if (TREE_CODE (t) != RECORD_TYPE) |
| t = build_variant_type_copy (t); |
| ms_va_list_type_node = t; |
| } |
| } |
| |
| return ret; |
| } |
| |
| /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */ |
| |
| static void |
| setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) |
| { |
| rtx save_area, mem; |
| alias_set_type set; |
| int i, max; |
| |
| /* GPR size of varargs save area. */ |
| if (cfun->va_list_gpr_size) |
| ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD; |
| else |
| ix86_varargs_gpr_size = 0; |
| |
| /* FPR size of varargs save area. We don't need it if we don't pass |
| anything in SSE registers. */ |
| if (TARGET_SSE && cfun->va_list_fpr_size) |
| ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16; |
| else |
| ix86_varargs_fpr_size = 0; |
| |
| if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size) |
| return; |
| |
| save_area = frame_pointer_rtx; |
| set = get_varargs_alias_set (); |
| |
| max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD; |
| if (max > X86_64_REGPARM_MAX) |
| max = X86_64_REGPARM_MAX; |
| |
| for (i = cum->regno; i < max; i++) |
| { |
| mem = gen_rtx_MEM (word_mode, |
| plus_constant (Pmode, save_area, i * UNITS_PER_WORD)); |
| MEM_NOTRAP_P (mem) = 1; |
| set_mem_alias_set (mem, set); |
| emit_move_insn (mem, |
| gen_rtx_REG (word_mode, |
| x86_64_int_parameter_registers[i])); |
| } |
| |
| if (ix86_varargs_fpr_size) |
| { |
| enum machine_mode smode; |
| rtx label, test; |
| |
      /* Now emit code to save SSE registers.  The AX parameter contains the
	 number of SSE parameter registers used to call this function, though
	 all we actually check here is the zero/non-zero status.  */
| |
| label = gen_label_rtx (); |
| test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx); |
| emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1), |
| label)); |
| |
| /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if |
| we used movdqa (i.e. TImode) instead? Perhaps even better would |
| be if we could determine the real mode of the data, via a hook |
| into pass_stdarg. Ignore all that for now. */ |
| smode = V4SFmode; |
| if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode)) |
| crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode); |
| |
| max = cum->sse_regno + cfun->va_list_fpr_size / 16; |
| if (max > X86_64_SSE_REGPARM_MAX) |
| max = X86_64_SSE_REGPARM_MAX; |
| |
| for (i = cum->sse_regno; i < max; ++i) |
| { |
| mem = plus_constant (Pmode, save_area, |
| i * 16 + ix86_varargs_gpr_size); |
| mem = gen_rtx_MEM (smode, mem); |
| MEM_NOTRAP_P (mem) = 1; |
| set_mem_alias_set (mem, set); |
| set_mem_align (mem, GET_MODE_ALIGNMENT (smode)); |
| |
| emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i))); |
| } |
| |
| emit_label (label); |
| } |
| } |
| |
| static void |
| setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum) |
| { |
| alias_set_type set = get_varargs_alias_set (); |
| int i; |
| |
  /* Reset to zero, as a sysv va_arg may have been used before.  */
| ix86_varargs_gpr_size = 0; |
| ix86_varargs_fpr_size = 0; |
| |
| for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++) |
| { |
| rtx reg, mem; |
| |
| mem = gen_rtx_MEM (Pmode, |
| plus_constant (Pmode, virtual_incoming_args_rtx, |
| i * UNITS_PER_WORD)); |
| MEM_NOTRAP_P (mem) = 1; |
| set_mem_alias_set (mem, set); |
| |
| reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]); |
| emit_move_insn (mem, reg); |
| } |
| } |
| |
| static void |
| ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode, |
| tree type, int *pretend_size ATTRIBUTE_UNUSED, |
| int no_rtl) |
| { |
| CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); |
| CUMULATIVE_ARGS next_cum; |
| tree fntype; |
| |
| /* This argument doesn't appear to be used anymore. Which is good, |
| because the old code here didn't suppress rtl generation. */ |
| gcc_assert (!no_rtl); |
| |
| if (!TARGET_64BIT) |
| return; |
| |
| fntype = TREE_TYPE (current_function_decl); |
| |
| /* For varargs, we do not want to skip the dummy va_dcl argument. |
| For stdargs, we do want to skip the last named argument. */ |
| next_cum = *cum; |
| if (stdarg_p (fntype)) |
| ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, |
| true); |
| |
| if (cum->call_abi == MS_ABI) |
| setup_incoming_varargs_ms_64 (&next_cum); |
| else |
| setup_incoming_varargs_64 (&next_cum); |
| } |
| |
/* Return true if TYPE is a va_list that is a plain char pointer.  */
| |
| static bool |
| is_va_list_char_pointer (tree type) |
| { |
| tree canonic; |
| |
| /* For 32-bit it is always true. */ |
| if (!TARGET_64BIT) |
| return true; |
| canonic = ix86_canonical_va_list_type (type); |
| return (canonic == ms_va_list_type_node |
| || (ix86_abi == MS_ABI && canonic == va_list_type_node)); |
| } |
| |
| /* Implement va_start. */ |
| |
| static void |
| ix86_va_start (tree valist, rtx nextarg) |
| { |
| HOST_WIDE_INT words, n_gpr, n_fpr; |
| tree f_gpr, f_fpr, f_ovf, f_sav; |
| tree gpr, fpr, ovf, sav, t; |
| tree type; |
| rtx ovf_rtx; |
| |
| if (flag_split_stack |
| && cfun->machine->split_stack_varargs_pointer == NULL_RTX) |
| { |
| unsigned int scratch_regno; |
| |
| /* When we are splitting the stack, we can't refer to the stack |
| arguments using internal_arg_pointer, because they may be on |
| the old stack. The split stack prologue will arrange to |
| leave a pointer to the old stack arguments in a scratch |
| register, which we here copy to a pseudo-register. The split |
| stack prologue can't set the pseudo-register directly because |
| it (the prologue) runs before any registers have been saved. */ |
| |
| scratch_regno = split_stack_prologue_scratch_regno (); |
| if (scratch_regno != INVALID_REGNUM) |
| { |
| rtx reg, seq; |
| |
| reg = gen_reg_rtx (Pmode); |
| cfun->machine->split_stack_varargs_pointer = reg; |
| |
| start_sequence (); |
| emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno)); |
| seq = get_insns (); |
| end_sequence (); |
| |
| push_topmost_sequence (); |
| emit_insn_after (seq, entry_of_function ()); |
| pop_topmost_sequence (); |
| } |
| } |
| |
  /* Only the 64-bit target needs something special.  */
| if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist))) |
| { |
| if (cfun->machine->split_stack_varargs_pointer == NULL_RTX) |
| std_expand_builtin_va_start (valist, nextarg); |
| else |
| { |
| rtx va_r, next; |
| |
| va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE); |
| next = expand_binop (ptr_mode, add_optab, |
| cfun->machine->split_stack_varargs_pointer, |
| crtl->args.arg_offset_rtx, |
| NULL_RTX, 0, OPTAB_LIB_WIDEN); |
| convert_move (va_r, next, 0); |
| } |
| return; |
| } |
| |
| f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node)); |
| f_fpr = DECL_CHAIN (f_gpr); |
| f_ovf = DECL_CHAIN (f_fpr); |
| f_sav = DECL_CHAIN (f_ovf); |
| |
| valist = build_simple_mem_ref (valist); |
| TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node); |
| /* The following should be folded into the MEM_REF offset. */ |
| gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist), |
| f_gpr, NULL_TREE); |
| fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist), |
| f_fpr, NULL_TREE); |
| ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist), |
| f_ovf, NULL_TREE); |
| sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist), |
| f_sav, NULL_TREE); |
| |
| /* Count number of gp and fp argument registers used. */ |
| words = crtl->args.info.words; |
| n_gpr = crtl->args.info.regno; |
| n_fpr = crtl->args.info.sse_regno; |
| |
| if (cfun->va_list_gpr_size) |
| { |
| type = TREE_TYPE (gpr); |
| t = build2 (MODIFY_EXPR, type, |
| gpr, build_int_cst (type, n_gpr * 8)); |
| TREE_SIDE_EFFECTS (t) = 1; |
| expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); |
| } |
| |
| if (TARGET_SSE && cfun->va_list_fpr_size) |
| { |
| type = TREE_TYPE (fpr); |
| t = build2 (MODIFY_EXPR, type, fpr, |
| build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX)); |
| TREE_SIDE_EFFECTS (t) = 1; |
| expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); |
| } |
| |
| /* Find the overflow area. */ |
| type = TREE_TYPE (ovf); |
| if (cfun->machine->split_stack_varargs_pointer == NULL_RTX) |
| ovf_rtx = crtl->args.internal_arg_pointer; |
| else |
| ovf_rtx = cfun->machine->split_stack_varargs_pointer; |
| t = make_tree (type, ovf_rtx); |
| if (words != 0) |
| t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD); |
| t = build2 (MODIFY_EXPR, type, ovf, t); |
| TREE_SIDE_EFFECTS (t) = 1; |
| expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); |
| |
| if (ix86_varargs_gpr_size || ix86_varargs_fpr_size) |
| { |
| /* Find the register save area. |
| The function prologue saves it right above the stack frame. */ |
| type = TREE_TYPE (sav); |
| t = make_tree (type, frame_pointer_rtx); |
| if (!ix86_varargs_gpr_size) |
| t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX); |
| t = build2 (MODIFY_EXPR, type, sav, t); |
| TREE_SIDE_EFFECTS (t) = 1; |
| expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); |
| } |
| } |
| |
| /* Implement va_arg. */ |
| |
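| /* A rough sketch of the gimple built below for a register-passable |
| argument (stack-only types go straight to the overflow path): |
| |
| if (too few GP/SSE slots remain) goto stack; |
| addr = reg_save_area + gp_offset (or + fp_offset); |
| gp_offset += 8 * slots; fp_offset += 16 * slots; |
| goto done; |
| stack: |
| addr = align (overflow_arg_area); overflow_arg_area = addr + rounded size; |
| done: |
| result = *(TYPE *) addr; |
| |
| This is illustrative only; the details are in the code below. */ |
| |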
| static tree |
| ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, |
| gimple_seq *post_p) |
| { |
| static const int intreg[6] = { 0, 1, 2, 3, 4, 5 }; |
| tree f_gpr, f_fpr, f_ovf, f_sav; |
| tree gpr, fpr, ovf, sav, t; |
| int size, rsize; |
| tree lab_false, lab_over = NULL_TREE; |
| tree addr, t2; |
| rtx container; |
| int indirect_p = 0; |
| tree ptrtype; |
| enum machine_mode nat_mode; |
| unsigned int arg_boundary; |
| |
| /* Only 64-bit targets need something special. */ |
| if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist))) |
| return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); |
| |
| f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node)); |
| f_fpr = DECL_CHAIN (f_gpr); |
| f_ovf = DECL_CHAIN (f_fpr); |
| f_sav = DECL_CHAIN (f_ovf); |
| |
| gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), |
| build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE); |
| valist = build_va_arg_indirect_ref (valist); |
| fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); |
| ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE); |
| sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); |
| |
| indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false); |
| if (indirect_p) |
| type = build_pointer_type (type); |
| size = int_size_in_bytes (type); |
| rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; |
| |
| nat_mode = type_natural_mode (type, NULL, false); |
| switch (nat_mode) |
| { |
| case V8SFmode: |
| case V8SImode: |
| case V32QImode: |
| case V16HImode: |
| case V4DFmode: |
| case V4DImode: |
| case V16SFmode: |
| case V16SImode: |
| case V64QImode: |
| case V32HImode: |
| case V8DFmode: |
| case V8DImode: |
| /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */ |
| if (!TARGET_64BIT_MS_ABI) |
| { |
| container = NULL; |
| break; |
| } |
| |
| default: |
| container = construct_container (nat_mode, TYPE_MODE (type), |
| type, 0, X86_64_REGPARM_MAX, |
| X86_64_SSE_REGPARM_MAX, intreg, |
| 0); |
| break; |
| } |
| |
| /* Pull the value out of the saved registers. */ |
| |
| addr = create_tmp_var (ptr_type_node, "addr"); |
| |
| if (container) |
| { |
| int needed_intregs, needed_sseregs; |
| bool need_temp; |
| tree int_addr, sse_addr; |
| |
| lab_false = create_artificial_label (UNKNOWN_LOCATION); |
| lab_over = create_artificial_label (UNKNOWN_LOCATION); |
| |
| examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs); |
| |
| need_temp = (!REG_P (container) |
| && ((needed_intregs && TYPE_ALIGN (type) > 64) |
| || TYPE_ALIGN (type) > 128)); |
| |
| /* In case we are passing a structure, verify that it is a consecutive |
| block in the register save area. If not, we need to do moves. */ |
| if (!need_temp && !REG_P (container)) |
| { |
| /* Verify that all registers are strictly consecutive */ |
| if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0)))) |
| { |
| int i; |
| |
| for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) |
| { |
| rtx slot = XVECEXP (container, 0, i); |
| if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i |
| || INTVAL (XEXP (slot, 1)) != i * 16) |
| need_temp = 1; |
| } |
| } |
| else |
| { |
| int i; |
| |
| for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) |
| { |
| rtx slot = XVECEXP (container, 0, i); |
| if (REGNO (XEXP (slot, 0)) != (unsigned int) i |
| || INTVAL (XEXP (slot, 1)) != i * 8) |
| need_temp = 1; |
| } |
| } |
| } |
| if (!need_temp) |
| { |
| int_addr = addr; |
| sse_addr = addr; |
| } |
| else |
| { |
| int_addr = create_tmp_var (ptr_type_node, "int_addr"); |
| sse_addr = create_tmp_var (ptr_type_node, "sse_addr"); |
| } |
| |
| /* First ensure that we fit completely in registers. */ |
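| /* gpr and fpr are byte offsets into the register save area: the GP |
| slots occupy the first 8 * X86_64_REGPARM_MAX bytes and the 16-byte |
| SSE slots follow them, so gpr >= (X86_64_REGPARM_MAX - needed_intregs |
| + 1) * 8 means fewer than NEEDED_INTREGS GP slots are still unused. */ |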
| if (needed_intregs) |
| { |
| t = build_int_cst (TREE_TYPE (gpr), |
| (X86_64_REGPARM_MAX - needed_intregs + 1) * 8); |
| t = build2 (GE_EXPR, boolean_type_node, gpr, t); |
| t2 = build1 (GOTO_EXPR, void_type_node, lab_false); |
| t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); |
| gimplify_and_add (t, pre_p); |
| } |
| if (needed_sseregs) |
| { |
| t = build_int_cst (TREE_TYPE (fpr), |
| (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16 |
| + X86_64_REGPARM_MAX * 8); |
| t = build2 (GE_EXPR, boolean_type_node, fpr, t); |
| t2 = build1 (GOTO_EXPR, void_type_node, lab_false); |
| t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); |
| gimplify_and_add (t, pre_p); |
| } |
| |
| /* Compute index to start of area used for integer regs. */ |
| if (needed_intregs) |
| { |
| /* int_addr = gpr + sav; */ |
| t = fold_build_pointer_plus (sav, gpr); |
| gimplify_assign (int_addr, t, pre_p); |
| } |
| if (needed_sseregs) |
| { |
| /* sse_addr = fpr + sav; */ |
| t = fold_build_pointer_plus (sav, fpr); |
| gimplify_assign (sse_addr, t, pre_p); |
| } |
| if (need_temp) |
| { |
| int i, prev_size = 0; |
| tree temp = create_tmp_var (type, "va_arg_tmp"); |
| |
| /* addr = &temp; */ |
| t = build1 (ADDR_EXPR, build_pointer_type (type), temp); |
| gimplify_assign (addr, t, pre_p); |
| |
| for (i = 0; i < XVECLEN (container, 0); i++) |
| { |
| rtx slot = XVECEXP (container, 0, i); |
| rtx reg = XEXP (slot, 0); |
| enum machine_mode mode = GET_MODE (reg); |
| tree piece_type; |
| tree addr_type; |
| tree daddr_type; |
| tree src_addr, src; |
| int src_offset; |
| tree dest_addr, dest; |
| int cur_size = GET_MODE_SIZE (mode); |
| |
| gcc_assert (prev_size <= INTVAL (XEXP (slot, 1))); |
| prev_size = INTVAL (XEXP (slot, 1)); |
| if (prev_size + cur_size > size) |
| { |
| cur_size = size - prev_size; |
| mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1); |
| if (mode == BLKmode) |
| mode = QImode; |
| } |
| piece_type = lang_hooks.types.type_for_mode (mode, 1); |
| if (mode == GET_MODE (reg)) |
| addr_type = build_pointer_type (piece_type); |
| else |
| addr_type = build_pointer_type_for_mode (piece_type, ptr_mode, |
| true); |
| daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode, |
| true); |
| |
| if (SSE_REGNO_P (REGNO (reg))) |
| { |
| src_addr = sse_addr; |
| src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16; |
| } |
| else |
| { |
| src_addr = int_addr; |
| src_offset = REGNO (reg) * 8; |
| } |
| src_addr = fold_convert (addr_type, src_addr); |
| src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset); |
| |
| dest_addr = fold_convert (daddr_type, addr); |
| dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size); |
| if (cur_size == GET_MODE_SIZE (mode)) |
| { |
| src = build_va_arg_indirect_ref (src_addr); |
| dest = build_va_arg_indirect_ref (dest_addr); |
| |
| gimplify_assign (dest, src, pre_p); |
| } |
| else |
| { |
| tree copy |
| = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY), |
| 3, dest_addr, src_addr, |
| size_int (cur_size)); |
| gimplify_and_add (copy, pre_p); |
| } |
| prev_size += cur_size; |
| } |
| } |
| |
| if (needed_intregs) |
| { |
| t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr, |
| build_int_cst (TREE_TYPE (gpr), needed_intregs * 8)); |
| gimplify_assign (gpr, t, pre_p); |
| } |
| |
| if (needed_sseregs) |
| { |
| t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr, |
| build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16)); |
| gimplify_assign (fpr, t, pre_p); |
| } |
| |
| gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); |
| |
| gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false)); |
| } |
| |
| /* ... otherwise out of the overflow area. */ |
| |
| /* When we align parameter on stack for caller, if the parameter |
| alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be |
| aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee |
| here with caller. */ |
| arg_boundary = ix86_function_arg_boundary (VOIDmode, type); |
| if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT) |
| arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT; |
| |
| /* Care for on-stack alignment if needed. */ |
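| /* For example, for an argument requiring 32-byte alignment |
| (arg_boundary == 256) this computes t = (ovf + 31) & -32. */ |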
| if (arg_boundary <= 64 || size == 0) |
| t = ovf; |
| else |
| { |
| HOST_WIDE_INT align = arg_boundary / 8; |
| t = fold_build_pointer_plus_hwi (ovf, align - 1); |
| t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, |
| build_int_cst (TREE_TYPE (t), -align)); |
| } |
| |
| gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); |
| gimplify_assign (addr, t, pre_p); |
| |
| t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD); |
| gimplify_assign (unshare_expr (ovf), t, pre_p); |
| |
| if (container) |
| gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over)); |
| |
| ptrtype = build_pointer_type_for_mode (type, ptr_mode, true); |
| addr = fold_convert (ptrtype, addr); |
| |
| if (indirect_p) |
| addr = build_va_arg_indirect_ref (addr); |
| return build_va_arg_indirect_ref (addr); |
| } |
| |
| /* Return true if OPNUM's MEM should be matched |
| in movabs* patterns. */ |
| |
| bool |
| ix86_check_movabs (rtx insn, int opnum) |
| { |
| rtx set, mem; |
| |
| set = PATTERN (insn); |
| if (GET_CODE (set) == PARALLEL) |
| set = XVECEXP (set, 0, 0); |
| gcc_assert (GET_CODE (set) == SET); |
| mem = XEXP (set, opnum); |
| while (GET_CODE (mem) == SUBREG) |
| mem = SUBREG_REG (mem); |
| gcc_assert (MEM_P (mem)); |
| return volatile_ok || !MEM_VOLATILE_P (mem); |
| } |
| |
| /* Initialize the table of extra 80387 mathematical constants. */ |
| |
| static void |
| init_ext_80387_constants (void) |
| { |
| static const char * cst[5] = |
| { |
| "0.3010299956639811952256464283594894482", /* 0: fldlg2 */ |
| "0.6931471805599453094286904741849753009", /* 1: fldln2 */ |
| "1.4426950408889634073876517827983434472", /* 2: fldl2e */ |
| "3.3219280948873623478083405569094566090", /* 3: fldl2t */ |
| "3.1415926535897932385128089594061862044", /* 4: fldpi */ |
| }; |
| int i; |
| |
| for (i = 0; i < 5; i++) |
| { |
| real_from_string (&ext_80387_constants_table[i], cst[i]); |
| /* Ensure each constant is rounded to XFmode precision. */ |
| real_convert (&ext_80387_constants_table[i], |
| XFmode, &ext_80387_constants_table[i]); |
| } |
| |
| ext_80387_constants_init = 1; |
| } |
| |
| /* Return non-zero if the constant is something that |
| can be loaded with a special instruction. */ |
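| /* The return value selects the load sequence (see |
| standard_80387_constant_opcode): -1 not an X87 constant, 0 no special |
| instruction, 1 fldz, 2 fld1, 3..7 fldlg2/fldln2/fldl2e/fldl2t/fldpi, |
| 8 fldz;fchs (-0.0), 9 fld1;fchs (-1.0). */ |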
| |
| int |
| standard_80387_constant_p (rtx x) |
| { |
| enum machine_mode mode = GET_MODE (x); |
| |
| REAL_VALUE_TYPE r; |
| |
| if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE))) |
| return -1; |
| |
| if (x == CONST0_RTX (mode)) |
| return 1; |
| if (x == CONST1_RTX (mode)) |
| return 2; |
| |
| REAL_VALUE_FROM_CONST_DOUBLE (r, x); |
| |
| /* For XFmode constants, try to find a special 80387 instruction when |
| optimizing for size or on those CPUs that benefit from them. */ |
| if (mode == XFmode |
| && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)) |
| { |
| int i; |
| |
| if (! ext_80387_constants_init) |
| init_ext_80387_constants (); |
| |
| for (i = 0; i < 5; i++) |
| if (real_identical (&r, &ext_80387_constants_table[i])) |
| return i + 3; |
| } |
| |
| /* Load of the constant -0.0 or -1.0 will be split as |
| fldz;fchs or fld1;fchs sequence. */ |
| if (real_isnegzero (&r)) |
| return 8; |
| if (real_identical (&r, &dconstm1)) |
| return 9; |
| |
| return 0; |
| } |
| |
| /* Return the opcode of the special instruction to be used to load |
| the constant X. */ |
| |
| const char * |
| standard_80387_constant_opcode (rtx x) |
| { |
| switch (standard_80387_constant_p (x)) |
| { |
| case 1: |
| return "fldz"; |
| case 2: |
| return "fld1"; |
| case 3: |
| return "fldlg2"; |
| case 4: |
| return "fldln2"; |
| case 5: |
| return "fldl2e"; |
| case 6: |
| return "fldl2t"; |
| case 7: |
| return "fldpi"; |
| case 8: |
| case 9: |
| return "#"; |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* Return the CONST_DOUBLE representing the 80387 constant that is |
| loaded by the specified special instruction. The argument IDX |
| matches the return value from standard_80387_constant_p. */ |
| |
| rtx |
| standard_80387_constant_rtx (int idx) |
| { |
| int i; |
| |
| if (! ext_80387_constants_init) |
| init_ext_80387_constants (); |
| |
| switch (idx) |
| { |
| case 3: |
| case 4: |
| case 5: |
| case 6: |
| case 7: |
| i = idx - 3; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i], |
| XFmode); |
| } |
| |
| /* Return 1 if X is all zeros and 2 if X is all ones, |
| in a supported SSE/AVX vector mode. */ |
| |
| int |
| standard_sse_constant_p (rtx x) |
| { |
| enum machine_mode mode = GET_MODE (x); |
| |
| if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x))) |
| return 1; |
| if (vector_all_ones_operand (x, mode)) |
| switch (mode) |
| { |
| case V16QImode: |
| case V8HImode: |
| case V4SImode: |
| case V2DImode: |
| if (TARGET_SSE2) |
| return 2; |
| case V32QImode: |
| case V16HImode: |
| case V8SImode: |
| case V4DImode: |
| if (TARGET_AVX2) |
| return 2; |
| case V64QImode: |
| case V32HImode: |
| case V16SImode: |
| case V8DImode: |
| if (TARGET_AVX512F) |
| return 2; |
| default: |
| break; |
| } |
| |
| return 0; |
| } |
| |
| /* Return the opcode of the special instruction to be used to load |
| the constant X. */ |
| |
| const char * |
| standard_sse_constant_opcode (rtx insn, rtx x) |
| { |
| switch (standard_sse_constant_p (x)) |
| { |
| case 1: |
| switch (get_attr_mode (insn)) |
| { |
| case MODE_XI: |
| case MODE_V16SF: |
| return "vpxord\t%g0, %g0, %g0"; |
| case MODE_V8DF: |
| return "vpxorq\t%g0, %g0, %g0"; |
| case MODE_TI: |
| return "%vpxor\t%0, %d0"; |
| case MODE_V2DF: |
| return "%vxorpd\t%0, %d0"; |
| case MODE_V4SF: |
| return "%vxorps\t%0, %d0"; |
| |
| case MODE_OI: |
| return "vpxor\t%x0, %x0, %x0"; |
| case MODE_V4DF: |
| return "vxorpd\t%x0, %x0, %x0"; |
| case MODE_V8SF: |
| return "vxorps\t%x0, %x0, %x0"; |
| |
| default: |
| break; |
| } |
| |
| case 2: |
| if (get_attr_mode (insn) == MODE_XI |
| || get_attr_mode (insn) == MODE_V8DF |
| || get_attr_mode (insn) == MODE_V16SF) |
| return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; |
| if (TARGET_AVX) |
| return "vpcmpeqd\t%0, %0, %0"; |
| else |
| return "pcmpeqd\t%0, %0"; |
| |
| default: |
| break; |
| } |
| gcc_unreachable (); |
| } |
| |
| /* Returns true if OP contains a symbol reference */ |
| |
| bool |
| symbolic_reference_mentioned_p (rtx op) |
| { |
| const char *fmt; |
| int i; |
| |
| if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) |
| return true; |
| |
| fmt = GET_RTX_FORMAT (GET_CODE (op)); |
| for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) |
| { |
| if (fmt[i] == 'E') |
| { |
| int j; |
| |
| for (j = XVECLEN (op, i) - 1; j >= 0; j--) |
| if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) |
| return true; |
| } |
| |
| else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* Return true if it is appropriate to emit `ret' instructions in the |
| body of a function. Do this only if the epilogue is simple, needing a |
| couple of insns. Prior to reloading, we can't tell how many registers |
| must be saved, so return false then. Return false if there is no frame |
| marker to de-allocate. */ |
| |
| bool |
| ix86_can_use_return_insn_p (void) |
| { |
| struct ix86_frame frame; |
| |
| if (! reload_completed || frame_pointer_needed) |
| return 0; |
| |
| /* Don't allow more than 32k pop, since that's all we can do |
| with one instruction. */ |
| if (crtl->args.pops_args && crtl->args.size >= 32768) |
| return 0; |
| |
| ix86_compute_frame_layout (&frame); |
| return (frame.stack_pointer_offset == UNITS_PER_WORD |
| && (frame.nregs + frame.nsseregs) == 0); |
| } |
| |
| /* Value should be nonzero if functions must have frame pointers. |
| Zero means the frame pointer need not be set up (and parms may |
| be accessed via the stack pointer) in functions that seem suitable. */ |
| |
| static bool |
| ix86_frame_pointer_required (void) |
| { |
| /* If we accessed previous frames, then the generated code expects |
| to be able to access the saved ebp value in our frame. */ |
| if (cfun->machine->accesses_prev_frame) |
| return true; |
| |
| /* Several x86 OSes need a frame pointer for other reasons, |
| usually pertaining to setjmp. */ |
| if (SUBTARGET_FRAME_POINTER_REQUIRED) |
| return true; |
| |
| /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */ |
| if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp) |
| return true; |
| |
| /* For Win64 SEH, very large frames need a frame pointer, as the maximum |
| stack allocation is 4GB. */ |
| if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE) |
| return true; |
| |
| /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER |
| turns off the frame pointer by default. Turn it back on now if |
| we've not got a leaf function. */ |
| if (TARGET_OMIT_LEAF_FRAME_POINTER |
| && (!crtl->is_leaf |
| || ix86_current_function_calls_tls_descriptor)) |
| return true; |
| |
| if (crtl->profile && !flag_fentry) |
| return true; |
| |
| return false; |
| } |
| |
| /* Record that the current function accesses previous call frames. */ |
| |
| void |
| ix86_setup_frame_addresses (void) |
| { |
| cfun->machine->accesses_prev_frame = 1; |
| } |
| |
| #ifndef USE_HIDDEN_LINKONCE |
| # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0) |
| # define USE_HIDDEN_LINKONCE 1 |
| # else |
| # define USE_HIDDEN_LINKONCE 0 |
| # endif |
| #endif |
| |
| static int pic_labels_used; |
| |
| /* Fills in the label name that should be used for a pc thunk for |
| the given register. */ |
| |
| static void |
| get_pc_thunk_name (char name[32], unsigned int regno) |
| { |
| gcc_assert (!TARGET_64BIT); |
| |
| if (USE_HIDDEN_LINKONCE) |
| sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]); |
| else |
| ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno); |
| } |
| |
| |
| /* This function emits the pc thunks used by -fpic code: each thunk |
| loads its target register with the return address of the caller and |
| then returns. */ |
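| /* A typical thunk, as emitted below for %ebx with USE_HIDDEN_LINKONCE |
| (a sketch; the exact directives depend on the assembler in use): |
| |
| __x86.get_pc_thunk.bx: |
| movl (%esp), %ebx |
| ret |
| */ |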
| |
| static void |
| ix86_code_end (void) |
| { |
| rtx xops[2]; |
| int regno; |
| |
| for (regno = AX_REG; regno <= SP_REG; regno++) |
| { |
| char name[32]; |
| tree decl; |
| |
| if (!(pic_labels_used & (1 << regno))) |
| continue; |
| |
| get_pc_thunk_name (name, regno); |
| |
| decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, |
| get_identifier (name), |
| build_function_type_list (void_type_node, NULL_TREE)); |
| DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, |
| NULL_TREE, void_type_node); |
| TREE_PUBLIC (decl) = 1; |
| TREE_STATIC (decl) = 1; |
| DECL_IGNORED_P (decl) = 1; |
| |
| #if TARGET_MACHO |
| if (TARGET_MACHO) |
| { |
| switch_to_section (darwin_sections[text_coal_section]); |
| fputs ("\t.weak_definition\t", asm_out_file); |
| assemble_name (asm_out_file, name); |
| fputs ("\n\t.private_extern\t", asm_out_file); |
| assemble_name (asm_out_file, name); |
| putc ('\n', asm_out_file); |
| ASM_OUTPUT_LABEL (asm_out_file, name); |
| DECL_WEAK (decl) = 1; |
| } |
| else |
| #endif |
| if (USE_HIDDEN_LINKONCE) |
| { |
| cgraph_create_node (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); |
| |
| targetm.asm_out.unique_section (decl, 0); |
| switch_to_section (get_named_section (decl, NULL, 0)); |
| |
| targetm.asm_out.globalize_label (asm_out_file, name); |
| fputs ("\t.hidden\t", asm_out_file); |
| assemble_name (asm_out_file, name); |
| putc ('\n', asm_out_file); |
| ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); |
| } |
| else |
| { |
| switch_to_section (text_section); |
| ASM_OUTPUT_LABEL (asm_out_file, name); |
| } |
| |
| DECL_INITIAL (decl) = make_node (BLOCK); |
| current_function_decl = decl; |
| init_function_start (decl); |
| first_function_block_is_cold = false; |
| /* Make sure unwind info is emitted for the thunk if needed. */ |
| final_start_function (emit_barrier (), asm_out_file, 1); |
| |
| /* Pad stack IP move with 4 instructions (two NOPs count |
| as one instruction). */ |
| if (TARGET_PAD_SHORT_FUNCTION) |
| { |
| int i = 8; |
| |
| while (i--) |
| fputs ("\tnop\n", asm_out_file); |
| } |
| |
| xops[0] = gen_rtx_REG (Pmode, regno); |
| xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx); |
| if (TARGET_SFI_CFLOW_NACL1) |
| /* The NaCl replacement for the return instruction needs a scratch |
| register. Fortunately, the value it puts in that register is |
| exactly the one we're trying to extract here. */ |
| output_asm_insn ("naclret\t%0", xops); |
| else |
| { |
| output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops); |
| output_asm_insn ("ret", NULL); |
| } |
| final_end_function (); |
| init_insn_lengths (); |
| free_after_compilation (cfun); |
| set_cfun (NULL); |
| current_function_decl = NULL; |
| } |
| |
| if (flag_split_stack) |
| file_end_indicate_split_stack (); |
| } |
| |
| /* Emit code for the SET_GOT patterns. */ |
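| /* In the common 32-bit PIC case the sequence emitted below is roughly |
| |
| call __x86.get_pc_thunk.bx |
| addl $_GLOBAL_OFFSET_TABLE_, %ebx |
| |
| (a sketch only; the VxWorks RTP, Mach-O and non-PIC variants differ). */ |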
| |
| const char * |
| output_set_got (rtx dest, rtx label) |
| { |
| rtx xops[3]; |
| |
| xops[0] = dest; |
| |
| if (TARGET_VXWORKS_RTP && flag_pic) |
| { |
| /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */ |
| xops[2] = gen_rtx_MEM (Pmode, |
| gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE)); |
| output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops); |
| |
| /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register. |
| Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as |
| an unadorned address. */ |
| xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); |
| SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL; |
| output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops); |
| return ""; |
| } |
| |
| xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); |
| |
| if (!flag_pic) |
| { |
| if (TARGET_MACHO) |
| /* We don't need a pic base, we're not producing pic. */ |
| gcc_unreachable (); |
| |
| xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ()); |
| output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops); |
| |
| #if TARGET_MACHO |
| /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This |
| is what will be referenced by the Mach-O PIC subsystem. */ |
| if (!label) |
| ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME); |
| #endif |
| |
| targetm.asm_out.internal_label (asm_out_file, "L", |
| CODE_LABEL_NUMBER (XEXP (xops[2], 0))); |
| } |
| else |
| { |
| char name[32]; |
| get_pc_thunk_name (name, REGNO (dest)); |
| pic_labels_used |= 1 << REGNO (dest); |
| |
| xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); |
| xops[2] = gen_rtx_MEM (QImode, xops[2]); |
| if (TARGET_SFI_CFLOW_NACL1) |
| output_asm_insn ("nacl_direct_call\t%X2", xops); |
| else |
| output_asm_insn ("call\t%X2", xops); |
| |
| #if TARGET_MACHO |
| /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here. |
| This is what will be referenced by the Mach-O PIC subsystem. */ |
| if (machopic_should_output_picbase_label () || !label) |
| ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME); |
| |
| /* When we are restoring the pic base at the site of a nonlocal label, |
| and we decided to emit the pic base above, we will still output a |
| local label used for calculating the correction offset (even though |
| the offset will be 0 in that case). */ |
| if (label) |
| targetm.asm_out.internal_label (asm_out_file, "L", |
| CODE_LABEL_NUMBER (label)); |
| #endif |
| } |
| |
| if (!TARGET_MACHO) |
| output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops); |
| |
| return ""; |
| } |
| |
| /* Generate a "push" pattern for input ARG. */ |
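| /* On x86-64, for example, the returned pattern is roughly |
| (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI arg)) |
| which is matched by the target's push insn patterns. */ |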
| |
| static rtx |
| gen_push (rtx arg) |
| { |
| struct machine_function *m = cfun->machine; |
| |
| if (m->fs.cfa_reg == stack_pointer_rtx) |
| m->fs.cfa_offset += UNITS_PER_WORD; |
| m->fs.sp_offset += UNITS_PER_WORD; |
| |
| if (REG_P (arg) && GET_MODE (arg) != word_mode) |
| arg = gen_rtx_REG (word_mode, REGNO (arg)); |
| |
| return gen_rtx_SET (VOIDmode, |
| gen_rtx_MEM (word_mode, |
| gen_rtx_PRE_DEC (Pmode, |
| stack_pointer_rtx)), |
| arg); |
| } |
| |
| /* Generate a "pop" pattern for input ARG. */ |
| |
| static rtx |
| gen_pop (rtx arg) |
| { |
| if (REG_P (arg) && GET_MODE (arg) != word_mode) |
| arg = gen_rtx_REG (word_mode, REGNO (arg)); |
| |
| return gen_rtx_SET (VOIDmode, |
| arg, |
| gen_rtx_MEM (word_mode, |
| gen_rtx_POST_INC (Pmode, |
| stack_pointer_rtx))); |
| } |
| |
| /* Return the number of an unused call-clobbered register that is |
| available for the entire function, or INVALID_REGNUM if there is none. */ |
| |
| static unsigned int |
| ix86_select_alt_pic_regnum (void) |
| { |
| if (crtl->is_leaf |
| && !crtl->profile |
| && !ix86_current_function_calls_tls_descriptor) |
| { |
| int i, drap; |
| /* Can't use the same register for both PIC and DRAP. */ |
| if (crtl->drap_reg) |
| drap = REGNO (crtl->drap_reg); |
| else |
| drap = -1; |
| for (i = 2; i >= 0; --i) |
| if (i != drap && !df_regs_ever_live_p (i)) |
| return i; |
| } |
| |
| return INVALID_REGNUM; |
| } |
| |
| /* Return TRUE if we need to save REGNO. */ |
| |
| static bool |
| ix86_save_reg (unsigned int regno, bool maybe_eh_return) |
| { |
| if (pic_offset_table_rtx |
| && regno == REAL_PIC_OFFSET_TABLE_REGNUM |
| && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM) |
| || crtl->profile |
| || crtl->calls_eh_return |
| || crtl->uses_const_pool |
| || cfun->has_nonlocal_label)) |
| return ix86_select_alt_pic_regnum () == INVALID_REGNUM; |
| |
| if (crtl->calls_eh_return && maybe_eh_return) |
| { |
| unsigned i; |
| for (i = 0; ; i++) |
| { |
| unsigned test = EH_RETURN_DATA_REGNO (i); |
| if (test == INVALID_REGNUM) |
| break; |
| if (test == regno) |
| return true; |
| } |
| } |
| |
| if (crtl->drap_reg |
| && regno == REGNO (crtl->drap_reg) |
| && !cfun->machine->no_drap_save_restore) |
| return true; |
| |
| return (df_regs_ever_live_p (regno) |
| && !call_used_regs[regno] |
| && !fixed_regs[regno] |
| && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); |
| } |
| |
| /* Return the number of saved general purpose registers. */ |
| |
| static int |
| ix86_nsaved_regs (void) |
| { |
| int nregs = 0; |
| int regno; |
| |
| for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) |
| nregs ++; |
| return nregs; |
| } |
| |
| /* Return the number of saved SSE registers. */ |
| |
| static int |
| ix86_nsaved_sseregs (void) |
| { |
| int nregs = 0; |
| int regno; |
| |
| if (!TARGET_64BIT_MS_ABI) |
| return 0; |
| for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) |
| nregs ++; |
| return nregs; |
| } |
| |
| /* Given FROM and TO register numbers, say whether this elimination is |
| allowed. If stack alignment is needed, we can only replace argument |
| pointer with hard frame pointer, or replace frame pointer with stack |
| pointer. Otherwise, frame pointer elimination is automatically |
| handled and all other eliminations are valid. */ |
| |
| static bool |
| ix86_can_eliminate (const int from, const int to) |
| { |
| if (stack_realign_fp) |
| return ((from == ARG_POINTER_REGNUM |
| && to == HARD_FRAME_POINTER_REGNUM) |
| || (from == FRAME_POINTER_REGNUM |
| && to == STACK_POINTER_REGNUM)); |
| else |
| return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true; |
| } |
| |
| /* Return the offset between two registers, one to be eliminated, and the other |
| its replacement, at the start of a routine. */ |
| |
| HOST_WIDE_INT |
| ix86_initial_elimination_offset (int from, int to) |
| { |
| struct ix86_frame frame; |
| ix86_compute_frame_layout (&frame); |
| |
| if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) |
| return frame.hard_frame_pointer_offset; |
| else if (from == FRAME_POINTER_REGNUM |
| && to == HARD_FRAME_POINTER_REGNUM) |
| return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; |
| else |
| { |
| gcc_assert (to == STACK_POINTER_REGNUM); |
| |
| if (from == ARG_POINTER_REGNUM) |
| return frame.stack_pointer_offset; |
| |
| gcc_assert (from == FRAME_POINTER_REGNUM); |
| return frame.stack_pointer_offset - frame.frame_pointer_offset; |
| } |
| } |
| |
| /* In a dynamically-aligned function, we can't know the offset from |
| stack pointer to frame pointer, so we must ensure that setjmp |
| eliminates fp against the hard fp (%ebp) rather than trying to |
| index from %esp up to the top of the frame across a gap that is |
| of unknown (at compile-time) size. */ |
| static rtx |
| ix86_builtin_setjmp_frame_value (void) |
| { |
| return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx; |
| } |
| |
| /* When using -fsplit-stack, the allocation routines set a field in |
| the TCB to the bottom of the stack plus this much space, measured |
| in bytes. */ |
| |
| #define SPLIT_STACK_AVAILABLE 256 |
| |
| /* Fill in the ix86_frame structure FRAME for the currently compiled function. */ |
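| /* A rough picture of the layout computed below, going from the incoming |
| return address towards lower addresses (illustrative only; several of |
| these areas may be absent or re-aligned depending on target and options): |
| |
| return address (offset 0) |
| pushed static chain, saved %ebp <- hard_frame_pointer_offset |
| saved GP registers <- reg_save_offset |
| saved SSE registers, 16-byte aligned <- sse_reg_save_offset |
| va_arg register save area |
| local variables <- frame_pointer_offset (start of locals) |
| outgoing arguments |
| <- stack_pointer_offset */ |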
| |
| static void |
| ix86_compute_frame_layout (struct ix86_frame *frame) |
| { |
| unsigned HOST_WIDE_INT stack_alignment_needed; |
| HOST_WIDE_INT offset; |
| unsigned HOST_WIDE_INT preferred_alignment; |
| HOST_WIDE_INT size = get_frame_size (); |
| HOST_WIDE_INT to_allocate; |
| |
| frame->nregs = ix86_nsaved_regs (); |
| frame->nsseregs = ix86_nsaved_sseregs (); |
| |
| /* The 64-bit MS ABI seems to require the stack alignment to always be 16, |
| except in function prologues and leaf functions. */ |
| if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128) |
| && (!crtl->is_leaf || cfun->calls_alloca != 0 |
| || ix86_current_function_calls_tls_descriptor)) |
| { |
| crtl->preferred_stack_boundary = 128; |
| crtl->stack_alignment_needed = 128; |
| } |
| /* preferred_stack_boundary is never updated for calls expanded from a |
| TLS descriptor. Update it here. We don't update it during the expand |
| stage because, according to the comments before |
| ix86_current_function_calls_tls_descriptor, TLS calls may be optimized |
| away. */ |
| else if (ix86_current_function_calls_tls_descriptor |
| && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY) |
| { |
| crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY; |
| if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY) |
| crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY; |
| } |
| |
| stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT; |
| preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT; |
| |
| gcc_assert (!size || stack_alignment_needed); |
| gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); |
| gcc_assert (preferred_alignment <= stack_alignment_needed); |
| |
| /* For SEH we have to limit the amount of code movement into the prologue. |
| At present we do this via a BLOCKAGE, at which point there's very little |
| scheduling that can be done, which means that there's very little point |
| in doing anything except PUSHs. */ |
| if (TARGET_SEH) |
| cfun->machine->use_fast_prologue_epilogue = false; |
| |
| /* The number of registers saved can change between reload iterations. |
| Recompute the value as needed. Do not recompute when the number of |
| registers did not change, as reload calls this function multiple times |
| and does not expect the decision to change within a single iteration. */ |
| else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
| && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs) |
| { |
| int count = frame->nregs; |
| struct cgraph_node *node = cgraph_get_node (current_function_decl); |
| |
| cfun->machine->use_fast_prologue_epilogue_nregs = count; |
| |
| /* The fast prologue uses move instead of push to save registers. This |
| is significantly longer, but also executes faster as modern hardware |
| can execute the moves in parallel, but can't do that for push/pop. |
| |
| Be careful about choosing which prologue to emit: when the function |
| takes many instructions to execute, we may as well use the slow |
| version, likewise when the function is known to be outside a hot spot |
| (known with feedback only). Weight the size of the function by the |
| number of registers to save, as it is cheap to use one or two push |
| instructions but very slow to use many of them. */ |
| if (count) |
| count = (count - 1) * FAST_PROLOGUE_INSN_COUNT; |
| if (node->frequency < NODE_FREQUENCY_NORMAL |
| || (flag_branch_probabilities |
| && node->frequency < NODE_FREQUENCY_HOT)) |
| cfun->machine->use_fast_prologue_epilogue = false; |
| else |
| cfun->machine->use_fast_prologue_epilogue |
| = !expensive_function_p (count); |
| } |
| |
| frame->save_regs_using_mov |
| = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue |
| /* If static stack checking is enabled and done with probes, |
| the registers need to be saved before allocating the frame. */ |
| && flag_stack_check != STATIC_BUILTIN_STACK_CHECK); |
| |
| /* Skip return address. */ |
| offset = UNITS_PER_WORD; |
| |
| /* Skip pushed static chain. */ |
| if (ix86_static_chain_on_stack) |
| offset += UNITS_PER_WORD; |
| |
| /* Skip saved base pointer. */ |
| if (frame_pointer_needed) |
| offset += UNITS_PER_WORD; |
| frame->hfp_save_offset = offset; |
| |
| /* The traditional frame pointer location is at the top of the frame. */ |
| frame->hard_frame_pointer_offset = offset; |
| |
| /* Register save area */ |
| offset += frame->nregs * UNITS_PER_WORD; |
| frame->reg_save_offset = offset; |
| |
| /* On SEH target, registers are pushed just before the frame pointer |
| location. */ |
| if (TARGET_SEH) |
| frame->hard_frame_pointer_offset = offset; |
| |
| /* Align and set SSE register save area. */ |
| if (frame->nsseregs) |
| { |
| /* The only ABI that has saved SSE registers (Win64) also has a |
| 16-byte aligned default stack, and thus we don't need to be |
| within the re-aligned local stack frame to save them. */ |
| gcc_assert (INCOMING_STACK_BOUNDARY >= 128); |
| offset = (offset + 16 - 1) & -16; |
| offset += frame->nsseregs * 16; |
| } |
| frame->sse_reg_save_offset = offset; |
| |
| /* The re-aligned stack starts here. Values before this point are not |
| directly comparable with values below this point. In order to make |
| sure that no value happens to be the same before and after, force |
| the alignment computation below to add a non-zero value. */ |
| if (stack_realign_fp) |
| offset = (offset + stack_alignment_needed) & -stack_alignment_needed; |
| |
| /* Va-arg area */ |
| frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; |
| offset += frame->va_arg_size; |
| |
| /* Align start of frame for local function. */ |
| if (stack_realign_fp |
| || offset != frame->sse_reg_save_offset |
| || size != 0 |
| || !crtl->is_leaf |
| || cfun->calls_alloca |
| || ix86_current_function_calls_tls_descriptor) |
| offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed; |
| |
| /* Frame pointer points here. */ |
| frame->frame_pointer_offset = offset; |
| |
| offset += size; |
| |
| /* Add the outgoing arguments area. This can be skipped if we eliminated |
| all function calls as dead code. Skipping is however impossible when |
| the function calls alloca, as the alloca expander assumes that the last |
| crtl->outgoing_args_size bytes of the stack frame are unused. */ |
| if (ACCUMULATE_OUTGOING_ARGS |
| && (!crtl->is_leaf || cfun->calls_alloca |
| || ix86_current_function_calls_tls_descriptor)) |
| { |
| offset += crtl->outgoing_args_size; |
| frame->outgoing_arguments_size = crtl->outgoing_args_size; |
| } |
| else |
| frame->outgoing_arguments_size = 0; |
| |
| /* Align stack boundary. Only needed if we're calling another function |
| or using alloca. */ |
| if (!crtl->is_leaf || cfun->calls_alloca |
| || ix86_current_function_calls_tls_descriptor) |
| offset = (offset + preferred_alignment - 1) & -preferred_alignment; |
| |
| /* We've reached end of stack frame. */ |
| frame->stack_pointer_offset = offset; |
| |
| /* Size prologue needs to allocate. */ |
| to_allocate = offset - frame->sse_reg_save_offset; |
| |
| if ((!to_allocate && frame->nregs <= 1) |
| || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000)) |
| frame->save_regs_using_mov = false; |
| |
| if (ix86_using_red_zone () |
| && crtl->sp_is_unchanging |
| && crtl->is_leaf |
| && !ix86_current_function_calls_tls_descriptor) |
| { |
| frame->red_zone_size = to_allocate; |
| if (frame->save_regs_using_mov) |
| frame->red_zone_size += frame->nregs * UNITS_PER_WORD; |
| if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) |
| frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; |
| } |
| else |
| frame->red_zone_size = 0; |
| frame->stack_pointer_offset -= frame->red_zone_size; |
| |
| /* The SEH frame pointer location is near the bottom of the frame. |
| This is enforced by the fact that the difference between the |
| stack pointer and the frame pointer is limited to 240 bytes in |
| the unwind data structure. */ |
| if (TARGET_SEH) |
| { |
| HOST_WIDE_INT diff; |
| |
| /* If we can leave the frame pointer where it is, do so. Also, returns |
| the establisher frame for __builtin_frame_address (0). */ |
| diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset; |
| if (diff <= SEH_MAX_FRAME_SIZE |
| && (diff > 240 || (diff & 15) != 0) |
| && !crtl->accesses_prior_frames) |
| { |
| /* Ideally we'd determine what portion of the local stack frame |
| (within the constraint of the lowest 240) is most heavily used. |
| But without that complication, simply bias the frame pointer |
| by 128 bytes so as to maximize the amount of the local stack |
| frame that is addressable with 8-bit offsets. */ |
| frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128; |
| } |
| } |
| } |
| |
| /* This is semi-inlined memory_address_length, but simplified |
| since we know that we're always dealing with reg+offset, and |
| to avoid having to create and discard all that rtl. */ |
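| /* For illustration: 0(%ebp) still needs a disp8 (len 1), 8(%eax) is a |
| disp8 (len 1), 8(%esp) additionally needs a SIB byte (len 2), and |
| 1024(%eax) needs a disp32 (len 4). */ |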
| |
| static inline int |
| choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset) |
| { |
| int len = 4; |
| |
| if (offset == 0) |
| { |
| /* EBP and R13 cannot be encoded without an offset. */ |
| len = (regno == BP_REG || regno == R13_REG); |
| } |
| else if (IN_RANGE (offset, -128, 127)) |
| len = 1; |
| |
| /* ESP and R12 must be encoded with a SIB byte. */ |
| if (regno == SP_REG || regno == R12_REG) |
| len++; |
| |
| return len; |
| } |
| |
| /* Return an RTX that points to CFA_OFFSET within the stack frame. |
| The valid base registers are taken from CFUN->MACHINE->FS. */ |
| |
| static rtx |
| choose_baseaddr (HOST_WIDE_INT cfa_offset) |
| { |
| const struct machine_function *m = cfun->machine; |
| rtx base_reg = NULL; |
| HOST_WIDE_INT base_offset = 0; |
| |
| if (m->use_fast_prologue_epilogue) |
| { |
| /* Choose the base register most likely to allow the most scheduling |
| opportunities. Generally FP is valid throughout the function, |
| while DRAP must be reloaded within the epilogue. But choose either |
| over the SP due to increased encoding size. */ |
| |
| if (m->fs.fp_valid) |
| { |
| base_reg = hard_frame_pointer_rtx; |
| base_offset = m->fs.fp_offset - cfa_offset; |
| } |
| else if (m->fs.drap_valid) |
| { |
| base_reg = crtl->drap_reg; |
| base_offset = 0 - cfa_offset; |
| } |
| else if (m->fs.sp_valid) |
| { |
| base_reg = stack_pointer_rtx; |
| base_offset = m->fs.sp_offset - cfa_offset; |
| } |
| } |
| else |
| { |
| HOST_WIDE_INT toffset; |
| int len = 16, tlen; |
| |
| /* Choose the base register with the smallest address encoding. |
| With a tie, choose FP > DRAP > SP. */ |
| if (m->fs.sp_valid) |
| { |
| base_reg = stack_pointer_rtx; |
| base_offset = m->fs.sp_offset - cfa_offset; |
| len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset); |
| } |
| if (m->fs.drap_valid) |
| { |
| toffset = 0 - cfa_offset; |
| tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset); |
| if (tlen <= len) |
| { |
| base_reg = crtl->drap_reg; |
| base_offset = toffset; |
| len = tlen; |
| } |
| } |
| if (m->fs.fp_valid) |
| { |
| toffset = m->fs.fp_offset - cfa_offset; |
| tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset); |
| if (tlen <= len) |
| { |
| base_reg = hard_frame_pointer_rtx; |
| base_offset = toffset; |
| len = tlen; |
| } |
| } |
| } |
| gcc_assert (base_reg != NULL); |
| |
| return plus_constant (Pmode, base_reg, base_offset); |
| } |
| |
| /* Emit code to save registers in the prologue. */ |
| |
| static void |
| ix86_emit_save_regs (void) |
| { |
| unsigned int regno; |
| rtx insn; |
| |
| for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; ) |
| if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) |
| { |
| insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| } |
| |
| /* Emit a single register save at CFA - CFA_OFFSET. */ |
| |
| static void |
| ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno, |
| HOST_WIDE_INT cfa_offset) |
| { |
| struct machine_function *m = cfun->machine; |
| rtx reg = gen_rtx_REG (mode, regno); |
| rtx mem, addr, base, insn; |
| |
| addr = choose_baseaddr (cfa_offset); |
| mem = gen_frame_mem (mode, addr); |
| |
| /* For SSE saves, we need to indicate the 128-bit alignment. */ |
| set_mem_align (mem, GET_MODE_ALIGNMENT (mode)); |
| |
| insn = emit_move_insn (mem, reg); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| base = addr; |
| if (GET_CODE (base) == PLUS) |
| base = XEXP (base, 0); |
| gcc_checking_assert (REG_P (base)); |
| |
| /* When saving registers into a re-aligned local stack frame, avoid |
| any tricky guessing by dwarf2out. */ |
| if (m->fs.realigned) |
| { |
| gcc_checking_assert (stack_realign_drap); |
| |
| if (regno == REGNO (crtl->drap_reg)) |
| { |
| /* A bit of a hack. We force the DRAP register to be saved in |
| the re-aligned stack frame, which provides us with a copy |
| of the CFA that will last past the prologue. Install it. */ |
| gcc_checking_assert (cfun->machine->fs.fp_valid); |
| addr = plus_constant (Pmode, hard_frame_pointer_rtx, |
| cfun->machine->fs.fp_offset - cfa_offset); |
| mem = gen_rtx_MEM (mode, addr); |
| add_reg_note (insn, REG_CFA_DEF_CFA, mem); |
| } |
| else |
| { |
| /* The frame pointer is a stable reference within the |
| aligned frame. Use it. */ |
| gcc_checking_assert (cfun->machine->fs.fp_valid); |
| addr = plus_constant (Pmode, hard_frame_pointer_rtx, |
| cfun->machine->fs.fp_offset - cfa_offset); |
| mem = gen_rtx_MEM (mode, addr); |
| add_reg_note (insn, REG_CFA_EXPRESSION, |
| gen_rtx_SET (VOIDmode, mem, reg)); |
| } |
| } |
| |
| /* The memory may not be relative to the current CFA register, |
| which means that we may need to generate a new pattern for |
| use by the unwind info. */ |
| else if (base != m->fs.cfa_reg) |
| { |
| addr = plus_constant (Pmode, m->fs.cfa_reg, |
| m->fs.cfa_offset - cfa_offset); |
| mem = gen_rtx_MEM (mode, addr); |
| add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg)); |
| } |
| } |
| |
| /* Emit code to save registers using MOV insns. |
| First register is stored at CFA - CFA_OFFSET. */ |
| static void |
| ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset) |
| { |
| unsigned int regno; |
| |
| for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) |
| { |
| ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset); |
| cfa_offset -= UNITS_PER_WORD; |
| } |
| } |
| |
| /* Emit code to save SSE registers using MOV insns. |
| First register is stored at CFA - CFA_OFFSET. */ |
| static void |
| ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset) |
| { |
| unsigned int regno; |
| |
| for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) |
| { |
| ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset); |
| cfa_offset -= 16; |
| } |
| } |
| |
| static GTY(()) rtx queued_cfa_restores; |
| |
| /* Add a REG_CFA_RESTORE note for REG to INSN, or queue it until the next |
| stack manipulation insn. The value is on the stack at CFA - CFA_OFFSET. |
| Don't add the note if the previously saved value will be left untouched |
| within the stack red zone until return, as unwinders can find the same |
| value in the register and on the stack. */ |
| |
| static void |
| ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset) |
| { |
| if (!crtl->shrink_wrapped |
| && cfa_offset <= cfun->machine->fs.red_zone_offset) |
| return; |
| |
| if (insn) |
| { |
| add_reg_note (insn, REG_CFA_RESTORE, reg); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| else |
| queued_cfa_restores |
| = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores); |
| } |
| |
| /* Add queued REG_CFA_RESTORE notes if any to INSN. */ |
| |
| static void |
| ix86_add_queued_cfa_restore_notes (rtx insn) |
| { |
| rtx last; |
| if (!queued_cfa_restores) |
| return; |
| for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1)) |
| ; |
| XEXP (last, 1) = REG_NOTES (insn); |
| REG_NOTES (insn) = queued_cfa_restores; |
| queued_cfa_restores = NULL_RTX; |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| |
| /* Expand a prologue or epilogue stack adjustment. |
| The pattern exists to put a dependency on all ebp-based memory accesses. |
| STYLE should be negative if instructions should be marked as frame |
| related, zero if the %r11 register is live and cannot be freely used, |
| and positive otherwise. */ |
| |
| static void |
| pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, |
| int style, bool set_cfa) |
| { |
| struct machine_function *m = cfun->machine; |
| rtx insn; |
| bool add_frame_related_expr = false; |
| |
| if (Pmode == SImode) |
| insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset); |
| else if (x86_64_immediate_operand (offset, DImode)) |
| insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset); |
| else |
| { |
| rtx tmp; |
| /* r11 is used by indirect sibcall return as well, set before the |
| epilogue and used after the epilogue. */ |
| if (style) |
| tmp = gen_rtx_REG (DImode, R11_REG); |
| else |
| { |
| gcc_assert (src != hard_frame_pointer_rtx |
| && dest != hard_frame_pointer_rtx); |
| tmp = hard_frame_pointer_rtx; |
| } |
| insn = emit_insn (gen_rtx_SET (DImode, tmp, offset)); |
| if (style < 0) |
| add_frame_related_expr = true; |
| |
| insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp); |
| } |
| |
| insn = emit_insn (insn); |
| if (style >= 0) |
| ix86_add_queued_cfa_restore_notes (insn); |
| |
| if (set_cfa) |
| { |
| rtx r; |
| |
| gcc_assert (m->fs.cfa_reg == src); |
| m->fs.cfa_offset += INTVAL (offset); |
| m->fs.cfa_reg = dest; |
| |
| r = gen_rtx_PLUS (Pmode, src, offset); |
| r = gen_rtx_SET (VOIDmode, dest, r); |
| add_reg_note (insn, REG_CFA_ADJUST_CFA, r); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| else if (style < 0) |
| { |
| RTX_FRAME_RELATED_P (insn) = 1; |
| if (add_frame_related_expr) |
| { |
| rtx r = gen_rtx_PLUS (Pmode, src, offset); |
| r = gen_rtx_SET (VOIDmode, dest, r); |
| add_reg_note (insn, REG_FRAME_RELATED_EXPR, r); |
| } |
| } |
| |
| if (dest == stack_pointer_rtx) |
| { |
| HOST_WIDE_INT ooffset = m->fs.sp_offset; |
| bool valid = m->fs.sp_valid; |
| |
| if (src == hard_frame_pointer_rtx) |
| { |
| valid = m->fs.fp_valid; |
| ooffset = m->fs.fp_offset; |
| } |
| else if (src == crtl->drap_reg) |
| { |
| valid = m->fs.drap_valid; |
| ooffset = 0; |
| } |
| else |
| { |
| /* Otherwise there are two possibilities: SP itself, which we set |
| up as the default above, or EH_RETURN_STACKADJ_RTX, which is |
| taken care of by hand along the eh_return path. */ |
| gcc_checking_assert (src == stack_pointer_rtx |
| || offset == const0_rtx); |
| } |
| |
| m->fs.sp_offset = ooffset - INTVAL (offset); |
| m->fs.sp_valid = valid; |
| } |
| } |
| |
| /* Find an available register to be used as the dynamic realign argument |
| pointer register. Such a register will be written in the prologue and |
| used at the beginning of the body, so it must not be |
| 1. a parameter passing register. |
| 2. the GOT pointer. |
| We reuse the static-chain register if it is available. Otherwise, we |
| use DI for i386 and R13 for x86-64. We chose R13 since it has a |
| longer encoding. |
| |
| Return: the regno of the chosen register. */ |
| |
| static unsigned int |
| find_drap_reg (void) |
| { |
| tree decl = cfun->decl; |
| |
| if (TARGET_64BIT) |
| { |
| /* Use R13 for a nested function or a function that needs a static |
| chain. Since a function with a tail call may use any caller-saved |
| register in the epilogue, DRAP must not use a caller-saved |
| register in that case. */ |
| if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit) |
| return R13_REG; |
| |
| return R10_REG; |
| } |
| else |
| { |
| /* Use DI for a nested function or a function that needs a static |
| chain. Since a function with a tail call may use any caller-saved |
| register in the epilogue, DRAP must not use a caller-saved |
| register in that case. */ |
| if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit) |
| return DI_REG; |
| |
| /* Reuse static chain register if it isn't used for parameter |
| passing. */ |
| if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2) |
| { |
| unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl)); |
| if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0) |
| return CX_REG; |
| } |
| return DI_REG; |
| } |
| } |
| |
| /* Return minimum incoming stack alignment. */ |
| |
| static unsigned int |
| ix86_minimum_incoming_stack_boundary (bool sibcall) |
| { |
| unsigned int incoming_stack_boundary; |
| |
| /* Prefer the one specified at command line. */ |
| if (ix86_user_incoming_stack_boundary) |
| incoming_stack_boundary = ix86_user_incoming_stack_boundary; |
| /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary |
| if -mstackrealign is used, we are not checking for a sibcall, and the |
| estimated stack alignment is 128 bits. */ |
| else if (!sibcall |
| && !TARGET_64BIT |
| && ix86_force_align_arg_pointer |
| && crtl->stack_alignment_estimated == 128) |
| incoming_stack_boundary = MIN_STACK_BOUNDARY; |
| else |
| incoming_stack_boundary = ix86_default_incoming_stack_boundary; |
| |
| /* Incoming stack alignment can be changed on individual functions |
| via force_align_arg_pointer attribute. We use the smallest |
| incoming stack boundary. */ |
| if (incoming_stack_boundary > MIN_STACK_BOUNDARY |
| && lookup_attribute (ix86_force_align_arg_pointer_string, |
| TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) |
| incoming_stack_boundary = MIN_STACK_BOUNDARY; |
| |
| /* The incoming stack frame has to be aligned at least at |
| parm_stack_boundary. */ |
| if (incoming_stack_boundary < crtl->parm_stack_boundary) |
| incoming_stack_boundary = crtl->parm_stack_boundary; |
| |
| /* The stack at the entry of main is aligned by the runtime. We use |
| the smallest incoming stack boundary. */ |
| if (incoming_stack_boundary > MAIN_STACK_BOUNDARY |
| && DECL_NAME (current_function_decl) |
| && MAIN_NAME_P (DECL_NAME (current_function_decl)) |
| && DECL_FILE_SCOPE_P (current_function_decl)) |
| incoming_stack_boundary = MAIN_STACK_BOUNDARY; |
| |
| return incoming_stack_boundary; |
| } |
| |
| /* Update incoming stack boundary and estimated stack alignment. */ |
| |
| static void |
| ix86_update_stack_boundary (void) |
| { |
| ix86_incoming_stack_boundary |
| = ix86_minimum_incoming_stack_boundary (false); |
| |
| /* The x86-64 varargs register save area needs 16-byte stack |
| alignment. */ |
| if (TARGET_64BIT |
| && cfun->stdarg |
| && crtl->stack_alignment_estimated < 128) |
| crtl->stack_alignment_estimated = 128; |
| } |
| |
| /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is |
| needed or an rtx for DRAP otherwise. */ |
| |
| static rtx |
| ix86_get_drap_rtx (void) |
| { |
| if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS) |
| crtl->need_drap = true; |
| |
| if (stack_realign_drap) |
| { |
| /* Assign DRAP to vDRAP and return vDRAP. */ |
| unsigned int regno = find_drap_reg (); |
| rtx drap_vreg; |
| rtx arg_ptr; |
| rtx seq, insn; |
| |
| arg_ptr = gen_rtx_REG (Pmode, regno); |
| crtl->drap_reg = arg_ptr; |
| |
| start_sequence (); |
| drap_vreg = copy_to_reg (arg_ptr); |
| seq = get_insns (); |
| end_sequence (); |
| |
| insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ())); |
| if (!optimize) |
| { |
| add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| return drap_vreg; |
| } |
| else |
| return NULL; |
| } |
| |
| /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */ |
| |
| static rtx |
| ix86_internal_arg_pointer (void) |
| { |
| return virtual_incoming_args_rtx; |
| } |
| |
| struct scratch_reg { |
| rtx reg; |
| bool saved; |
| }; |
| |
| /* Return a short-lived scratch register for use on function entry. |
| In 32-bit mode, it is valid only after the registers are saved |
| in the prologue. This register must be released by means of |
| release_scratch_register_on_entry once it is dead. */ |
| |
| static void |
| get_scratch_register_on_entry (struct scratch_reg *sr) |
| { |
| int regno; |
| |
| sr->saved = false; |
| |
| if (TARGET_64BIT) |
| { |
| /* We always use R11 in 64-bit mode. */ |
| regno = R11_REG; |
| } |
| else |
| { |
| tree decl = current_function_decl, fntype = TREE_TYPE (decl); |
| bool fastcall_p |
| = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE; |
| bool thiscall_p |
| = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE; |
| bool static_chain_p = DECL_STATIC_CHAIN (decl); |
| int regparm = ix86_function_regparm (fntype, decl); |
| int drap_regno |
| = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM; |
| |
| /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax |
| for the static chain register. */ |
| if ((regparm < 1 || (fastcall_p && !static_chain_p)) |
| && drap_regno != AX_REG) |
| regno = AX_REG; |
| /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx |
| for the static chain register. */ |
| else if (thiscall_p && !static_chain_p && drap_regno != AX_REG) |
| regno = AX_REG; |
| else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG) |
| regno = DX_REG; |
| /* ecx is the static chain register. */ |
| else if (regparm < 3 && !fastcall_p && !thiscall_p |
| && !static_chain_p |
| && drap_regno != CX_REG) |
| regno = CX_REG; |
| else if (ix86_save_reg (BX_REG, true)) |
| regno = BX_REG; |
| /* esi is the static chain register. */ |
| else if (!(regparm == 3 && static_chain_p) |
| && ix86_save_reg (SI_REG, true)) |
| regno = SI_REG; |
| else if (ix86_save_reg (DI_REG, true)) |
| regno = DI_REG; |
| else |
| { |
| regno = (drap_regno == AX_REG ? DX_REG : AX_REG); |
| sr->saved = true; |
| } |
| } |
| |
| sr->reg = gen_rtx_REG (Pmode, regno); |
| if (sr->saved) |
| { |
| rtx insn = emit_insn (gen_push (sr->reg)); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| } |
| |
| /* Release a scratch register obtained from the preceding function. */ |
| |
| static void |
| release_scratch_register_on_entry (struct scratch_reg *sr) |
| { |
| if (sr->saved) |
| { |
| struct machine_function *m = cfun->machine; |
| rtx x, insn = emit_insn (gen_pop (sr->reg)); |
| |
| /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */ |
| RTX_FRAME_RELATED_P (insn) = 1; |
| x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD)); |
| x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x); |
| add_reg_note (insn, REG_FRAME_RELATED_EXPR, x); |
| m->fs.sp_offset -= UNITS_PER_WORD; |
| } |
| } |
| |
| #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) |
| |
| /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */ |
| |
| static void |
| ix86_adjust_stack_and_probe (const HOST_WIDE_INT size) |
| { |
| /* We skip the probe for the first interval + a small dope of 4 words and |
| probe that many bytes past the specified size to maintain a protection |
| area at the bottom of the stack. */ |
| const int dope = 4 * UNITS_PER_WORD; |
| rtx size_rtx = GEN_INT (size), last; |
| |
| /* See if we have a constant small number of probes to generate. If so, |
| that's the easy case. The run-time loop is made up of 11 insns in the |
| generic case while the compile-time loop is made up of 3+2*(n-1) insns |
| for n # of intervals. */ |
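| /* For instance, with PROBE_INTERVAL == 4096, dope == 32 and SIZE == 8192, |
| the unrolled form below is roughly |
| sub $8224, %rsp ; probe at (%rsp) |
| sub $4096, %rsp ; probe at (%rsp) |
| add $4128, %rsp |
| leaving the stack pointer 8192 bytes lower (a sketch only). */ |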
| if (size <= 5 * PROBE_INTERVAL) |
| { |
| HOST_WIDE_INT i, adjust; |
| bool first_probe = true; |
| |
| /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for |
| values of N from 1 until it exceeds SIZE. If only one probe is |
| needed, this will not generate any code. Then adjust and probe |
| to PROBE_INTERVAL + SIZE. */ |
| for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) |
| { |
| if (first_probe) |
| { |
| adjust = 2 * PROBE_INTERVAL + dope; |
| first_probe = false; |
| } |
| else |
| adjust = PROBE_INTERVAL; |
| |
| emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, |
| plus_constant (Pmode, stack_pointer_rtx, |
| -adjust))); |
| emit_stack_probe (stack_pointer_rtx); |
| } |
| |
| if (first_probe) |
| adjust = size + PROBE_INTERVAL + dope; |
| else |
| adjust = size + PROBE_INTERVAL - i; |
| |
| emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, |
| plus_constant (Pmode, stack_pointer_rtx, |
| -adjust))); |
| emit_stack_probe (stack_pointer_rtx); |
| |
| /* Adjust back to account for the additional first interval. */ |
| last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, |
| plus_constant (Pmode, stack_pointer_rtx, |
| PROBE_INTERVAL + dope))); |
| } |
| |
| /* Otherwise, do the same as above, but in a loop. Note that we must be |
| extra careful with variables wrapping around because we might be at |
| the very top (or the very bottom) of the address space and we have |
| to be able to handle this case properly; in particular, we use an |
| equality test for the loop condition. */ |
| else |
| { |
| HOST_WIDE_INT rounded_size; |
| struct scratch_reg sr; |
| |
| get_scratch_register_on_entry (&sr); |
| |
| |
| /* Step 1: round SIZE to the previous multiple of the interval. */ |
| |
| rounded_size = size & -PROBE_INTERVAL; |
| |
| |
| /* Step 2: compute initial and final value of the loop counter. */ |
| |
| /* SP = SP_0 + PROBE_INTERVAL. */ |
| emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, |
| plus_constant (Pmode, stack_pointer_rtx, |
| - (PROBE_INTERVAL + dope)))); |
| |
| /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */ |
| emit_move_insn (sr.reg, GEN_INT (-rounded_size)); |
| emit_insn (gen_rtx_SET (VOIDmode, sr.reg, |
| gen_rtx_PLUS (Pmode, sr.reg, |
| stack_pointer_rtx))); |
| |
| |
| /* Step 3: the loop |
| |
| while (SP != LAST_ADDR) |
| { |
| SP = SP + PROBE_INTERVAL |
| probe at SP |
| } |
| |
| adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for |
| values of N from 1 until it is equal to ROUNDED_SIZE. */ |
| |
| emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx)); |
| |
| |
| /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot |
| assert at compile-time that SIZE is equal to ROUNDED_SIZE. */ |
| |
| if (size != rounded_size) |
| { |
| emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, |
| plus_constant (Pmode, stack_pointer_rtx, |
| rounded_size - size))); |
| emit_stack_probe (stack_pointer_rtx); |
| } |
| |
| /* Adjust back to account for the additional first interval. */ |
| last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, |
| plus_constant (Pmode, stack_pointer_rtx, |
| PROBE_INTERVAL + dope))); |
| |
| release_scratch_register_on_entry (&sr); |
| } |
| |
| gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx); |
| |
| /* Even if the stack pointer isn't the CFA register, we need to correctly |
| describe the adjustments made to it, in particular differentiate the |
| frame-related ones from the frame-unrelated ones. */ |
| if (size > 0) |
| { |
| rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2)); |
| XVECEXP (expr, 0, 0) |
| = gen_rtx_SET (VOIDmode, stack_pointer_rtx, |
| plus_constant (Pmode, stack_pointer_rtx, -size)); |
| XVECEXP (expr, 0, 1) |
| = gen_rtx_SET (VOIDmode, stack_pointer_rtx, |
| plus_constant (Pmode, stack_pointer_rtx, |
| PROBE_INTERVAL + dope + size)); |
| add_reg_note (last, REG_FRAME_RELATED_EXPR, expr); |
| RTX_FRAME_RELATED_P (last) = 1; |
| |
| cfun->machine->fs.sp_offset += size; |
| } |
| |
| /* Make sure nothing is scheduled before we are done. */ |
| emit_insn (gen_blockage ()); |
| } |
| |
| /* Adjust the stack pointer up to REG while probing it. */ |
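| /* The emitted loop looks roughly like this (AT&T syntax, assuming |
| 32-bit mode; the operand sizes follow Pmode and the constant is |
| PROBE_INTERVAL): |
| |
| .LPSRL0: cmpl %reg, %esp |
| je .LPSRE0 |
| subl $PROBE_INTERVAL, %esp |
| orl $0, (%esp) |
| jmp .LPSRL0 |
| .LPSRE0: */ |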
| |
| const char * |
| output_adjust_stack_and_probe (rtx reg) |
| { |
| static int labelno = 0; |
| char loop_lab[32], end_lab[32]; |
| rtx xops[2]; |
| |
| ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno); |
| ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++); |
| |
| ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); |
| |
| /* Jump to END_LAB if SP == LAST_ADDR. */ |
| xops[0] = stack_pointer_rtx; |
| xops[1] = reg; |
| output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops); |
| fputs ("\tje\t", asm_out_file); |
| assemble_name_raw (asm_out_file, end_lab); |
| fputc ('\n', asm_out_file); |
| |
| /* SP = SP + PROBE_INTERVAL. */ |
| xops[1] = GEN_INT (PROBE_INTERVAL); |
| output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops); |
| |
| /* Probe at SP. */ |
| xops[1] = const0_rtx; |
| output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops); |
| |
| fprintf (asm_out_file, "\tjmp\t"); |
| assemble_name_raw (asm_out_file, loop_lab); |
| fputc ('\n', asm_out_file); |
| |
| ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab); |
| |
| return ""; |
| } |
| |
| /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, |
| inclusive. These are offsets from the current stack pointer. */ |
| |
| static void |
| ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) |
| { |
| /* See if we have a constant small number of probes to generate. If so, |
| that's the easy case. The run-time loop is made up of 7 insns in the |
| generic case while the compile-time loop is made up of n insns for n # |
| of intervals. */ |
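| /* For instance, with FIRST == STACK_CHECK_PROTECT and a 4096-byte |
| interval (an assumption, see PROBE_INTERVAL above), the unrolled |
| sequence touches sp - (FIRST + 4096), sp - (FIRST + 8192), ... and |
| finally sp - (FIRST + SIZE). */ |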
| if (size <= 7 * PROBE_INTERVAL) |
| { |
| HOST_WIDE_INT i; |
| |
| /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until |
| it exceeds SIZE. If only one probe is needed, this will not |
| generate any code. Then probe at FIRST + SIZE. */ |
| for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) |
| emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, |
| -(first + i))); |
| |
| emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, |
| -(first + size))); |
| } |
| |
| /* Otherwise, do the same as above, but in a loop. Note that we must be |
| extra careful with variables wrapping around because we might be at |
| the very top (or the very bottom) of the address space and we have |
| to be able to handle this case properly; in particular, we use an |
| equality test for the loop condition. */ |
| else |
| { |
| HOST_WIDE_INT rounded_size, last; |
| struct scratch_reg sr; |
| |
| get_scratch_register_on_entry (&sr); |
| |
| |
| /* Step 1: round SIZE to the previous multiple of the interval. */ |
| |
| rounded_size = size & -PROBE_INTERVAL; |
| |
| |
| /* Step 2: compute initial and final value of the loop counter. */ |
| |
| /* TEST_OFFSET = FIRST. */ |
| emit_move_insn (sr.reg, GEN_INT (-first)); |
| |
| /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */ |
| last = first + rounded_size; |
| |
| |
| /* Step 3: the loop |
| |
| while (TEST_ADDR != LAST_ADDR) |
| { |
| TEST_ADDR = TEST_ADDR + PROBE_INTERVAL |
| probe at TEST_ADDR |
| } |
| |
| probes at FIRST + N * PROBE_INTERVAL for values of N from 1 |
| until it is equal to ROUNDED_SIZE. */ |
| |
| emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last))); |
| |
| |
| /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time |
| that SIZE is equal to ROUNDED_SIZE. */ |
| |
| if (size != rounded_size) |
| emit_stack_probe (plus_constant (Pmode, |
| gen_rtx_PLUS (Pmode, |
| stack_pointer_rtx, |
| sr.reg), |
| rounded_size - size)); |
| |
| release_scratch_register_on_entry (&sr); |
| } |
| |
| /* Make sure nothing is scheduled before we are done. */ |
| emit_insn (gen_blockage ()); |
| } |
| |
| /* Probe a range of stack addresses from REG to END, inclusive. These are |
| offsets from the current stack pointer. */ |
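| /* The emitted loop looks roughly like this (AT&T syntax, 32-bit |
| case), with %reg holding the negated probe offset: |
| |
| .LPSRL0: cmpl %end, %reg |
| je .LPSRE0 |
| subl $PROBE_INTERVAL, %reg |
| orl $0, (%esp,%reg) |
| jmp .LPSRL0 |
| .LPSRE0: */ |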
| |
| const char * |
| output_probe_stack_range (rtx reg, rtx end) |
| { |
| static int labelno = 0; |
| char loop_lab[32], end_lab[32]; |
| rtx xops[3]; |
| |
| ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno); |
| ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++); |
| |
| ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); |
| |
| /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */ |
| xops[0] = reg; |
| xops[1] = end; |
| output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops); |
| fputs ("\tje\t", asm_out_file); |
| assemble_name_raw (asm_out_file, end_lab); |
| fputc ('\n', asm_out_file); |
| |
| /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ |
| xops[1] = GEN_INT (PROBE_INTERVAL); |
| output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops); |
| |
| /* Probe at TEST_ADDR. */ |
| xops[0] = stack_pointer_rtx; |
| xops[1] = reg; |
| xops[2] = const0_rtx; |
| output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops); |
| |
| fprintf (asm_out_file, "\tjmp\t"); |
| assemble_name_raw (asm_out_file, loop_lab); |
| fputc ('\n', asm_out_file); |
| |
| ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab); |
| |
| return ""; |
| } |
| |
| /* Finalize stack_realign_needed flag, which will guide prologue/epilogue |
| to be generated in correct form. */ |
| static void |
| ix86_finalize_stack_realign_flags (void) |
| { |
| /* Check if stack realignment is really needed after reload, and |
| store the result in cfun. */ |
| unsigned int incoming_stack_boundary |
| = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary |
| ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary); |
| unsigned int stack_realign = (incoming_stack_boundary |
| < (crtl->is_leaf |
| ? crtl->max_used_stack_slot_alignment |
| : crtl->stack_alignment_needed)); |
| |
| if (crtl->stack_realign_finalized) |
| { |
| /* After stack_realign_needed is finalized, we can no longer |
| change it. */ |
| gcc_assert (crtl->stack_realign_needed == stack_realign); |
| return; |
| } |
| |
| /* If the only reason for frame_pointer_needed is that we conservatively |
| assumed stack realignment might be needed, but in the end nothing that |
| needed the stack alignment had been spilled, clear frame_pointer_needed |
| and say we don't need stack realignment. */ |
| if (stack_realign |
| && frame_pointer_needed |
| && crtl->is_leaf |
| && flag_omit_frame_pointer |
| && crtl->sp_is_unchanging |
| && !ix86_current_function_calls_tls_descriptor |
| && !crtl->accesses_prior_frames |
| && !cfun->calls_alloca |
| && !crtl->calls_eh_return |
| && !(flag_stack_check && STACK_CHECK_MOVING_SP) |
| && !ix86_frame_pointer_required () |
| && get_frame_size () == 0 |
| && ix86_nsaved_sseregs () == 0 |
| && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0) |
| { |
| HARD_REG_SET set_up_by_prologue, prologue_used; |
| basic_block bb; |
| |
| CLEAR_HARD_REG_SET (prologue_used); |
| CLEAR_HARD_REG_SET (set_up_by_prologue); |
| add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM); |
| add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM); |
| add_to_hard_reg_set (&set_up_by_prologue, Pmode, |
| HARD_FRAME_POINTER_REGNUM); |
| FOR_EACH_BB_FN (bb, cfun) |
| { |
| rtx insn; |
| FOR_BB_INSNS (bb, insn) |
| if (NONDEBUG_INSN_P (insn) |
| && requires_stack_frame_p (insn, prologue_used, |
| set_up_by_prologue)) |
| { |
| crtl->stack_realign_needed = stack_realign; |
| crtl->stack_realign_finalized = true; |
| return; |
| } |
| } |
| |
| /* If drap has been set, but it actually isn't live at the start |
| of the function, there is no reason to set it up. */ |
| if (crtl->drap_reg) |
| { |
| basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; |
| if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg))) |
| { |
| crtl->drap_reg = NULL_RTX; |
| crtl->need_drap = false; |
| } |
| } |
| else |
| cfun->machine->no_drap_save_restore = true; |
| |
| frame_pointer_needed = false; |
| stack_realign = false; |
| crtl->max_used_stack_slot_alignment = incoming_stack_boundary; |
| crtl->stack_alignment_needed = incoming_stack_boundary; |
| crtl->stack_alignment_estimated = incoming_stack_boundary; |
| if (crtl->preferred_stack_boundary > incoming_stack_boundary) |
| crtl->preferred_stack_boundary = incoming_stack_boundary; |
| df_finish_pass (true); |
| df_scan_alloc (NULL); |
| df_scan_blocks (); |
| df_compute_regs_ever_live (true); |
| df_analyze (); |
| } |
| |
| crtl->stack_realign_needed = stack_realign; |
| crtl->stack_realign_finalized = true; |
| } |
| |
| /* Expand the prologue into a bunch of separate insns. */ |
| |
| void |
| ix86_expand_prologue (void) |
| { |
| struct machine_function *m = cfun->machine; |
| rtx insn, t; |
| bool pic_reg_used; |
| struct ix86_frame frame; |
| HOST_WIDE_INT allocate; |
| bool int_registers_saved; |
| bool sse_registers_saved; |
| |
| ix86_finalize_stack_realign_flags (); |
| |
| /* DRAP should not coexist with stack_realign_fp */ |
| gcc_assert (!(crtl->drap_reg && stack_realign_fp)); |
| |
| memset (&m->fs, 0, sizeof (m->fs)); |
| |
| /* Initialize CFA state for before the prologue. */ |
| m->fs.cfa_reg = stack_pointer_rtx; |
| m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET; |
| |
| /* Track SP offset to the CFA. We continue tracking this after we've |
| swapped the CFA register away from SP. In the case of re-alignment |
| this is fudged; we're interested in offsets within the local frame. */ |
| m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; |
| m->fs.sp_valid = true; |
| |
| ix86_compute_frame_layout (&frame); |
| |
| if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl)) |
| { |
| /* We should have already generated an error for any use of |
| ms_hook on a nested function. */ |
| gcc_checking_assert (!ix86_static_chain_on_stack); |
| |
| /* Check if profiling is active and we shall use the profiling-before- |
| prologue variant. If so, issue a sorry. */ |
| if (crtl->profile && flag_fentry != 0) |
| sorry ("ms_hook_prologue attribute isn%'t compatible " |
| "with -mfentry for 32-bit"); |
| |
| /* In ix86_asm_output_function_label we emitted: |
| 8b ff movl.s %edi,%edi |
| 55 push %ebp |
| 8b ec movl.s %esp,%ebp |
| |
| This matches the hookable function prologue in Win32 API |
| functions in Microsoft Windows XP Service Pack 2 and newer. |
| Wine uses this to enable Windows apps to hook the Win32 API |
| functions provided by Wine. |
| |
| What that means is that we've already set up the frame pointer. */ |
| |
| if (frame_pointer_needed |
| && !(crtl->drap_reg && crtl->stack_realign_needed)) |
| { |
| rtx push, mov; |
| |
| /* We've decided to use the frame pointer already set up. |
| Describe this to the unwinder by pretending that both |
| push and mov insns happen right here. |
| |
| Putting the unwind info here at the end of the ms_hook |
| is done so that we can make absolutely certain we get |
| the required byte sequence at the start of the function, |
| rather than relying on an assembler that can produce |
| the exact encoding required. |
| |
| It does mean (in the unpatched case) that we have a |
| 1-insn window where the asynchronous unwind info is |
| incorrect. However, if we placed the unwind info at |
| its correct location we would have incorrect unwind info |
| in the patched case instead. This is probably all moot |
| since I don't expect Wine to generate dwarf2 unwind info |
| for the system libraries that use this feature. */ |
| |
| insn = emit_insn (gen_blockage ()); |
| |
| push = gen_push (hard_frame_pointer_rtx); |
| mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, |
| stack_pointer_rtx); |
| RTX_FRAME_RELATED_P (push) = 1; |
| RTX_FRAME_RELATED_P (mov) = 1; |
| |
| RTX_FRAME_RELATED_P (insn) = 1; |
| add_reg_note (insn, REG_FRAME_RELATED_EXPR, |
| gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov))); |
| |
| /* Note that gen_push incremented m->fs.cfa_offset, even |
| though we didn't emit the push insn here. */ |
| m->fs.cfa_reg = hard_frame_pointer_rtx; |
| m->fs.fp_offset = m->fs.cfa_offset; |
| m->fs.fp_valid = true; |
| } |
| else |
| { |
| /* The frame pointer is not needed so pop %ebp again. |
| This leaves us with a pristine state. */ |
| emit_insn (gen_pop (hard_frame_pointer_rtx)); |
| } |
| } |
| |
| /* The first insn of a function that accepts its static chain on the |
| stack is to push the register that would be filled in by a direct |
| call. This insn will be skipped by the trampoline. */ |
| else if (ix86_static_chain_on_stack) |
| { |
| insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false))); |
| emit_insn (gen_blockage ()); |
| |
| /* We don't want to interpret this push insn as a register save, |
| only as a stack adjustment. The real copy of the register as |
| a save will be done later, if needed. */ |
| t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD); |
| t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t); |
| add_reg_note (insn, REG_CFA_ADJUST_CFA, t); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| |
| /* Emit prologue code to adjust stack alignment and set up DRAP, in case |
| DRAP is needed and stack realignment is really needed after reload. */ |
| if (stack_realign_drap) |
| { |
| int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; |
| |
| /* Only need to push parameter pointer reg if it is caller saved. */ |
| if (!call_used_regs[REGNO (crtl->drap_reg)]) |
| { |
| /* Push arg pointer reg */ |
| insn = emit_insn (gen_push (crtl->drap_reg)); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| |
| /* Grab the argument pointer. */ |
| t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset); |
| insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t)); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| m->fs.cfa_reg = crtl->drap_reg; |
| m->fs.cfa_offset = 0; |
| |
| /* Align the stack. */ |
| insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx, |
| stack_pointer_rtx, |
| GEN_INT (-align_bytes))); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| /* Replicate the return address on the stack so that return |
| address can be reached via (argp - 1) slot. This is needed |
| to implement macro RETURN_ADDR_RTX and intrinsic function |
| expand_builtin_return_addr etc. */ |
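| /* After the push just below, the stack looks roughly like |
| drap -> incoming arguments |
| drap - UNITS_PER_WORD: original return address |
| ... realignment padding |
| sp -> copy of the return address |
| so the copied slot plays the (argp - 1) role for the new frame |
| (a sketch; the amount of padding depends on the alignment). */ |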
| t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD); |
| t = gen_frame_mem (word_mode, t); |
| insn = emit_insn (gen_push (t)); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| /* For the purposes of frame and register save area addressing, |
| we've started over with a new frame. */ |
| m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; |
| m->fs.realigned = true; |
| } |
| |
| int_registers_saved = (frame.nregs == 0); |
| sse_registers_saved = (frame.nsseregs == 0); |
| |
| if (frame_pointer_needed && !m->fs.fp_valid) |
| { |
| /* Note: AT&T enter does NOT have reversed args. Enter is probably |
| slower on all targets. Also sdb doesn't like it. */ |
| insn = emit_insn (gen_push (hard_frame_pointer_rtx)); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| /* Push registers now, before setting the frame pointer |
| on SEH target. */ |
| if (!int_registers_saved |
| && TARGET_SEH |
| && !frame.save_regs_using_mov) |
| { |
| ix86_emit_save_regs (); |
| int_registers_saved = true; |
| gcc_assert (m->fs.sp_offset == frame.reg_save_offset); |
| } |
| |
| if (m->fs.sp_offset == frame.hard_frame_pointer_offset) |
| { |
| insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| if (m->fs.cfa_reg == stack_pointer_rtx) |
| m->fs.cfa_reg = hard_frame_pointer_rtx; |
| m->fs.fp_offset = m->fs.sp_offset; |
| m->fs.fp_valid = true; |
| } |
| } |
| |
| if (!int_registers_saved) |
| { |
| /* If saving registers via PUSH, do so now. */ |
| if (!frame.save_regs_using_mov) |
| { |
| ix86_emit_save_regs (); |
| int_registers_saved = true; |
| gcc_assert (m->fs.sp_offset == frame.reg_save_offset); |
| } |
| |
| /* When using the red zone we may start register saving before allocating |
| the stack frame, saving one cycle of the prologue. However, avoid |
| doing this if we have to probe the stack; at least on x86_64 the |
| stack probe can turn into a call that clobbers a red zone location. */ |
| else if (ix86_using_red_zone () |
| && (! TARGET_STACK_PROBE |
| || frame.stack_pointer_offset < CHECK_STACK_LIMIT)) |
| { |
| ix86_emit_save_regs_using_mov (frame.reg_save_offset); |
| int_registers_saved = true; |
| } |
| } |
| |
| if (stack_realign_fp) |
| { |
| int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; |
| gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT); |
| |
| /* The computation of the size of the re-aligned stack frame means |
| that we must allocate the size of the register save area before |
| performing the actual alignment. Otherwise we cannot guarantee |
| that there's enough storage above the realignment point. */ |
| if (m->fs.sp_offset != frame.sse_reg_save_offset) |
| pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
| GEN_INT (m->fs.sp_offset |
| - frame.sse_reg_save_offset), |
| -1, false); |
| |
| /* Align the stack. */ |
| insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx, |
| stack_pointer_rtx, |
| GEN_INT (-align_bytes))); |
| |
| /* For the purposes of register save area addressing, the stack |
| pointer is no longer valid. As for the value of sp_offset, |
| see ix86_compute_frame_layout, which we need to match in order |
| to pass verification of stack_pointer_offset at the end. */ |
| m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes; |
| m->fs.sp_valid = false; |
| } |
| |
| allocate = frame.stack_pointer_offset - m->fs.sp_offset; |
| |
| if (flag_stack_usage_info) |
| { |
| /* We start to count from ARG_POINTER. */ |
| HOST_WIDE_INT stack_size = frame.stack_pointer_offset; |
| |
| /* If it was realigned, take into account the fake frame. */ |
| if (stack_realign_drap) |
| { |
| if (ix86_static_chain_on_stack) |
| stack_size += UNITS_PER_WORD; |
| |
| if (!call_used_regs[REGNO (crtl->drap_reg)]) |
| stack_size += UNITS_PER_WORD; |
| |
| /* This over-estimates by 1 minimal-stack-alignment-unit but |
| mitigates that by counting in the new return address slot. */ |
| current_function_dynamic_stack_size |
| += crtl->stack_alignment_needed / BITS_PER_UNIT; |
| } |
| |
| current_function_static_stack_size = stack_size; |
| } |
| |
| /* On SEH target with very large frame size, allocate an area to save |
| SSE registers (as the very large allocation won't be described). */ |
| if (TARGET_SEH |
| && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE |
| && !sse_registers_saved) |
| { |
| HOST_WIDE_INT sse_size = |
| frame.sse_reg_save_offset - frame.reg_save_offset; |
| |
| gcc_assert (int_registers_saved); |
| |
| /* No need to do stack checking as the area will be immediately |
| written. */ |
| pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
| GEN_INT (-sse_size), -1, |
| m->fs.cfa_reg == stack_pointer_rtx); |
| allocate -= sse_size; |
| ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); |
| sse_registers_saved = true; |
| } |
| |
| /* The stack has already been decremented by the instruction calling us |
| so probe if the size is non-negative to preserve the protection area. */ |
| if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK) |
| { |
| /* We expect the registers to be saved when probes are used. */ |
| gcc_assert (int_registers_saved); |
| |
| if (STACK_CHECK_MOVING_SP) |
| { |
| if (!(crtl->is_leaf && !cfun->calls_alloca |
| && allocate <= PROBE_INTERVAL)) |
| { |
| ix86_adjust_stack_and_probe (allocate); |
| allocate = 0; |
| } |
| } |
| else |
| { |
| HOST_WIDE_INT size = allocate; |
| |
| if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000) |
| size = 0x80000000 - STACK_CHECK_PROTECT - 1; |
| |
| if (TARGET_STACK_PROBE) |
| { |
| if (crtl->is_leaf && !cfun->calls_alloca) |
| { |
| if (size > PROBE_INTERVAL) |
| ix86_emit_probe_stack_range (0, size); |
| } |
| else |
| ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT); |
| } |
| else |
| { |
| if (crtl->is_leaf && !cfun->calls_alloca) |
| { |
| if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) |
| ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, |
| size - STACK_CHECK_PROTECT); |
| } |
| else |
| ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size); |
| } |
| } |
| } |
| |
| if (allocate == 0) |
| ; |
| else if (!ix86_target_stack_probe () |
| || frame.stack_pointer_offset < CHECK_STACK_LIMIT) |
| { |
| pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
| GEN_INT (-allocate), -1, |
| m->fs.cfa_reg == stack_pointer_rtx); |
| } |
| else |
| { |
| rtx eax = gen_rtx_REG (Pmode, AX_REG); |
| rtx r10 = NULL; |
| rtx (*adjust_stack_insn)(rtx, rtx, rtx); |
| const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx); |
| bool eax_live = ix86_eax_live_at_start_p (); |
| bool r10_live = false; |
| |
| if (TARGET_64BIT) |
| r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0); |
| |
| if (eax_live) |
| { |
| insn = emit_insn (gen_push (eax)); |
| allocate -= UNITS_PER_WORD; |
| /* Note that SEH directives need to continue tracking the stack |
| pointer even after the frame pointer has been set up. */ |
| if (sp_is_cfa_reg || TARGET_SEH) |
| { |
| if (sp_is_cfa_reg) |
| m->fs.cfa_offset += UNITS_PER_WORD; |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| } |
| |
| if (r10_live) |
| { |
| r10 = gen_rtx_REG (Pmode, R10_REG); |
| insn = emit_insn (gen_push (r10)); |
| allocate -= UNITS_PER_WORD; |
| if (sp_is_cfa_reg || TARGET_SEH) |
| { |
| if (sp_is_cfa_reg) |
| m->fs.cfa_offset += UNITS_PER_WORD; |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| } |
| |
| emit_move_insn (eax, GEN_INT (allocate)); |
| emit_insn (ix86_gen_allocate_stack_worker (eax, eax)); |
| |
| /* Use the fact that AX still contains ALLOCATE. */ |
| adjust_stack_insn = (Pmode == DImode |
| ? gen_pro_epilogue_adjust_stack_di_sub |
| : gen_pro_epilogue_adjust_stack_si_sub); |
| |
| insn = emit_insn (adjust_stack_insn (stack_pointer_rtx, |
| stack_pointer_rtx, eax)); |
| |
| if (sp_is_cfa_reg || TARGET_SEH) |
| { |
| if (sp_is_cfa_reg) |
| m->fs.cfa_offset += allocate; |
| RTX_FRAME_RELATED_P (insn) = 1; |
| add_reg_note (insn, REG_FRAME_RELATED_EXPR, |
| gen_rtx_SET (VOIDmode, stack_pointer_rtx, |
| plus_constant (Pmode, stack_pointer_rtx, |
| -allocate))); |
| } |
| m->fs.sp_offset += allocate; |
| |
| /* Use stack_pointer_rtx for relative addressing so that code |
| works for realigned stack, too. */ |
| if (r10_live && eax_live) |
| { |
| t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); |
| emit_move_insn (gen_rtx_REG (word_mode, R10_REG), |
| gen_frame_mem (word_mode, t)); |
| t = plus_constant (Pmode, t, UNITS_PER_WORD); |
| emit_move_insn (gen_rtx_REG (word_mode, AX_REG), |
| gen_frame_mem (word_mode, t)); |
| } |
| else if (eax_live || r10_live) |
| { |
| t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); |
| emit_move_insn (gen_rtx_REG (word_mode, |
| (eax_live ? AX_REG : R10_REG)), |
| gen_frame_mem (word_mode, t)); |
| } |
| } |
| gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset); |
| |
| /* If we haven't already set up the frame pointer, do so now. */ |
| if (frame_pointer_needed && !m->fs.fp_valid) |
| { |
| insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx, |
| GEN_INT (frame.stack_pointer_offset |
| - frame.hard_frame_pointer_offset)); |
| insn = emit_insn (insn); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL); |
| |
| if (m->fs.cfa_reg == stack_pointer_rtx) |
| m->fs.cfa_reg = hard_frame_pointer_rtx; |
| m->fs.fp_offset = frame.hard_frame_pointer_offset; |
| m->fs.fp_valid = true; |
| } |
| |
| if (!int_registers_saved) |
| ix86_emit_save_regs_using_mov (frame.reg_save_offset); |
| if (!sse_registers_saved) |
| ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); |
| |
| pic_reg_used = false; |
| /* We don't use pic-register for pe-coff target. */ |
| if (pic_offset_table_rtx |
| && !TARGET_PECOFF |
| && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM) |
| || crtl->profile)) |
| { |
| unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum (); |
| |
| if (alt_pic_reg_used != INVALID_REGNUM) |
| SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used); |
| |
| pic_reg_used = true; |
| } |
| |
| if (pic_reg_used) |
| { |
| if (TARGET_64BIT) |
| { |
| if (ix86_cmodel == CM_LARGE_PIC) |
| { |
| rtx label, tmp_reg; |
| |
| gcc_assert (Pmode == DImode); |
| label = gen_label_rtx (); |
| emit_label (label); |
| LABEL_PRESERVE_P (label) = 1; |
| tmp_reg = gen_rtx_REG (Pmode, R11_REG); |
| gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg)); |
| insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, |
| label)); |
| insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label)); |
| insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx, |
| pic_offset_table_rtx, tmp_reg)); |
| } |
| else |
| insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx)); |
| } |
| else |
| { |
| insn = emit_insn (gen_set_got (pic_offset_table_rtx)); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); |
| } |
| } |
| |
| /* In the pic_reg_used case, make sure that the got load isn't deleted |
| when mcount needs it. Blockage to avoid call movement across mcount |
| call is emitted in generic code after the NOTE_INSN_PROLOGUE_END |
| note. */ |
| if (crtl->profile && !flag_fentry && pic_reg_used) |
| emit_insn (gen_prologue_use (pic_offset_table_rtx)); |
| |
| if (crtl->drap_reg && !crtl->stack_realign_needed) |
| { |
| /* vDRAP is set up, but after reload it turns out stack realignment |
| isn't necessary; here we emit the prologue to set up DRAP |
| without the stack realignment adjustment. */ |
| t = choose_baseaddr (0); |
| emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t)); |
| } |
| |
| /* Prevent instructions from being scheduled into register save push |
| sequence when access to the redzone area is done through frame pointer. |
| The offset between the frame pointer and the stack pointer is calculated |
| relative to the value of the stack pointer at the end of the function |
| prologue, and moving instructions that access redzone area via frame |
| pointer inside push sequence violates this assumption. */ |
| if (frame_pointer_needed && frame.red_zone_size) |
| emit_insn (gen_memory_blockage ()); |
| |
| /* Emit cld instruction if stringops are used in the function. */ |
| if (TARGET_CLD && ix86_current_function_needs_cld) |
| emit_insn (gen_cld ()); |
| |
| /* SEH requires that the prologue end within 256 bytes of the start of |
| the function. Prevent instruction schedules that would extend that. |
| Further, prevent alloca modifications to the stack pointer from being |
| combined with prologue modifications. */ |
| if (TARGET_SEH) |
| emit_insn (gen_prologue_use (stack_pointer_rtx)); |
| } |
| |
| /* Emit code to restore REG using a POP insn. */ |
| |
| static void |
| ix86_emit_restore_reg_using_pop (rtx reg) |
| { |
| struct machine_function *m = cfun->machine; |
| rtx insn = emit_insn (gen_pop (reg)); |
| |
| ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset); |
| m->fs.sp_offset -= UNITS_PER_WORD; |
| |
| if (m->fs.cfa_reg == crtl->drap_reg |
| && REGNO (reg) == REGNO (crtl->drap_reg)) |
| { |
| /* Previously we'd represented the CFA as an expression |
| like *(%ebp - 8). We've just popped that value from |
| the stack, which means we need to reset the CFA to |
| the drap register. This will remain until we restore |
| the stack pointer. */ |
| add_reg_note (insn, REG_CFA_DEF_CFA, reg); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| /* This means that the DRAP register is valid for addressing too. */ |
| m->fs.drap_valid = true; |
| return; |
| } |
| |
| if (m->fs.cfa_reg == stack_pointer_rtx) |
| { |
| rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); |
| x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x); |
| add_reg_note (insn, REG_CFA_ADJUST_CFA, x); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| m->fs.cfa_offset -= UNITS_PER_WORD; |
| } |
| |
| /* When the frame pointer is the CFA, and we pop it, we are |
| swapping back to the stack pointer as the CFA. This happens |
| for stack frames that don't allocate other data, so we assume |
| the stack pointer is now pointing at the return address, i.e. |
| the function entry state, which makes the offset be 1 word. */ |
| if (reg == hard_frame_pointer_rtx) |
| { |
| m->fs.fp_valid = false; |
| if (m->fs.cfa_reg == hard_frame_pointer_rtx) |
| { |
| m->fs.cfa_reg = stack_pointer_rtx; |
| m->fs.cfa_offset -= UNITS_PER_WORD; |
| |
| add_reg_note (insn, REG_CFA_DEF_CFA, |
| gen_rtx_PLUS (Pmode, stack_pointer_rtx, |
| GEN_INT (m->fs.cfa_offset))); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| } |
| } |
| |
| /* Emit code to restore saved registers using POP insns. */ |
| |
| static void |
| ix86_emit_restore_regs_using_pop (void) |
| { |
| unsigned int regno; |
| |
| for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false)) |
| ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno)); |
| } |
| |
| /* Emit code and notes for the LEAVE instruction. */ |
| |
| static void |
| ix86_emit_leave (void) |
| { |
| struct machine_function *m = cfun->machine; |
| rtx insn = emit_insn (ix86_gen_leave ()); |
| |
| ix86_add_queued_cfa_restore_notes (insn); |
| |
| gcc_assert (m->fs.fp_valid); |
| m->fs.sp_valid = true; |
| m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD; |
| m->fs.fp_valid = false; |
| |
| if (m->fs.cfa_reg == hard_frame_pointer_rtx) |
| { |
| m->fs.cfa_reg = stack_pointer_rtx; |
| m->fs.cfa_offset = m->fs.sp_offset; |
| |
| add_reg_note (insn, REG_CFA_DEF_CFA, |
| plus_constant (Pmode, stack_pointer_rtx, |
| m->fs.sp_offset)); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, |
| m->fs.fp_offset); |
| } |
| |
| /* Emit code to restore saved registers using MOV insns. |
| First register is restored from CFA - CFA_OFFSET. */ |
| static void |
| ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset, |
| bool maybe_eh_return) |
| { |
| struct machine_function *m = cfun->machine; |
| unsigned int regno; |
| |
| for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return)) |
| { |
| rtx reg = gen_rtx_REG (word_mode, regno); |
| rtx insn, mem; |
| |
| mem = choose_baseaddr (cfa_offset); |
| mem = gen_frame_mem (word_mode, mem); |
| insn = emit_move_insn (reg, mem); |
| |
| if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg)) |
| { |
| /* Previously we'd represented the CFA as an expression |
| like *(%ebp - 8). We've just popped that value from |
| the stack, which means we need to reset the CFA to |
| the drap register. This will remain until we restore |
| the stack pointer. */ |
| add_reg_note (insn, REG_CFA_DEF_CFA, reg); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| /* This means that the DRAP register is valid for addressing. */ |
| m->fs.drap_valid = true; |
| } |
| else |
| ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset); |
| |
| cfa_offset -= UNITS_PER_WORD; |
| } |
| } |
| |
| /* Emit code to restore saved SSE registers using MOV insns. |
| First register is restored from CFA - CFA_OFFSET. */ |
| static void |
| ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset, |
| bool maybe_eh_return) |
| { |
| unsigned int regno; |
| |
| for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return)) |
| { |
| rtx reg = gen_rtx_REG (V4SFmode, regno); |
| rtx mem; |
| |
| mem = choose_baseaddr (cfa_offset); |
| mem = gen_rtx_MEM (V4SFmode, mem); |
| set_mem_align (mem, 128); |
| emit_move_insn (reg, mem); |
| |
| ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset); |
| |
| cfa_offset -= 16; |
| } |
| } |
| |
| /* Restore function stack, frame, and registers. */ |
| |
| void |
| ix86_expand_epilogue (int style) |
| { |
| struct machine_function *m = cfun->machine; |
| struct machine_frame_state frame_state_save = m->fs; |
| struct ix86_frame frame; |
| bool restore_regs_via_mov; |
| bool using_drap; |
| |
| ix86_finalize_stack_realign_flags (); |
| ix86_compute_frame_layout (&frame); |
| |
| m->fs.sp_valid = (!frame_pointer_needed |
| || (crtl->sp_is_unchanging |
| && !stack_realign_fp)); |
| gcc_assert (!m->fs.sp_valid |
| || m->fs.sp_offset == frame.stack_pointer_offset); |
| |
| /* The FP must be valid if the frame pointer is present. */ |
| gcc_assert (frame_pointer_needed == m->fs.fp_valid); |
| gcc_assert (!m->fs.fp_valid |
| || m->fs.fp_offset == frame.hard_frame_pointer_offset); |
| |
| /* We must have *some* valid pointer to the stack frame. */ |
| gcc_assert (m->fs.sp_valid || m->fs.fp_valid); |
| |
| /* The DRAP is never valid at this point. */ |
| gcc_assert (!m->fs.drap_valid); |
| |
| /* See the comment about red zone and frame |
| pointer usage in ix86_expand_prologue. */ |
| if (frame_pointer_needed && frame.red_zone_size) |
| emit_insn (gen_memory_blockage ()); |
| |
| using_drap = crtl->drap_reg && crtl->stack_realign_needed; |
| gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg); |
| |
| /* Determine the CFA offset of the end of the red-zone. */ |
| m->fs.red_zone_offset = 0; |
| if (ix86_using_red_zone () && crtl->args.pops_args < 65536) |
| { |
| /* The red-zone begins below the return address. */ |
| m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD; |
| |
| /* When the register save area is in the aligned portion of |
| the stack, determine the maximum runtime displacement that |
| matches up with the aligned frame. */ |
| if (stack_realign_drap) |
| m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT |
| + UNITS_PER_WORD); |
| } |
| |
| /* Special care must be taken for the normal return case of a function |
| using eh_return: the eax and edx registers are marked as saved, but |
| not restored along this path. Adjust the save location to match. */ |
| if (crtl->calls_eh_return && style != 2) |
| frame.reg_save_offset -= 2 * UNITS_PER_WORD; |
| |
| /* EH_RETURN requires the use of moves to function properly. */ |
| if (crtl->calls_eh_return) |
| restore_regs_via_mov = true; |
| /* SEH requires the use of pops to identify the epilogue. */ |
| else if (TARGET_SEH) |
| restore_regs_via_mov = false; |
| /* If we're only restoring one register and sp is not valid, then |
| use a move instruction to restore the register, since it's |
| less work than reloading sp and popping the register. */ |
| else if (!m->fs.sp_valid && frame.nregs <= 1) |
| restore_regs_via_mov = true; |
| else if (TARGET_EPILOGUE_USING_MOVE |
| && cfun->machine->use_fast_prologue_epilogue |
| && (frame.nregs > 1 |
| || m->fs.sp_offset != frame.reg_save_offset)) |
| restore_regs_via_mov = true; |
| else if (frame_pointer_needed |
| && !frame.nregs |
| && m->fs.sp_offset != frame.reg_save_offset) |
| restore_regs_via_mov = true; |
| else if (frame_pointer_needed |
| && TARGET_USE_LEAVE |
| && cfun->machine->use_fast_prologue_epilogue |
| && frame.nregs == 1) |
| restore_regs_via_mov = true; |
| else |
| restore_regs_via_mov = false; |
| |
| if (restore_regs_via_mov || frame.nsseregs) |
| { |
| /* Ensure that the entire register save area is addressable via |
| the stack pointer, if we will restore via sp. */ |
| if (TARGET_64BIT |
| && m->fs.sp_offset > 0x7fffffff |
| && !(m->fs.fp_valid || m->fs.drap_valid) |
| && (frame.nsseregs + frame.nregs) != 0) |
| { |
| pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
| GEN_INT (m->fs.sp_offset |
| - frame.sse_reg_save_offset), |
| style, |
| m->fs.cfa_reg == stack_pointer_rtx); |
| } |
| } |
| |
| /* If there are any SSE registers to restore, then we have to do it |
| via moves, since there's obviously no pop for SSE regs. */ |
| if (frame.nsseregs) |
| ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset, |
| style == 2); |
| |
| if (restore_regs_via_mov) |
| { |
| rtx t; |
| |
| if (frame.nregs) |
| ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2); |
| |
| /* eh_return epilogues need %ecx added to the stack pointer. */ |
| if (style == 2) |
| { |
| rtx insn, sa = EH_RETURN_STACKADJ_RTX; |
| |
| /* Stack align doesn't work with eh_return. */ |
| gcc_assert (!stack_realign_drap); |
| /* Neither do regparm nested functions. */ |
| gcc_assert (!ix86_static_chain_on_stack); |
| |
| if (frame_pointer_needed) |
| { |
| t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); |
| t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD); |
| emit_insn (gen_rtx_SET (VOIDmode, sa, t)); |
| |
| t = gen_frame_mem (Pmode, hard_frame_pointer_rtx); |
| insn = emit_move_insn (hard_frame_pointer_rtx, t); |
| |
| /* Note that we use SA as a temporary CFA, as the return |
| address is at the proper place relative to it. We |
| pretend this happens at the FP restore insn because |
| prior to this insn the FP would be stored at the wrong |
| offset relative to SA, and after this insn we have no |
| other reasonable register to use for the CFA. We don't |
| bother resetting the CFA to the SP for the duration of |
| the return insn. */ |
| add_reg_note (insn, REG_CFA_DEF_CFA, |
| plus_constant (Pmode, sa, UNITS_PER_WORD)); |
| ix86_add_queued_cfa_restore_notes (insn); |
| add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| m->fs.cfa_reg = sa; |
| m->fs.cfa_offset = UNITS_PER_WORD; |
| m->fs.fp_valid = false; |
| |
| pro_epilogue_adjust_stack (stack_pointer_rtx, sa, |
| const0_rtx, style, false); |
| } |
| else |
| { |
| t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); |
| t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD); |
| insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t)); |
| ix86_add_queued_cfa_restore_notes (insn); |
| |
| gcc_assert (m->fs.cfa_reg == stack_pointer_rtx); |
| if (m->fs.cfa_offset != UNITS_PER_WORD) |
| { |
| m->fs.cfa_offset = UNITS_PER_WORD; |
| add_reg_note (insn, REG_CFA_DEF_CFA, |
| plus_constant (Pmode, stack_pointer_rtx, |
| UNITS_PER_WORD)); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| } |
| m->fs.sp_offset = UNITS_PER_WORD; |
| m->fs.sp_valid = true; |
| } |
| } |
| else |
| { |
| /* SEH requires that the function end with (1) a stack adjustment |
| if necessary, (2) a sequence of pops, and (3) a return or |
| jump instruction. Prevent insns from the function body from |
| being scheduled into this sequence. */ |
| if (TARGET_SEH) |
| { |
| /* Prevent a catch region from being adjacent to the standard |
| epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor |
| several other flags that would be interesting to test are |
| set up yet. */ |
| if (flag_non_call_exceptions) |
| emit_insn (gen_nops (const1_rtx)); |
| else |
| emit_insn (gen_blockage ()); |
| } |
| |
| /* First step is to deallocate the stack frame so that we can |
| pop the registers. Also do it on SEH targets for very large |
| frames, as the emitted instructions aren't allowed by the ABI |
| in epilogues. */ |
| if (!m->fs.sp_valid |
| || (TARGET_SEH |
| && (m->fs.sp_offset - frame.reg_save_offset |
| >= SEH_MAX_FRAME_SIZE))) |
| { |
| pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx, |
| GEN_INT (m->fs.fp_offset |
| - frame.reg_save_offset), |
| style, false); |
| } |
| else if (m->fs.sp_offset != frame.reg_save_offset) |
| { |
| pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
| GEN_INT (m->fs.sp_offset |
| - frame.reg_save_offset), |
| style, |
| m->fs.cfa_reg == stack_pointer_rtx); |
| } |
| |
| ix86_emit_restore_regs_using_pop (); |
| } |
| |
| /* If we used a frame pointer and haven't already got rid of it, |
| then do so now. */ |
| if (m->fs.fp_valid) |
| { |
| /* If the stack pointer is valid and pointing at the frame |
| pointer store address, then we only need a pop. */ |
| if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset) |
| ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); |
| /* Leave results in shorter dependency chains on CPUs that are |
| able to grok it fast. */ |
| else if (TARGET_USE_LEAVE |
| || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun)) |
| || !cfun->machine->use_fast_prologue_epilogue) |
| ix86_emit_leave (); |
| else |
| { |
| pro_epilogue_adjust_stack (stack_pointer_rtx, |
| hard_frame_pointer_rtx, |
| const0_rtx, style, !using_drap); |
| ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); |
| } |
| } |
| |
| if (using_drap) |
| { |
| int param_ptr_offset = UNITS_PER_WORD; |
| rtx insn; |
| |
| gcc_assert (stack_realign_drap); |
| |
| if (ix86_static_chain_on_stack) |
| param_ptr_offset += UNITS_PER_WORD; |
| if (!call_used_regs[REGNO (crtl->drap_reg)]) |
| param_ptr_offset += UNITS_PER_WORD; |
| |
| insn = emit_insn (gen_rtx_SET |
| (VOIDmode, stack_pointer_rtx, |
| gen_rtx_PLUS (Pmode, |
| crtl->drap_reg, |
| GEN_INT (-param_ptr_offset)))); |
| m->fs.cfa_reg = stack_pointer_rtx; |
| m->fs.cfa_offset = param_ptr_offset; |
| m->fs.sp_offset = param_ptr_offset; |
| m->fs.realigned = false; |
| |
| add_reg_note (insn, REG_CFA_DEF_CFA, |
| gen_rtx_PLUS (Pmode, stack_pointer_rtx, |
| GEN_INT (param_ptr_offset))); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| if (!call_used_regs[REGNO (crtl->drap_reg)]) |
| ix86_emit_restore_reg_using_pop (crtl->drap_reg); |
| } |
| |
| /* At this point the stack pointer must be valid, and we must have |
| restored all of the registers. We may not have deallocated the |
| entire stack frame. We've delayed this until now because it may |
| be possible to merge the local stack deallocation with the |
| deallocation forced by ix86_static_chain_on_stack. */ |
| gcc_assert (m->fs.sp_valid); |
| gcc_assert (!m->fs.fp_valid); |
| gcc_assert (!m->fs.realigned); |
| if (m->fs.sp_offset != UNITS_PER_WORD) |
| { |
| pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
| GEN_INT (m->fs.sp_offset - UNITS_PER_WORD), |
| style, true); |
| } |
| else |
| ix86_add_queued_cfa_restore_notes (get_last_insn ()); |
| |
| /* Sibcall epilogues don't want a return instruction. */ |
| if (style == 0) |
| { |
| m->fs = frame_state_save; |
| return; |
| } |
| |
| if (crtl->args.pops_args && crtl->args.size) |
| { |
| rtx popc = GEN_INT (crtl->args.pops_args); |
| |
| /* i386 can only pop 64K bytes. If asked to pop more, pop return |
| address, do explicit add, and jump indirectly to the caller. */ |
| |
| if (crtl->args.pops_args >= 65536) |
| { |
| rtx ecx = gen_rtx_REG (SImode, CX_REG); |
| rtx insn; |
| |
| /* There is no "pascal" calling convention in any 64-bit ABI. */ |
| gcc_assert (!TARGET_64BIT); |
| |
| insn = emit_insn (gen_pop (ecx)); |
| m->fs.cfa_offset -= UNITS_PER_WORD; |
| m->fs.sp_offset -= UNITS_PER_WORD; |
| |
| rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); |
| x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x); |
| add_reg_note (insn, REG_CFA_ADJUST_CFA, x); |
| add_reg_note (insn, REG_CFA_REGISTER, |
| gen_rtx_SET (VOIDmode, ecx, pc_rtx)); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
| popc, -1, true); |
| emit_jump_insn (gen_simple_return_indirect_internal (ecx)); |
| } |
| else |
| emit_jump_insn (gen_simple_return_pop_internal (popc)); |
| } |
| else |
| emit_jump_insn (gen_simple_return_internal ()); |
| |
| /* Restore the state back to the state from the prologue, |
| so that it's correct for the next epilogue. */ |
| m->fs = frame_state_save; |
| } |
| |
| /* Reset from the function's potential modifications. */ |
| |
| static void |
| ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, |
| HOST_WIDE_INT size ATTRIBUTE_UNUSED) |
| { |
| if (pic_offset_table_rtx) |
| SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM); |
| #if TARGET_MACHO |
| /* Mach-O doesn't support labels at the end of objects, so if |
| it looks like we might want one, insert a NOP. */ |
| { |
| rtx insn = get_last_insn (); |
| rtx deleted_debug_label = NULL_RTX; |
| while (insn |
| && NOTE_P (insn) |
| && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL) |
| { |
| /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL |
| notes; instead set their CODE_LABEL_NUMBER to -1, |
| otherwise there would be code generation differences |
| between -g and -g0. */ |
| if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL) |
| deleted_debug_label = insn; |
| insn = PREV_INSN (insn); |
| } |
| if (insn |
| && (LABEL_P (insn) |
| || (NOTE_P (insn) |
| && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))) |
| fputs ("\tnop\n", file); |
| else if (deleted_debug_label) |
| for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn)) |
| if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL) |
| CODE_LABEL_NUMBER (insn) = -1; |
| } |
| #endif |
| |
| } |
| |
| /* Return a scratch register to use in the split stack prologue. The |
| split stack prologue is used for -fsplit-stack. It is the first |
| instructions in the function, even before the regular prologue. |
| The scratch register can be any caller-saved register which is not |
| used for parameters or for the static chain. */ |
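| /* Roughly: plain functions get %ecx (or %edx when a static chain is |
| live), fastcall functions get %eax, thiscall functions get %edx (or |
| %eax with a static chain); the remaining combinations are rejected |
| with sorry () below. */ |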
| |
| static unsigned int |
| split_stack_prologue_scratch_regno (void) |
| { |
| if (TARGET_64BIT) |
| return R11_REG; |
| else |
| { |
| bool is_fastcall, is_thiscall; |
| int regparm; |
| |
| is_fastcall = (lookup_attribute ("fastcall", |
| TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) |
| != NULL); |
| is_thiscall = (lookup_attribute ("thiscall", |
| TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) |
| != NULL); |
| regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl); |
| |
| if (is_fastcall) |
| { |
| if (DECL_STATIC_CHAIN (cfun->decl)) |
| { |
| sorry ("-fsplit-stack does not support fastcall with " |
| "nested function"); |
| return INVALID_REGNUM; |
| } |
| return AX_REG; |
| } |
| else if (is_thiscall) |
| { |
| if (!DECL_STATIC_CHAIN (cfun->decl)) |
| return DX_REG; |
| return AX_REG; |
| } |
| else if (regparm < 3) |
| { |
| if (!DECL_STATIC_CHAIN (cfun->decl)) |
| return CX_REG; |
| else |
| { |
| if (regparm >= 2) |
| { |
| sorry ("-fsplit-stack does not support 2 register " |
| "parameters for a nested function"); |
| return INVALID_REGNUM; |
| } |
| return DX_REG; |
| } |
| } |
| else |
| { |
| /* FIXME: We could make this work by pushing a register |
| around the addition and comparison. */ |
| sorry ("-fsplit-stack does not support 3 register parameters"); |
| return INVALID_REGNUM; |
| } |
| } |
| } |
| |
| /* A SYMBOL_REF for the function which allocates new stack space for |
| -fsplit-stack. */ |
| |
| static GTY(()) rtx split_stack_fn; |
| |
| /* A SYMBOL_REF for the function which allocates new stack space when |
| using the large code model. */ |
| |
| static GTY(()) rtx split_stack_fn_large; |
| |
| /* Handle -fsplit-stack. These are the first instructions in the |
| function, even before the regular prologue. */ |
| |
| void |
| ix86_expand_split_stack_prologue (void) |
| { |
| struct ix86_frame frame; |
| HOST_WIDE_INT allocate; |
| unsigned HOST_WIDE_INT args_size; |
| rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage; |
| rtx scratch_reg = NULL_RTX; |
| rtx varargs_label = NULL_RTX; |
| rtx fn; |
| |
| gcc_assert (flag_split_stack && reload_completed); |
| |
| ix86_finalize_stack_realign_flags (); |
| ix86_compute_frame_layout (&frame); |
| allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET; |
| |
| /* This is the label we will branch to if we have enough stack |
| space. We expect the basic block reordering pass to reverse this |
| branch if optimizing, so that we branch in the unlikely case. */ |
| label = gen_label_rtx (); |
| |
| /* We need to compare the stack pointer minus the frame size with |
| the stack boundary in the TCB. The stack boundary always gives |
| us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we |
| can compare directly. Otherwise we need to do an addition. */ |
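| /* On x86-64 with glibc this typically ends up as something like |
| lea -FRAME(%rsp), %r11 |
| cmp %fs:0x70, %r11 |
| jae .Lhave_enough_stack |
| but the TCB slot and segment register come from the |
| UNSPEC_STACK_CHECK expansion and are target/libc specific, so |
| treat the offsets above as an illustration only. */ |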
| |
| limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), |
| UNSPEC_STACK_CHECK); |
| limit = gen_rtx_CONST (Pmode, limit); |
| limit = gen_rtx_MEM (Pmode, limit); |
| if (allocate < SPLIT_STACK_AVAILABLE) |
| current = stack_pointer_rtx; |
| else |
| { |
| unsigned int scratch_regno; |
| rtx offset; |
| |
| /* We need a scratch register to hold the stack pointer minus |
| the required frame size. Since this is the very start of the |
| function, the scratch register can be any caller-saved |
| register which is not used for parameters. */ |
| offset = GEN_INT (- allocate); |
| scratch_regno = split_stack_prologue_scratch_regno (); |
| if (scratch_regno == INVALID_REGNUM) |
| return; |
| scratch_reg = gen_rtx_REG (Pmode, scratch_regno); |
| if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode)) |
| { |
| /* We don't use ix86_gen_add3 in this case because it will |
| want to split to lea, but when not optimizing the insn |
| will not be split after this point. */ |
| emit_insn (gen_rtx_SET (VOIDmode, scratch_reg, |
| gen_rtx_PLUS (Pmode, stack_pointer_rtx, |
| offset))); |
| } |
| else |
| { |
| emit_move_insn (scratch_reg, offset); |
| emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg, |
| stack_pointer_rtx)); |
| } |
| current = scratch_reg; |
| } |
| |
| ix86_expand_branch (GEU, current, limit, label); |
| jump_insn = get_last_insn (); |
| JUMP_LABEL (jump_insn) = label; |
| |
| /* Mark the jump as very likely to be taken. */ |
| add_int_reg_note (jump_insn, REG_BR_PROB, |
| REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100); |
| |
| if (split_stack_fn == NULL_RTX) |
| split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack"); |
| fn = split_stack_fn; |
| |
| /* Get more stack space. We pass in the desired stack space and the |
| size of the arguments to copy to the new stack. In 32-bit mode |
| we push the parameters; __morestack will return on a new stack |
| anyhow. In 64-bit mode we pass the parameters in r10 and |
| r11. */ |
| allocate_rtx = GEN_INT (allocate); |
| args_size = crtl->args.size >= 0 ? crtl->args.size : 0; |
| call_fusage = NULL_RTX; |
| if (TARGET_64BIT) |
| { |
| rtx reg10, reg11; |
| |
| reg10 = gen_rtx_REG (Pmode, R10_REG); |
| reg11 = gen_rtx_REG (Pmode, R11_REG); |
| |
| /* If this function uses a static chain, it will be in %r10. |
| Preserve it across the call to __morestack. */ |
| if (DECL_STATIC_CHAIN (cfun->decl)) |
| { |
| rtx rax; |
| |
| rax = gen_rtx_REG (word_mode, AX_REG); |
| emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG)); |
| use_reg (&call_fusage, rax); |
| } |
| |
| if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) |
| && !TARGET_PECOFF) |
| { |
| HOST_WIDE_INT argval; |
| |
| gcc_assert (Pmode == DImode); |
| /* When using the large model we need to load the address |
| into a register, and we've run out of registers. So we |
| switch to a different calling convention, and we call a |
| different function: __morestack_large. We pass the |
| argument size in the upper 32 bits of r10 and pass the |
| frame size in the lower 32 bits. */ |
| gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate); |
| gcc_assert ((args_size & 0xffffffff) == args_size); |
| |
| if (split_stack_fn_large == NULL_RTX) |
| split_stack_fn_large = |
| gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model"); |
| |
| if (ix86_cmodel == CM_LARGE_PIC) |
| { |
| rtx label, x; |
| |
| label = gen_label_rtx (); |
| emit_label (label); |
| LABEL_PRESERVE_P (label) = 1; |
| emit_insn (gen_set_rip_rex64 (reg10, label)); |
| emit_insn (gen_set_got_offset_rex64 (reg11, label)); |
| emit_insn (ix86_gen_add3 (reg10, reg10, reg11)); |
| x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large), |
| UNSPEC_GOT); |
| x = gen_rtx_CONST (Pmode, x); |
| emit_move_insn (reg11, x); |
| x = gen_rtx_PLUS (Pmode, reg10, reg11); |
| x = gen_const_mem (Pmode, x); |
| emit_move_insn (reg11, x); |
| } |
| else |
| emit_move_insn (reg11, split_stack_fn_large); |
| |
| fn = reg11; |
| |
| argval = ((args_size << 16) << 16) + allocate; |
| emit_move_insn (reg10, GEN_INT (argval)); |
| } |
| else |
| { |
| emit_move_insn (reg10, allocate_rtx); |
| emit_move_insn (reg11, GEN_INT (args_size)); |
| use_reg (&call_fusage, reg11); |
| } |
| |
| use_reg (&call_fusage, reg10); |
| } |
| else |
| { |
| emit_insn (gen_push (GEN_INT (args_size))); |
| emit_insn (gen_push (allocate_rtx)); |
| } |
| call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn), |
| GEN_INT (UNITS_PER_WORD), constm1_rtx, |
| NULL_RTX, false); |
| add_function_usage_to (call_insn, call_fusage); |
| |
| /* In order to make call/return prediction work right, we now need |
| to execute a return instruction. See |
| libgcc/config/i386/morestack.S for the details on how this works. |
| |
| For flow purposes gcc must not see this as a return |
| instruction--we need control flow to continue at the subsequent |
| label. Therefore, we use an unspec. */ |
| gcc_assert (crtl->args.pops_args < 65536); |
| emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args))); |
| |
| /* If we are in 64-bit mode and this function uses a static chain, |
     we saved %r10 in %rax before calling __morestack.  */
| if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl)) |
| emit_move_insn (gen_rtx_REG (word_mode, R10_REG), |
| gen_rtx_REG (word_mode, AX_REG)); |
| |
| /* If this function calls va_start, we need to store a pointer to |
| the arguments on the old stack, because they may not have been |
| all copied to the new stack. At this point the old stack can be |
| found at the frame pointer value used by __morestack, because |
| __morestack has set that up before calling back to us. Here we |
| store that pointer in a scratch register, and in |
| ix86_expand_prologue we store the scratch register in a stack |
| slot. */ |
| if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) |
| { |
| unsigned int scratch_regno; |
| rtx frame_reg; |
| int words; |
| |
| scratch_regno = split_stack_prologue_scratch_regno (); |
| scratch_reg = gen_rtx_REG (Pmode, scratch_regno); |
| frame_reg = gen_rtx_REG (Pmode, BP_REG); |
| |
| /* 64-bit: |
| fp -> old fp value |
| return address within this function |
| return address of caller of this function |
| stack arguments |
| So we add three words to get to the stack arguments. |
| |
| 32-bit: |
| fp -> old fp value |
| return address within this function |
| first argument to __morestack |
| second argument to __morestack |
| return address of caller of this function |
| stack arguments |
| So we add five words to get to the stack arguments. |
| */ |
| words = TARGET_64BIT ? 3 : 5; |
| emit_insn (gen_rtx_SET (VOIDmode, scratch_reg, |
| gen_rtx_PLUS (Pmode, frame_reg, |
| GEN_INT (words * UNITS_PER_WORD)))); |
| |
| varargs_label = gen_label_rtx (); |
| emit_jump_insn (gen_jump (varargs_label)); |
| JUMP_LABEL (get_last_insn ()) = varargs_label; |
| |
| emit_barrier (); |
| } |
| |
| emit_label (label); |
| LABEL_NUSES (label) = 1; |
| |
| /* If this function calls va_start, we now have to set the scratch |
| register for the case where we do not call __morestack. In this |
| case we need to set it based on the stack pointer. */ |
| if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) |
| { |
| emit_insn (gen_rtx_SET (VOIDmode, scratch_reg, |
| gen_rtx_PLUS (Pmode, stack_pointer_rtx, |
| GEN_INT (UNITS_PER_WORD)))); |
| |
| emit_label (varargs_label); |
| LABEL_NUSES (varargs_label) = 1; |
| } |
| } |
| |
| /* We may have to tell the dataflow pass that the split stack prologue |
| is initializing a scratch register. */ |
| |
| static void |
| ix86_live_on_entry (bitmap regs) |
| { |
| if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) |
| { |
| gcc_assert (flag_split_stack); |
| bitmap_set_bit (regs, split_stack_prologue_scratch_regno ()); |
| } |
| } |
| |
| /* Extract the parts of an RTL expression that is a valid memory address |
| for an instruction. Return 0 if the structure of the address is |
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but is still used for computing the length of the lea
   instruction.  */
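
/* For example (an illustrative sketch, not taken from the original sources),
   the address

     (plus:SI (plus:SI (mult:SI (reg:SI bx) (const_int 4))
                       (reg:SI si))
              (const_int 12))

   i.e. 12(%esi,%ebx,4) in AT&T syntax, decomposes into base = %esi,
   index = %ebx, scale = 4 and disp = (const_int 12).  */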
| |
| int |
| ix86_decompose_address (rtx addr, struct ix86_address *out) |
| { |
| rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX; |
| rtx base_reg, index_reg; |
| HOST_WIDE_INT scale = 1; |
| rtx scale_rtx = NULL_RTX; |
| rtx tmp; |
| int retval = 1; |
| enum ix86_address_seg seg = SEG_DEFAULT; |
| |
  /* Allow zero-extended SImode addresses;
     they will be emitted with the addr32 prefix.  */
| if (TARGET_64BIT && GET_MODE (addr) == DImode) |
| { |
| if (GET_CODE (addr) == ZERO_EXTEND |
| && GET_MODE (XEXP (addr, 0)) == SImode) |
| { |
| addr = XEXP (addr, 0); |
| if (CONST_INT_P (addr)) |
| return 0; |
| } |
| else if (GET_CODE (addr) == AND |
| && const_32bit_mask (XEXP (addr, 1), DImode)) |
| { |
| addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0); |
| if (addr == NULL_RTX) |
| return 0; |
| |
| if (CONST_INT_P (addr)) |
| return 0; |
| } |
| } |
| |
  /* Allow SImode subregs of DImode addresses;
     they will be emitted with the addr32 prefix.  */
| if (TARGET_64BIT && GET_MODE (addr) == SImode) |
| { |
| if (GET_CODE (addr) == SUBREG |
| && GET_MODE (SUBREG_REG (addr)) == DImode) |
| { |
| addr = SUBREG_REG (addr); |
| if (CONST_INT_P (addr)) |
| return 0; |
| } |
| } |
| |
| if (REG_P (addr)) |
| base = addr; |
| else if (GET_CODE (addr) == SUBREG) |
| { |
| if (REG_P (SUBREG_REG (addr))) |
| base = addr; |
| else |
| return 0; |
| } |
| else if (GET_CODE (addr) == PLUS) |
| { |
| rtx addends[4], op; |
| int n = 0, i; |
| |
| op = addr; |
| do |
| { |
| if (n >= 4) |
| return 0; |
| addends[n++] = XEXP (op, 1); |
| op = XEXP (op, 0); |
| } |
| while (GET_CODE (op) == PLUS); |
| if (n >= 4) |
| return 0; |
| addends[n] = op; |
| |
| for (i = n; i >= 0; --i) |
| { |
| op = addends[i]; |
| switch (GET_CODE (op)) |
| { |
| case MULT: |
| if (index) |
| return 0; |
| index = XEXP (op, 0); |
| scale_rtx = XEXP (op, 1); |
| break; |
| |
| case ASHIFT: |
| if (index) |
| return 0; |
| index = XEXP (op, 0); |
| tmp = XEXP (op, 1); |
| if (!CONST_INT_P (tmp)) |
| return 0; |
| scale = INTVAL (tmp); |
| if ((unsigned HOST_WIDE_INT) scale > 3) |
| return 0; |
| scale = 1 << scale; |
| break; |
| |
| case ZERO_EXTEND: |
| op = XEXP (op, 0); |
| if (GET_CODE (op) != UNSPEC) |
| return 0; |
| /* FALLTHRU */ |
| |
| case UNSPEC: |
| if (XINT (op, 1) == UNSPEC_TP |
| && TARGET_TLS_DIRECT_SEG_REFS |
| && seg == SEG_DEFAULT) |
| seg = DEFAULT_TLS_SEG_REG; |
| else |
| return 0; |
| break; |
| |
| case SUBREG: |
| if (!REG_P (SUBREG_REG (op))) |
| return 0; |
| /* FALLTHRU */ |
| |
| case REG: |
| if (!base) |
| base = op; |
| else if (!index) |
| index = op; |
| else |
| return 0; |
| break; |
| |
| case CONST: |
| case CONST_INT: |
| case SYMBOL_REF: |
| case LABEL_REF: |
| if (disp) |
| return 0; |
| disp = op; |
| break; |
| |
| default: |
| return 0; |
| } |
| } |
| } |
| else if (GET_CODE (addr) == MULT) |
| { |
| index = XEXP (addr, 0); /* index*scale */ |
| scale_rtx = XEXP (addr, 1); |
| } |
| else if (GET_CODE (addr) == ASHIFT) |
| { |
| /* We're called for lea too, which implements ashift on occasion. */ |
| index = XEXP (addr, 0); |
| tmp = XEXP (addr, 1); |
| if (!CONST_INT_P (tmp)) |
| return 0; |
| scale = INTVAL (tmp); |
| if ((unsigned HOST_WIDE_INT) scale > 3) |
| return 0; |
| scale = 1 << scale; |
| retval = -1; |
| } |
| else |
| disp = addr; /* displacement */ |
| |
| if (index) |
| { |
| if (REG_P (index)) |
| ; |
| else if (GET_CODE (index) == SUBREG |
| && REG_P (SUBREG_REG (index))) |
| ; |
| else |
| return 0; |
| } |
| |
| /* Extract the integral value of scale. */ |
| if (scale_rtx) |
| { |
| if (!CONST_INT_P (scale_rtx)) |
| return 0; |
| scale = INTVAL (scale_rtx); |
| } |
| |
| base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base; |
| index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index; |
| |
| /* Avoid useless 0 displacement. */ |
| if (disp == const0_rtx && (base || index)) |
| disp = NULL_RTX; |
| |
  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
| if (base_reg && index_reg && scale == 1 |
| && (index_reg == arg_pointer_rtx |
| || index_reg == frame_pointer_rtx |
| || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM))) |
| { |
| rtx tmp; |
| tmp = base, base = index, index = tmp; |
| tmp = base_reg, base_reg = index_reg, index_reg = tmp; |
| } |
| |
| /* Special case: %ebp cannot be encoded as a base without a displacement. |
| Similarly %r13. */ |
| if (!disp |
| && base_reg |
| && (base_reg == hard_frame_pointer_rtx |
| || base_reg == frame_pointer_rtx |
| || base_reg == arg_pointer_rtx |
| || (REG_P (base_reg) |
| && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM |
| || REGNO (base_reg) == R13_REG)))) |
| disp = const0_rtx; |
| |
  /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
| Avoid this by transforming to [%esi+0]. |
| Reload calls address legitimization without cfun defined, so we need |
| to test cfun for being non-NULL. */ |
| if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun) |
| && base_reg && !index_reg && !disp |
| && REG_P (base_reg) && REGNO (base_reg) == SI_REG) |
| disp = const0_rtx; |
| |
| /* Special case: encode reg+reg instead of reg*2. */ |
| if (!base && index && scale == 2) |
| base = index, base_reg = index_reg, scale = 1; |
| |
| /* Special case: scaling cannot be encoded without base or displacement. */ |
| if (!base && !disp && index && scale != 1) |
| disp = const0_rtx; |
| |
| out->base = base; |
| out->index = index; |
| out->disp = disp; |
| out->scale = scale; |
| out->seg = seg; |
| |
| return retval; |
| } |
| |
/* Return the cost of the memory address X.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
| lifetimes. */ |
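/* For example (illustrative, not from the original sources): a plain
   hard-register address costs 1, an address whose base register is a
   not-yet-allocated pseudo costs 2, and an address using two distinct
   pseudos, such as (plus (reg) (reg)), costs 3.  */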
| static int |
| ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED, |
| addr_space_t as ATTRIBUTE_UNUSED, |
| bool speed ATTRIBUTE_UNUSED) |
| { |
| struct ix86_address parts; |
| int cost = 1; |
| int ok = ix86_decompose_address (x, &parts); |
| |
| gcc_assert (ok); |
| |
| if (parts.base && GET_CODE (parts.base) == SUBREG) |
| parts.base = SUBREG_REG (parts.base); |
| if (parts.index && GET_CODE (parts.index) == SUBREG) |
| parts.index = SUBREG_REG (parts.index); |
| |
| /* Attempt to minimize number of registers in the address. */ |
| if ((parts.base |
| && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)) |
| || (parts.index |
| && (!REG_P (parts.index) |
| || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER))) |
| cost++; |
| |
| if (parts.base |
| && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER) |
| && parts.index |
| && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER) |
| && parts.base != parts.index) |
| cost++; |
| |
  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoding.  Increase the cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse them entirely.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last cases may be avoidable by explicitly coding the zero
     into the memory address, but I don't have an AMD-K6 machine handy to
     check this theory.  */
| |
| if (TARGET_K6 |
| && ((!parts.disp && parts.base && parts.index && parts.scale != 1) |
| || (parts.disp && !parts.base && parts.index && parts.scale != 1) |
| || (!parts.disp && parts.base && parts.index && parts.scale == 1))) |
| cost += 10; |
| |
| return cost; |
| } |
| |
| /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as |
   this is used to form addresses to local data when -fPIC is in
| use. */ |
| |
| static bool |
| darwin_local_data_pic (rtx disp) |
| { |
| return (GET_CODE (disp) == UNSPEC |
| && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET); |
| } |
| |
| /* Determine if a given RTX is a valid constant. We already know this |
| satisfies CONSTANT_P. */ |
| |
| static bool |
| ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) |
| { |
| switch (GET_CODE (x)) |
| { |
| case CONST: |
| x = XEXP (x, 0); |
| |
| if (GET_CODE (x) == PLUS) |
| { |
| if (!CONST_INT_P (XEXP (x, 1))) |
| return false; |
| x = XEXP (x, 0); |
| } |
| |
| if (TARGET_MACHO && darwin_local_data_pic (x)) |
| return true; |
| |
| /* Only some unspecs are valid as "constants". */ |
| if (GET_CODE (x) == UNSPEC) |
| switch (XINT (x, 1)) |
| { |
| case UNSPEC_GOT: |
| case UNSPEC_GOTOFF: |
| case UNSPEC_PLTOFF: |
| return TARGET_64BIT; |
| case UNSPEC_TPOFF: |
| case UNSPEC_NTPOFF: |
| x = XVECEXP (x, 0, 0); |
| return (GET_CODE (x) == SYMBOL_REF |
| && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); |
| case UNSPEC_DTPOFF: |
| x = XVECEXP (x, 0, 0); |
| return (GET_CODE (x) == SYMBOL_REF |
| && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC); |
| default: |
| return false; |
| } |
| |
| /* We must have drilled down to a symbol. */ |
| if (GET_CODE (x) == LABEL_REF) |
| return true; |
| if (GET_CODE (x) != SYMBOL_REF) |
| return false; |
| /* FALLTHRU */ |
| |
| case SYMBOL_REF: |
| /* TLS symbols are never valid. */ |
| if (SYMBOL_REF_TLS_MODEL (x)) |
| return false; |
| |
| /* DLLIMPORT symbols are never valid. */ |
| if (TARGET_DLLIMPORT_DECL_ATTRIBUTES |
| && SYMBOL_REF_DLLIMPORT_P (x)) |
| return false; |
| |
| #if TARGET_MACHO |
| /* mdynamic-no-pic */ |
| if (MACHO_DYNAMIC_NO_PIC_P) |
| return machopic_symbol_defined_p (x); |
| #endif |
| break; |
| |
| case CONST_DOUBLE: |
| if (GET_MODE (x) == TImode |
| && x != CONST0_RTX (TImode) |
| && !TARGET_64BIT) |
| return false; |
| break; |
| |
| case CONST_VECTOR: |
| if (!standard_sse_constant_p (x)) |
| return false; |
| |
| default: |
| break; |
| } |
| |
| /* Otherwise we handle everything else in the move patterns. */ |
| return true; |
| } |
| |
/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of a thread-local symbol, which
   is checked above.  */
| |
| static bool |
| ix86_cannot_force_const_mem (enum machine_mode mode, rtx x) |
| { |
| /* We can always put integral constants and vectors in memory. */ |
| switch (GET_CODE (x)) |
| { |
| case CONST_INT: |
| case CONST_DOUBLE: |
| case CONST_VECTOR: |
| return false; |
| |
| default: |
| break; |
| } |
| return !ix86_legitimate_constant_p (mode, x); |
| } |
| |
/* Nonzero if the symbol is marked as dllimport or as a stub-variable,
   otherwise zero.  */
| |
| static bool |
| is_imported_p (rtx x) |
| { |
| if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES |
| || GET_CODE (x) != SYMBOL_REF) |
| return false; |
| |
| return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x); |
| } |
| |
| |
| /* Nonzero if the constant value X is a legitimate general operand |
| when generating PIC code. It is given that flag_pic is on and |
| that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ |
| |
| bool |
| legitimate_pic_operand_p (rtx x) |
| { |
| rtx inner; |
| |
| switch (GET_CODE (x)) |
| { |
| case CONST: |
| inner = XEXP (x, 0); |
| if (GET_CODE (inner) == PLUS |
| && CONST_INT_P (XEXP (inner, 1))) |
| inner = XEXP (inner, 0); |
| |
| /* Only some unspecs are valid as "constants". */ |
| if (GET_CODE (inner) == UNSPEC) |
| switch (XINT (inner, 1)) |
| { |
| case UNSPEC_GOT: |
| case UNSPEC_GOTOFF: |
| case UNSPEC_PLTOFF: |
| return TARGET_64BIT; |
| case UNSPEC_TPOFF: |
| x = XVECEXP (inner, 0, 0); |
| return (GET_CODE (x) == SYMBOL_REF |
| && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); |
| case UNSPEC_MACHOPIC_OFFSET: |
| return legitimate_pic_address_disp_p (x); |
| default: |
| return false; |
| } |
| /* FALLTHRU */ |
| |
| case SYMBOL_REF: |
| case LABEL_REF: |
| return legitimate_pic_address_disp_p (x); |
| |
| default: |
| return true; |
| } |
| } |
| |
| /* Determine if a given CONST RTX is a valid memory displacement |
| in PIC mode. */ |
| |
| bool |
| legitimate_pic_address_disp_p (rtx disp) |
| { |
| bool saw_plus; |
| |
| /* In 64bit mode we can allow direct addresses of symbols and labels |
| when they are not dynamic symbols. */ |
| if (TARGET_64BIT) |
| { |
| rtx op0 = disp, op1; |
| |
| switch (GET_CODE (disp)) |
| { |
| case LABEL_REF: |
| return true; |
| |
| case CONST: |
| if (GET_CODE (XEXP (disp, 0)) != PLUS) |
| break; |
| op0 = XEXP (XEXP (disp, 0), 0); |
| op1 = XEXP (XEXP (disp, 0), 1); |
| if (!CONST_INT_P (op1) |
| || INTVAL (op1) >= 16*1024*1024 |
| || INTVAL (op1) < -16*1024*1024) |
| break; |
| if (GET_CODE (op0) == LABEL_REF) |
| return true; |
| if (GET_CODE (op0) == CONST |
| && GET_CODE (XEXP (op0, 0)) == UNSPEC |
| && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL) |
| return true; |
| if (GET_CODE (op0) == UNSPEC |
| && XINT (op0, 1) == UNSPEC_PCREL) |
| return true; |
| if (GET_CODE (op0) != SYMBOL_REF) |
| break; |
| /* FALLTHRU */ |
| |
| case SYMBOL_REF: |
	  /* TLS references should always be enclosed in UNSPEC.
	     A dllimported symbol always needs to be resolved.  */
| if (SYMBOL_REF_TLS_MODEL (op0) |
| || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0))) |
| return false; |
| |
| if (TARGET_PECOFF) |
| { |
| if (is_imported_p (op0)) |
| return true; |
| |
| if (SYMBOL_REF_FAR_ADDR_P (op0) |
| || !SYMBOL_REF_LOCAL_P (op0)) |
| break; |
| |
	      /* Function symbols need to be resolved only for
	         the large model.
	         For the small model we don't need to resolve anything
	         here.  */
| if ((ix86_cmodel != CM_LARGE_PIC |
| && SYMBOL_REF_FUNCTION_P (op0)) |
| || ix86_cmodel == CM_SMALL_PIC) |
| return true; |
	      /* Non-external symbols don't need to be resolved for
	         the large and medium models.  */
| if ((ix86_cmodel == CM_LARGE_PIC |
| || ix86_cmodel == CM_MEDIUM_PIC) |
| && !SYMBOL_REF_EXTERNAL_P (op0)) |
| return true; |
| } |
| else if (!SYMBOL_REF_FAR_ADDR_P (op0) |
| && SYMBOL_REF_LOCAL_P (op0) |
| && ix86_cmodel != CM_LARGE_PIC) |
| return true; |
| break; |
| |
| default: |
| break; |
| } |
| } |
| if (GET_CODE (disp) != CONST) |
| return false; |
| disp = XEXP (disp, 0); |
| |
| if (TARGET_64BIT) |
| { |
      /* It is unsafe to allow PLUS expressions; this limits the allowed
	 distance of GOT tables.  We should not need these anyway.  */
| if (GET_CODE (disp) != UNSPEC |
| || (XINT (disp, 1) != UNSPEC_GOTPCREL |
| && XINT (disp, 1) != UNSPEC_GOTOFF |
| && XINT (disp, 1) != UNSPEC_PCREL |
| && XINT (disp, 1) != UNSPEC_PLTOFF)) |
| return false; |
| |
| if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF |
| && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF) |
| return false; |
| return true; |
| } |
| |
| saw_plus = false; |
| if (GET_CODE (disp) == PLUS) |
| { |
| if (!CONST_INT_P (XEXP (disp, 1))) |
| return false; |
| disp = XEXP (disp, 0); |
| saw_plus = true; |
| } |
| |
| if (TARGET_MACHO && darwin_local_data_pic (disp)) |
| return true; |
| |
| if (GET_CODE (disp) != UNSPEC) |
| return false; |
| |
| switch (XINT (disp, 1)) |
| { |
| case UNSPEC_GOT: |
| if (saw_plus) |
| return false; |
| /* We need to check for both symbols and labels because VxWorks loads |
| text labels with @GOT rather than @GOTOFF. See gotoff_operand for |
| details. */ |
| return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF |
| || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF); |
| case UNSPEC_GOTOFF: |
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI also specifies a 32bit relocation, we don't produce it
	 in the small PIC model at all.  */
| if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF |
| || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF) |
| && !TARGET_64BIT) |
| return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode); |
| return false; |
| case UNSPEC_GOTTPOFF: |
| case UNSPEC_GOTNTPOFF: |
| case UNSPEC_INDNTPOFF: |
| if (saw_plus) |
| return false; |
| disp = XVECEXP (disp, 0, 0); |
| return (GET_CODE (disp) == SYMBOL_REF |
| && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC); |
| case UNSPEC_NTPOFF: |
| disp = XVECEXP (disp, 0, 0); |
| return (GET_CODE (disp) == SYMBOL_REF |
| && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC); |
| case UNSPEC_DTPOFF: |
| disp = XVECEXP (disp, 0, 0); |
| return (GET_CODE (disp) == SYMBOL_REF |
| && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC); |
| } |
| |
| return false; |
| } |
| |
| /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to |
| replace the input X, or the original X if no replacement is called for. |
| The output parameter *WIN is 1 if the calling macro should goto WIN, |
| 0 if it should not. */ |
| |
| bool |
| ix86_legitimize_reload_address (rtx x, |
| enum machine_mode mode ATTRIBUTE_UNUSED, |
| int opnum, int type, |
| int ind_levels ATTRIBUTE_UNUSED) |
| { |
| /* Reload can generate: |
| |
| (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP) |
| (reg:DI 97)) |
| (reg:DI 2 cx)) |
| |
     This RTX is rejected by ix86_legitimate_address_p due to the
     non-strictness of base register 97.  Following this rejection,
     reload pushes all three components into separate registers,
     creating an invalid memory address RTX.

     The following code reloads only the invalid part of the
     memory address RTX.  */
| |
| if (GET_CODE (x) == PLUS |
| && REG_P (XEXP (x, 1)) |
| && GET_CODE (XEXP (x, 0)) == PLUS |
| && REG_P (XEXP (XEXP (x, 0), 1))) |
| { |
| rtx base, index; |
| bool something_reloaded = false; |
| |
| base = XEXP (XEXP (x, 0), 1); |
| if (!REG_OK_FOR_BASE_STRICT_P (base)) |
| { |
| push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL, |
| BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, |
| opnum, (enum reload_type) type); |
| something_reloaded = true; |
| } |
| |
| index = XEXP (x, 1); |
| if (!REG_OK_FOR_INDEX_STRICT_P (index)) |
| { |
| push_reload (index, NULL_RTX, &XEXP (x, 1), NULL, |
| INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, |
| opnum, (enum reload_type) type); |
| something_reloaded = true; |
| } |
| |
| gcc_assert (something_reloaded); |
| return true; |
| } |
| |
| return false; |
| } |
| |
/* Determine if OP is a suitable RTX for an address register.
   Return the naked register if a register or a register subreg is
   found, otherwise return NULL_RTX.  */
| |
| static rtx |
| ix86_validate_address_register (rtx op) |
| { |
| enum machine_mode mode = GET_MODE (op); |
| |
| /* Only SImode or DImode registers can form the address. */ |
| if (mode != SImode && mode != DImode) |
| return NULL_RTX; |
| |
| if (REG_P (op)) |
| return op; |
| else if (GET_CODE (op) == SUBREG) |
| { |
| rtx reg = SUBREG_REG (op); |
| |
| if (!REG_P (reg)) |
| return NULL_RTX; |
| |
| mode = GET_MODE (reg); |
| |
| /* Don't allow SUBREGs that span more than a word. It can |
| lead to spill failures when the register is one word out |
| of a two word structure. */ |
| if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) |
| return NULL_RTX; |
| |
| /* Allow only SUBREGs of non-eliminable hard registers. */ |
| if (register_no_elim_operand (reg, mode)) |
| return reg; |
| } |
| |
| /* Op is not a register. */ |
| return NULL_RTX; |
| } |
| |
| /* Recognizes RTL expressions that are valid memory addresses for an |
| instruction. The MODE argument is the machine mode for the MEM |
| expression that wants to use this address. |
| |
   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
| convert common non-canonical forms to canonical form so that they will |
| be recognized. */ |
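
/* For example (illustrative, not from the original sources),
   (plus:SI (mult:SI (reg:SI idx) (const_int 4)) (reg:SI base)) is accepted,
   while the same address with (const_int 3) as the multiplier is rejected
   below, because 3 is not a valid scale factor.  */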
| |
| static bool |
| ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, |
| rtx addr, bool strict) |
| { |
| struct ix86_address parts; |
| rtx base, index, disp; |
| HOST_WIDE_INT scale; |
| enum ix86_address_seg seg; |
| |
| if (ix86_decompose_address (addr, &parts) <= 0) |
| /* Decomposition failed. */ |
| return false; |
| |
| base = parts.base; |
| index = parts.index; |
| disp = parts.disp; |
| scale = parts.scale; |
| seg = parts.seg; |
| |
| /* Validate base register. */ |
| if (base) |
| { |
| rtx reg = ix86_validate_address_register (base); |
| |
| if (reg == NULL_RTX) |
| return false; |
| |
| if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg)) |
| || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg))) |
| /* Base is not valid. */ |
| return false; |
| } |
| |
| /* Validate index register. */ |
| if (index) |
| { |
| rtx reg = ix86_validate_address_register (index); |
| |
| if (reg == NULL_RTX) |
| return false; |
| |
| if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg)) |
| || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg))) |
| /* Index is not valid. */ |
| return false; |
| } |
| |
| /* Index and base should have the same mode. */ |
| if (base && index |
| && GET_MODE (base) != GET_MODE (index)) |
| return false; |
| |
| /* Address override works only on the (%reg) part of %fs:(%reg). */ |
| if (seg != SEG_DEFAULT |
| && ((base && GET_MODE (base) != word_mode) |
| || (index && GET_MODE (index) != word_mode))) |
| return false; |
| |
| /* Validate scale factor. */ |
| if (scale != 1) |
| { |
| if (!index) |
| /* Scale without index. */ |
| return false; |
| |
| if (scale != 2 && scale != 4 && scale != 8) |
| /* Scale is not a valid multiplier. */ |
| return false; |
| } |
| |
| /* Validate displacement. */ |
| if (disp) |
| { |
| if (GET_CODE (disp) == CONST |
| && GET_CODE (XEXP (disp, 0)) == UNSPEC |
| && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET) |
| switch (XINT (XEXP (disp, 0), 1)) |
| { |
	    /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
	       used.  While the ABI also specifies 32bit relocations, we don't
	       produce them at all and use IP-relative addressing instead.  */
| case UNSPEC_GOT: |
| case UNSPEC_GOTOFF: |
| gcc_assert (flag_pic); |
| if (!TARGET_64BIT) |
| goto is_legitimate_pic; |
| |
| /* 64bit address unspec. */ |
| return false; |
| |
| case UNSPEC_GOTPCREL: |
| case UNSPEC_PCREL: |
| gcc_assert (flag_pic); |
| goto is_legitimate_pic; |
| |
| case UNSPEC_GOTTPOFF: |
| case UNSPEC_GOTNTPOFF: |
| case UNSPEC_INDNTPOFF: |
| case UNSPEC_NTPOFF: |
| case UNSPEC_DTPOFF: |
| break; |
| |
| case UNSPEC_STACK_CHECK: |
| gcc_assert (flag_split_stack); |
| break; |
| |
| default: |
| /* Invalid address unspec. */ |
| return false; |
| } |
| |
| else if (SYMBOLIC_CONST (disp) |
| && (flag_pic |
| || (TARGET_MACHO |
| #if TARGET_MACHO |
| && MACHOPIC_INDIRECT |
| && !machopic_operand_p (disp) |
| #endif |
| ))) |
| { |
| |
| is_legitimate_pic: |
| if (TARGET_64BIT && (index || base)) |
| { |
| /* foo@dtpoff(%rX) is ok. */ |
| if (GET_CODE (disp) != CONST |
| || GET_CODE (XEXP (disp, 0)) != PLUS |
| || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC |
| || !CONST_INT_P (XEXP (XEXP (disp, 0), 1)) |
| || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF |
| && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF)) |
| /* Non-constant pic memory reference. */ |
| return false; |
| } |
| else if ((!TARGET_MACHO || flag_pic) |
| && ! legitimate_pic_address_disp_p (disp)) |
| /* Displacement is an invalid pic construct. */ |
| return false; |
| #if TARGET_MACHO |
| else if (MACHO_DYNAMIC_NO_PIC_P |
| && !ix86_legitimate_constant_p (Pmode, disp)) |
	    /* displacement must be referenced via a non_lazy_pointer */
| return false; |
| #endif |
| |
| /* This code used to verify that a symbolic pic displacement |
| includes the pic_offset_table_rtx register. |
| |
	     While this is a good idea, unfortunately these constructs may
	     be created by the "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but it results in addressing the
	     GOT table with a pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by the
	     "addsi3" pattern, which later gets split to lea in case the
	     output register differs from the input.  While this
	     could be handled by a separate addsi pattern for this case
	     that never results in lea, disabling this test seems to be
	     the easier and correct fix for the crash.  */
| } |
| else if (GET_CODE (disp) != LABEL_REF |
| && !CONST_INT_P (disp) |
| && (GET_CODE (disp) != CONST |
| || !ix86_legitimate_constant_p (Pmode, disp)) |
| && (GET_CODE (disp) != SYMBOL_REF |
| || !ix86_legitimate_constant_p (Pmode, disp))) |
| /* Displacement is not constant. */ |
| return false; |
| else if (TARGET_64BIT |
| && !x86_64_immediate_operand (disp, VOIDmode)) |
| /* Displacement is out of range. */ |
| return false; |
| /* In x32 mode, constant addresses are sign extended to 64bit, so |
| we have to prevent addresses from 0x80000000 to 0xffffffff. */ |
| else if (TARGET_X32 && !(index || base) |
| && CONST_INT_P (disp) |
| && val_signbit_known_set_p (SImode, INTVAL (disp))) |
| return false; |
| } |
| |
| /* Everything looks valid. */ |
| return true; |
| } |
| |
| /* Determine if a given RTX is a valid constant address. */ |
| |
| bool |
| constant_address_p (rtx x) |
| { |
| return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1); |
| } |
| |
| /* Return a unique alias set for the GOT. */ |
| |
| static alias_set_type |
| ix86_GOT_alias_set (void) |
| { |
| static alias_set_type set = -1; |
| if (set == -1) |
| set = new_alias_set (); |
| return set; |
| } |
| |
| /* Return a legitimate reference for ORIG (an address) using the |
| register REG. If REG is 0, a new pseudo is generated. |
| |
| There are two types of references that must be handled: |
| |
| 1. Global data references must load the address from the GOT, via |
| the PIC reg. An insn is emitted to do this load, and the reg is |
| returned. |
| |
| 2. Static data references, constant pool addresses, and code labels |
| compute the address as an offset from the GOT, whose base is in |
| the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to |
| differentiate them from global data objects. The returned |
| address is the PIC reg + an unspec constant. |
| |
| TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC |
| reg also appears in the address. */ |
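
/* As an illustrative sketch (not from the original sources): on ia32 a
   reference to a global symbol "foo" becomes a load from its GOT slot,
   roughly foo@GOT(%ebx), while a reference to a local static symbol becomes
   the GOT-relative constant foo@GOTOFF added to the PIC register.  */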
| |
| static rtx |
| legitimize_pic_address (rtx orig, rtx reg) |
| { |
| rtx addr = orig; |
| rtx new_rtx = orig; |
| |
| #if TARGET_MACHO |
| if (TARGET_MACHO && !TARGET_64BIT) |
| { |
| if (reg == 0) |
| reg = gen_reg_rtx (Pmode); |
| /* Use the generic Mach-O PIC machinery. */ |
| return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg); |
| } |
| #endif |
| |
| if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES) |
| { |
| rtx tmp = legitimize_pe_coff_symbol (addr, true); |
| if (tmp) |
| return tmp; |
| } |
| |
| if (TARGET_64BIT && legitimate_pic_address_disp_p (addr)) |
| new_rtx = addr; |
| else if (TARGET_64BIT && !TARGET_PECOFF |
| && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode)) |
| { |
| rtx tmpreg; |
| /* This symbol may be referenced via a displacement from the PIC |
| base address (@GOTOFF). */ |
| |
| if (reload_in_progress) |
| df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); |
| if (GET_CODE (addr) == CONST) |
| addr = XEXP (addr, 0); |
| if (GET_CODE (addr) == PLUS) |
| { |
| new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), |
| UNSPEC_GOTOFF); |
| new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1)); |
| } |
| else |
| new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); |
| new_rtx = gen_rtx_CONST (Pmode, new_rtx); |
| if (!reg) |
| tmpreg = gen_reg_rtx (Pmode); |
| else |
| tmpreg = reg; |
| emit_move_insn (tmpreg, new_rtx); |
| |
| if (reg != 0) |
| { |
| new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx, |
| tmpreg, 1, OPTAB_DIRECT); |
| new_rtx = reg; |
| } |
| else |
| new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg); |
| } |
| else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode)) |
| { |
| /* This symbol may be referenced via a displacement from the PIC |
| base address (@GOTOFF). */ |
| |
| if (reload_in_progress) |
| df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); |
| if (GET_CODE (addr) == CONST) |
| addr = XEXP (addr, 0); |
| if (GET_CODE (addr) == PLUS) |
| { |
| new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), |
| UNSPEC_GOTOFF); |
| new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1)); |
| } |
| else |
| new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); |
| new_rtx = gen_rtx_CONST (Pmode, new_rtx); |
| new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); |
| |
| if (reg != 0) |
| { |
| emit_move_insn (reg, new_rtx); |
| new_rtx = reg; |
| } |
| } |
| else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0) |
| /* We can't use @GOTOFF for text labels on VxWorks; |
| see gotoff_operand. */ |
| || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF)) |
| { |
| rtx tmp = legitimize_pe_coff_symbol (addr, true); |
| if (tmp) |
| return tmp; |
| |
	  /* For x64 PE-COFF there is no GOT table, so we use the address
	     directly.  */
| if (TARGET_64BIT && TARGET_PECOFF) |
| { |
| new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL); |
| new_rtx = gen_rtx_CONST (Pmode, new_rtx); |
| |
| if (reg == 0) |
| reg = gen_reg_rtx (Pmode); |
| emit_move_insn (reg, new_rtx); |
| new_rtx = reg; |
| } |
| else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC) |
| { |
| new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL); |
| new_rtx = gen_rtx_CONST (Pmode, new_rtx); |
| new_rtx = gen_const_mem (Pmode, new_rtx); |
| set_mem_alias_set (new_rtx, ix86_GOT_alias_set ()); |
| |
| if (reg == 0) |
| reg = gen_reg_rtx (Pmode); |
	  /* Use gen_movsi directly, otherwise the address is loaded
	     into a register for CSE.  We don't want to CSE these addresses;
	     instead we CSE addresses from the GOT table, so skip this.  */
| emit_insn (gen_movsi (reg, new_rtx)); |
| new_rtx = reg; |
| } |
| else |
| { |
| /* This symbol must be referenced via a load from the |
| Global Offset Table (@GOT). */ |
| |
| if (reload_in_progress) |
| df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); |
| new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); |
| new_rtx = gen_rtx_CONST (Pmode, new_rtx); |
| if (TARGET_64BIT) |
| new_rtx = force_reg (Pmode, new_rtx); |
| new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); |
| new_rtx = gen_const_mem (Pmode, new_rtx); |
| set_mem_alias_set (new_rtx, ix86_GOT_alias_set ()); |
| |
| if (reg == 0) |
| reg = gen_reg_rtx (Pmode); |
| emit_move_insn (reg, new_rtx); |
| new_rtx = reg; |
| } |
| } |
| else |
| { |
| if (CONST_INT_P (addr) |
| && !x86_64_immediate_operand (addr, VOIDmode)) |
| { |
| if (reg) |
| { |
| emit_move_insn (reg, addr); |
| new_rtx = reg; |
| } |
| else |
| new_rtx = force_reg (Pmode, addr); |
| } |
| else if (GET_CODE (addr) == CONST) |
| { |
| addr = XEXP (addr, 0); |
| |
| /* We must match stuff we generate before. Assume the only |
| unspecs that can get here are ours. Not that we could do |
| anything with them anyway.... */ |
| if (GET_CODE (addr) == UNSPEC |
| || (GET_CODE (addr) == PLUS |
| && GET_CODE (XEXP (addr, 0)) == UNSPEC)) |
| return orig; |
| gcc_assert (GET_CODE (addr) == PLUS); |
| } |
| if (GET_CODE (addr) == PLUS) |
| { |
| rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); |
| |
| /* Check first to see if this is a constant offset from a @GOTOFF |
| symbol reference. */ |
| if (!TARGET_PECOFF && gotoff_operand (op0, Pmode) |
| && CONST_INT_P (op1)) |
| { |
| if (!TARGET_64BIT) |
| { |
| if (reload_in_progress) |
| df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); |
| new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), |
| UNSPEC_GOTOFF); |
| new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1); |
| new_rtx = gen_rtx_CONST (Pmode, new_rtx); |
| new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); |
| |
| if (reg != 0) |
| { |
| emit_move_insn (reg, new_rtx); |
| new_rtx = reg; |
| } |
| } |
| else |
| { |
| if (INTVAL (op1) < -16*1024*1024 |
| || INTVAL (op1) >= 16*1024*1024) |
| { |
| if (!x86_64_immediate_operand (op1, Pmode)) |
| op1 = force_reg (Pmode, op1); |
| new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1); |
| } |
| } |
| } |
| else |
| { |
| rtx base = legitimize_pic_address (op0, reg); |
| enum machine_mode mode = GET_MODE (base); |
| new_rtx |
| = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg); |
| |
| if (CONST_INT_P (new_rtx)) |
| { |
| if (INTVAL (new_rtx) < -16*1024*1024 |
| || INTVAL (new_rtx) >= 16*1024*1024) |
| { |
| if (!x86_64_immediate_operand (new_rtx, mode)) |
| new_rtx = force_reg (mode, new_rtx); |
| new_rtx |
| = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx); |
| } |
| else |
| new_rtx = plus_constant (mode, base, INTVAL (new_rtx)); |
| } |
| else |
| { |
| if (GET_CODE (new_rtx) == PLUS |
| && CONSTANT_P (XEXP (new_rtx, 1))) |
| { |
| base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0)); |
| new_rtx = XEXP (new_rtx, 1); |
| } |
| new_rtx = gen_rtx_PLUS (mode, base, new_rtx); |
| } |
| } |
| } |
| } |
| return new_rtx; |
| } |
| |
| /* Load the thread pointer. If TO_REG is true, force it into a register. */ |
| |
| static rtx |
| get_thread_pointer (enum machine_mode tp_mode, bool to_reg) |
| { |
| rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP); |
| |
| if (GET_MODE (tp) != tp_mode) |
| { |
| gcc_assert (GET_MODE (tp) == SImode); |
| gcc_assert (tp_mode == DImode); |
| |
| tp = gen_rtx_ZERO_EXTEND (tp_mode, tp); |
| } |
| |
| if (to_reg) |
| tp = copy_to_mode_reg (tp_mode, tp); |
| |
| return tp; |
| } |
| |
| /* Construct the SYMBOL_REF for the tls_get_addr function. */ |
| |
| static GTY(()) rtx ix86_tls_symbol; |
| |
| static rtx |
| ix86_tls_get_addr (void) |
| { |
| if (!ix86_tls_symbol) |
| { |
| const char *sym |
| = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT) |
| ? "___tls_get_addr" : "__tls_get_addr"); |
| |
| ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym); |
| } |
| |
| if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF) |
| { |
| rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol), |
| UNSPEC_PLTOFF); |
| return gen_rtx_PLUS (Pmode, pic_offset_table_rtx, |
| gen_rtx_CONST (Pmode, unspec)); |
| } |
| |
| return ix86_tls_symbol; |
| } |
| |
| /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */ |
| |
| static GTY(()) rtx ix86_tls_module_base_symbol; |
| |
| rtx |
| ix86_tls_module_base (void) |
| { |
| if (!ix86_tls_module_base_symbol) |
| { |
| ix86_tls_module_base_symbol |
| = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_"); |
| |
| SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol) |
| |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; |
| } |
| |
| return ix86_tls_module_base_symbol; |
| } |
| |
| /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is |
| false if we expect this to be used for a memory address and true if |
| we expect to load the address into a register. */ |
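
/* As a rough illustration (not taken from the original sources): for the
   local-exec model with GNU TLS this yields the thread pointer plus an
   x@ntpoff (or x@tpoff) constant, while for the initial-exec model the
   offset is first loaded from a GOT slot such as x@gottpoff(%rip).  */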
| |
| static rtx |
| legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) |
| { |
| rtx dest, base, off; |
| rtx pic = NULL_RTX, tp = NULL_RTX; |
| enum machine_mode tp_mode = Pmode; |
| int type; |
| |
  /* Fall back to the global dynamic model if the toolchain cannot support
     local dynamic.  */
| if (TARGET_SUN_TLS && !TARGET_64BIT |
| && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM |
| && model == TLS_MODEL_LOCAL_DYNAMIC) |
| model = TLS_MODEL_GLOBAL_DYNAMIC; |
| |
| switch (model) |
| { |
| case TLS_MODEL_GLOBAL_DYNAMIC: |
| dest = gen_reg_rtx (Pmode); |
| |
| if (!TARGET_64BIT) |
| { |
| if (flag_pic && !TARGET_PECOFF) |
| pic = pic_offset_table_rtx; |
| else |
| { |
| pic = gen_reg_rtx (Pmode); |
| emit_insn (gen_set_got (pic)); |
| } |
| } |
| |
| if (TARGET_GNU2_TLS) |
| { |
| if (TARGET_64BIT) |
| emit_insn (gen_tls_dynamic_gnu2_64 (dest, x)); |
| else |
| emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic)); |
| |
| tp = get_thread_pointer (Pmode, true); |
| dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest)); |
| |
| if (GET_MODE (x) != Pmode) |
| x = gen_rtx_ZERO_EXTEND (Pmode, x); |
| |
| set_unique_reg_note (get_last_insn (), REG_EQUAL, x); |
| } |
| else |
| { |
| rtx caddr = ix86_tls_get_addr (); |
| |
| if (TARGET_64BIT) |
| { |
| rtx rax = gen_rtx_REG (Pmode, AX_REG); |
| rtx insns; |
| |
| start_sequence (); |
| emit_call_insn |
| (ix86_gen_tls_global_dynamic_64 (rax, x, caddr)); |
| insns = get_insns (); |
| end_sequence (); |
| |
| if (GET_MODE (x) != Pmode) |
| x = gen_rtx_ZERO_EXTEND (Pmode, x); |
| |
| RTL_CONST_CALL_P (insns) = 1; |
| emit_libcall_block (insns, dest, rax, x); |
| } |
| else |
| emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr)); |
| } |
| break; |
| |
| case TLS_MODEL_LOCAL_DYNAMIC: |
| base = gen_reg_rtx (Pmode); |
| |
| if (!TARGET_64BIT) |
| { |
| if (flag_pic) |
| pic = pic_offset_table_rtx; |
| else |
| { |
| pic = gen_reg_rtx (Pmode); |
| emit_insn (gen_set_got (pic)); |
| } |
| } |
| |
| if (TARGET_GNU2_TLS) |
| { |
| rtx tmp = ix86_tls_module_base (); |
| |
| if (TARGET_64BIT) |
| emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp)); |
| else |
| emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic)); |
| |
| tp = get_thread_pointer (Pmode, true); |
| set_unique_reg_note (get_last_insn (), REG_EQUAL, |
| gen_rtx_MINUS (Pmode, tmp, tp)); |
| } |
| else |
| { |
| rtx caddr = ix86_tls_get_addr (); |
| |
| if (TARGET_64BIT) |
| { |
| rtx rax = gen_rtx_REG (Pmode, AX_REG); |
| rtx insns, eqv; |
| |
| start_sequence (); |
| emit_call_insn |
| (ix86_gen_tls_local_dynamic_base_64 (rax, caddr)); |
| insns = get_insns (); |
| end_sequence (); |
| |
| /* Attach a unique REG_EQUAL, to allow the RTL optimizers to |
| share the LD_BASE result with other LD model accesses. */ |
| eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), |
| UNSPEC_TLS_LD_BASE); |
| |
| RTL_CONST_CALL_P (insns) = 1; |
| emit_libcall_block (insns, base, rax, eqv); |
| } |
| else |
| emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr)); |
| } |
| |
| off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF); |
| off = gen_rtx_CONST (Pmode, off); |
| |
| dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off)); |
| |
| if (TARGET_GNU2_TLS) |
| { |
| dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp)); |
| |
| if (GET_MODE (x) != Pmode) |
| x = gen_rtx_ZERO_EXTEND (Pmode, x); |
| |
| set_unique_reg_note (get_last_insn (), REG_EQUAL, x); |
| } |
| break; |
| |
| case TLS_MODEL_INITIAL_EXEC: |
| if (TARGET_64BIT) |
| { |
| if (TARGET_SUN_TLS && !TARGET_X32) |
| { |
| /* The Sun linker took the AMD64 TLS spec literally |
		 and can only handle %rax as the destination of the
| initial executable code sequence. */ |
| |
| dest = gen_reg_rtx (DImode); |
| emit_insn (gen_tls_initial_exec_64_sun (dest, x)); |
| return dest; |
| } |
| |
| /* Generate DImode references to avoid %fs:(%reg32) |
		 problems and the linker IE->LE relaxation bug.  */
| tp_mode = DImode; |
| pic = NULL; |
| type = UNSPEC_GOTNTPOFF; |
| } |
| else if (flag_pic) |
| { |
| if (reload_in_progress) |
| df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); |
| pic = pic_offset_table_rtx; |
| type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF; |
| } |
| else if (!TARGET_ANY_GNU_TLS) |
| { |
| pic = gen_reg_rtx (Pmode); |
| emit_insn (gen_set_got (pic)); |
| type = UNSPEC_GOTTPOFF; |
| } |
| else |
| { |
| pic = NULL; |
| type = UNSPEC_INDNTPOFF; |
| } |
| |
| off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type); |
| off = gen_rtx_CONST (tp_mode, off); |
| if (pic) |
| off = gen_rtx_PLUS (tp_mode, pic, off); |
| off = gen_const_mem (tp_mode, off); |
| set_mem_alias_set (off, ix86_GOT_alias_set ()); |
| |
| if (TARGET_64BIT || TARGET_ANY_GNU_TLS) |
| { |
| base = get_thread_pointer (tp_mode, |
| for_mov || !TARGET_TLS_DIRECT_SEG_REFS); |
| off = force_reg (tp_mode, off); |
| return gen_rtx_PLUS (tp_mode, base, off); |
| } |
| else |
| { |
| base = get_thread_pointer (Pmode, true); |
| dest = gen_reg_rtx (Pmode); |
| emit_insn (ix86_gen_sub3 (dest, base, off)); |
| } |
| break; |
| |
| case TLS_MODEL_LOCAL_EXEC: |
| off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), |
| (TARGET_64BIT || TARGET_ANY_GNU_TLS) |
| ? UNSPEC_NTPOFF : UNSPEC_TPOFF); |
| off = gen_rtx_CONST (Pmode, off); |
| |
| if (TARGET_64BIT || TARGET_ANY_GNU_TLS) |
| { |
| base = get_thread_pointer (Pmode, |
| for_mov || !TARGET_TLS_DIRECT_SEG_REFS); |
| return gen_rtx_PLUS (Pmode, base, off); |
| } |
| else |
| { |
| base = get_thread_pointer (Pmode, true); |
| dest = gen_reg_rtx (Pmode); |
| emit_insn (ix86_gen_sub3 (dest, base, off)); |
| } |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| return dest; |
| } |
| |
| /* Create or return the unique __imp_DECL dllimport symbol corresponding |
| to symbol DECL if BEIMPORT is true. Otherwise create or return the |
| unique refptr-DECL symbol corresponding to symbol DECL. */ |
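
/* For example (illustrative, not from the original sources), a dllimport
   reference to "foo" resolves to the stub symbol "*__imp_foo" (or
   "*__imp__foo" when a user label prefix is in use), while a refptr
   reference becomes "*.refptr.foo" (or "*refptr.foo", respectively).  */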
| |
| static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map))) |
| htab_t dllimport_map; |
| |
| static tree |
| get_dllimport_decl (tree decl, bool beimport) |
| { |
| struct tree_map *h, in; |
| void **loc; |
| const char *name; |
| const char *prefix; |
| size_t namelen, prefixlen; |
| char *imp_name; |
| tree to; |
| rtx rtl; |
| |
| if (!dllimport_map) |
| dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0); |
| |
| in.hash = htab_hash_pointer (decl); |
| in.base.from = decl; |
| loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT); |
| h = (struct tree_map *) *loc; |
| if (h) |
| return h->to; |
| |
| *loc = h = ggc_alloc<tree_map> (); |
| h->hash = in.hash; |
| h->base.from = decl; |
| h->to = to = build_decl (DECL_SOURCE_LOCATION (decl), |
| VAR_DECL, NULL, ptr_type_node); |
| DECL_ARTIFICIAL (to) = 1; |
| DECL_IGNORED_P (to) = 1; |
| DECL_EXTERNAL (to) = 1; |
| TREE_READONLY (to) = 1; |
| |
| name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); |
| name = targetm.strip_name_encoding (name); |
| if (beimport) |
| prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0 |
| ? "*__imp_" : "*__imp__"; |
| else |
| prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr."; |
| namelen = strlen (name); |
| prefixlen = strlen (prefix); |
| imp_name = (char *) alloca (namelen + prefixlen + 1); |
| memcpy (imp_name, prefix, prefixlen); |
| memcpy (imp_name + prefixlen, name, namelen + 1); |
| |
| name = ggc_alloc_string (imp_name, namelen + prefixlen); |
| rtl = gen_rtx_SYMBOL_REF (Pmode, name); |
| SET_SYMBOL_REF_DECL (rtl, to); |
| SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR; |
| if (!beimport) |
| { |
| SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL; |
| #ifdef SUB_TARGET_RECORD_STUB |
| SUB_TARGET_RECORD_STUB (name); |
| #endif |
| } |
| |
| rtl = gen_const_mem (Pmode, rtl); |
| set_mem_alias_set (rtl, ix86_GOT_alias_set ()); |
| |
| SET_DECL_RTL (to, rtl); |
| SET_DECL_ASSEMBLER_NAME (to, get_identifier (name)); |
| |
| return to; |
| } |
| |
/* Expand SYMBOL into its corresponding far-address symbol.
   WANT_REG is true if we require the result to be a register.  */
| |
| static rtx |
| legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg) |
| { |
| tree imp_decl; |
| rtx x; |
| |
| gcc_assert (SYMBOL_REF_DECL (symbol)); |
| imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false); |
| |
| x = DECL_RTL (imp_decl); |
| if (want_reg) |
| x = force_reg (Pmode, x); |
| return x; |
| } |
| |
| /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is |
   true if we require the result to be a register.  */
| |
| static rtx |
| legitimize_dllimport_symbol (rtx symbol, bool want_reg) |
| { |
| tree imp_decl; |
| rtx x; |
| |
| gcc_assert (SYMBOL_REF_DECL (symbol)); |
| imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true); |
| |
| x = DECL_RTL (imp_decl); |
| if (want_reg) |
| x = force_reg (Pmode, x); |
| return x; |
| } |
| |
| /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG |
   is true if we require the result to be a register.  */
| |
| static rtx |
| legitimize_pe_coff_symbol (rtx addr, bool inreg) |
| { |
| if (!TARGET_PECOFF) |
| return NULL_RTX; |
| |
| if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) |
| { |
| if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr)) |
| return legitimize_dllimport_symbol (addr, inreg); |
| if (GET_CODE (addr) == CONST |
| && GET_CODE (XEXP (addr, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF |
| && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0))) |
| { |
| rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg); |
| return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1)); |
| } |
| } |
| |
| if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC) |
| return NULL_RTX; |
| if (GET_CODE (addr) == SYMBOL_REF |
| && !is_imported_p (addr) |
| && SYMBOL_REF_EXTERNAL_P (addr) |
| && SYMBOL_REF_DECL (addr)) |
| return legitimize_pe_coff_extern_decl (addr, inreg); |
| |
| if (GET_CODE (addr) == CONST |
| && GET_CODE (XEXP (addr, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF |
| && !is_imported_p (XEXP (XEXP (addr, 0), 0)) |
| && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0)) |
| && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0))) |
| { |
| rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg); |
| return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1)); |
| } |
| return NULL_RTX; |
| } |
| |
| /* Try machine-dependent ways of modifying an illegitimate address |
| to be legitimate. If we find one, return the new, valid address. |
| This macro is used in only one place: `memory_address' in explow.c. |
| |
| OLDX is the address as it was before break_out_memory_refs was called. |
| In some cases it is useful to look at this to decide what needs to be done. |
| |
| It is always safe for this macro to do nothing. It exists to recognize |
| opportunities to optimize the output. |
| |
| For the 80386, we handle X+REG by loading X into a register R and |
| using R+REG. R will go in a general reg and indexing will be used. |
| However, if REG is a broken-out memory address or multiplication, |
| nothing needs to be done because REG can certainly go in a general reg. |
| |
| When -fpic is used, special handling is needed for symbolic references. |
| See comments by legitimize_pic_address in i386.c for details. */ |
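
/* For example (illustrative, not from the original sources), an address of
   the form (plus (ashift (reg) (const_int 2)) (reg)) is rewritten below into
   the canonical (plus (mult (reg) (const_int 4)) (reg)) form, which
   ix86_legitimate_address_p then accepts as a scaled-index address.  */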
| |
| static rtx |
| ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, |
| enum machine_mode mode) |
| { |
| int changed = 0; |
| unsigned log; |
| |
| log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0; |
| if (log) |
| return legitimize_tls_address (x, (enum tls_model) log, false); |
| if (GET_CODE (x) == CONST |
| && GET_CODE (XEXP (x, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF |
| && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)))) |
| { |
| rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), |
| (enum tls_model) log, false); |
| return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1)); |
| } |
| |
| if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) |
| { |
| rtx tmp = legitimize_pe_coff_symbol (x, true); |
| if (tmp) |
| return tmp; |
| } |
| |
| if (flag_pic && SYMBOLIC_CONST (x)) |
| return legitimize_pic_address (x, 0); |
| |
| #if TARGET_MACHO |
| if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x)) |
| return machopic_indirect_data_reference (x, 0); |
| #endif |
| |
  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
| if (GET_CODE (x) == ASHIFT |
| && CONST_INT_P (XEXP (x, 1)) |
| && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4) |
| { |
| changed = 1; |
| log = INTVAL (XEXP (x, 1)); |
| x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)), |
| GEN_INT (1 << log)); |
| } |
| |
| if (GET_CODE (x) == PLUS) |
| { |
| /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */ |
| |
| if (GET_CODE (XEXP (x, 0)) == ASHIFT |
| && CONST_INT_P (XEXP (XEXP (x, 0), 1)) |
| && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4) |
| { |
| changed = 1; |
| log = INTVAL (XEXP (XEXP (x, 0), 1)); |
| XEXP (x, 0) = gen_rtx_MULT (Pmode, |
| force_reg (Pmode, XEXP (XEXP (x, 0), 0)), |
| GEN_INT (1 << log)); |
| } |
| |
| if (GET_CODE (XEXP (x, 1)) == ASHIFT |
| && CONST_INT_P (XEXP (XEXP (x, 1), 1)) |
| && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4) |
| { |
| changed = 1; |
| log = INTVAL (XEXP (XEXP (x, 1), 1)); |
| XEXP (x, 1) = gen_rtx_MULT (Pmode, |
| force_reg (Pmode, XEXP (XEXP (x, 1), 0)), |
| GEN_INT (1 << log)); |
| } |
| |
| /* Put multiply first if it isn't already. */ |
| if (GET_CODE (XEXP (x, 1)) == MULT) |
| { |
| rtx tmp = XEXP (x, 0); |
| XEXP (x, 0) = XEXP (x, 1); |
| XEXP (x, 1) = tmp; |
| changed = 1; |
| } |
| |
| /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const))) |
| into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be |
| created by virtual register instantiation, register elimination, and |
| similar optimizations. */ |
| if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS) |
| { |
| changed = 1; |
| x = gen_rtx_PLUS (Pmode, |
| gen_rtx_PLUS (Pmode, XEXP (x, 0), |
| XEXP (XEXP (x, 1), 0)), |
| XEXP (XEXP (x, 1), 1)); |
| } |
| |
| /* Canonicalize |
| (plus (plus (mult (reg) (const)) (plus (reg) (const))) const) |
| into (plus (plus (mult (reg) (const)) (reg)) (const)). */ |
| else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT |
| && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS |
| && CONSTANT_P (XEXP (x, 1))) |
| { |
| rtx constant; |
| rtx other = NULL_RTX; |
| |
| if (CONST_INT_P (XEXP (x, 1))) |
| { |
| constant = XEXP (x, 1); |
| other = XEXP (XEXP (XEXP (x, 0), 1), 1); |
| } |
| else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1))) |
| { |
| constant = XEXP (XEXP (XEXP (x, 0), 1), 1); |
| other = XEXP (x, 1); |
| } |
| else |
| constant = 0; |
| |
| if (constant) |
| { |
| changed = 1; |
| x = gen_rtx_PLUS (Pmode, |
| gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0), |
| XEXP (XEXP (XEXP (x, 0), 1), 0)), |
| plus_constant (Pmode, other, |
| INTVAL (constant))); |
| } |
| } |
| |
| if (changed && ix86_legitimate_address_p (mode, x, false)) |
| return x; |
| |
| if (GET_CODE (XEXP (x, 0)) == MULT) |
| { |
| changed = 1; |
| XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0)); |
| } |
| |
| if (GET_CODE (XEXP (x, 1)) == MULT) |
| { |
| changed = 1; |
| XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1)); |
| } |
| |
| if (changed |
| && REG_P (XEXP (x, 1)) |
| && REG_P (XEXP (x, 0))) |
| return x; |
| |
| if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1))) |
| { |
| changed = 1; |
| x = legitimize_pic_address (x, 0); |
| } |
| |
| if (changed && ix86_legitimate_address_p (mode, x, false)) |
| return x; |
| |
| if (REG_P (XEXP (x, 0))) |
| { |
| rtx temp = gen_reg_rtx (Pmode); |
| rtx val = force_operand (XEXP (x, 1), temp); |
| if (val != temp) |
| { |
| val = convert_to_mode (Pmode, val, 1); |
| emit_move_insn (temp, val); |
| } |
| |
| XEXP (x, 1) = temp; |
| return x; |
| } |
| |
| else if (REG_P (XEXP (x, 1))) |
| { |
| rtx temp = gen_reg_rtx (Pmode); |
| rtx val = force_operand (XEXP (x, 0), temp); |
| if (val != temp) |
| { |
| val = convert_to_mode (Pmode, val, 1); |
| emit_move_insn (temp, val); |
| } |
| |
| XEXP (x, 0) = temp; |
| return x; |
| } |
| } |
| |
| return x; |
| } |
| |
| /* Print an integer constant expression in assembler syntax. Addition |
| and subtraction are the only arithmetic that may appear in these |
| expressions. FILE is the stdio stream to write to, X is the rtx, and |
| CODE is the operand print code from the output string. */ |
| |
| static void |
| output_pic_addr_const (FILE *file, rtx x, int code) |
| { |
| char buf[256]; |
| |
| switch (GET_CODE (x)) |
| { |
| case PC: |
| gcc_assert (flag_pic); |
| putc ('.', file); |
| break; |
| |
| case SYMBOL_REF: |
| if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS) |
| output_addr_const (file, x); |
| else |
| { |
| const char *name = XSTR (x, 0); |
| |
| /* Mark the decl as referenced so that cgraph will |
| output the function. */ |
| if (SYMBOL_REF_DECL (x)) |
| mark_decl_referenced (SYMBOL_REF_DECL (x)); |
| |
| #if TARGET_MACHO |
| if (MACHOPIC_INDIRECT |
| && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) |
| name = machopic_indirection_name (x, /*stub_p=*/true); |
| #endif |
| assemble_name (file, name); |
| } |
| if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF) |
| && code == 'P' && ! SYMBOL_REF_LOCAL_P (x)) |
| fputs ("@PLT", file); |
| break; |
| |
| case LABEL_REF: |
| x = XEXP (x, 0); |
| /* FALLTHRU */ |
| case CODE_LABEL: |
| ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); |
| assemble_name (asm_out_file, buf); |
| break; |
| |
| case CONST_INT: |
| fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); |
| break; |
| |
| case CONST: |
| /* This used to output parentheses around the expression, |
| but that does not work on the 386 (either ATT or BSD assembler). */ |
| output_pic_addr_const (file, XEXP (x, 0), code); |
| break; |
| |
| case CONST_DOUBLE: |
| if (GET_MODE (x) == VOIDmode) |
| { |
| /* We can use %d if the number is <32 bits and positive. */ |
| if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0) |
| fprintf (file, "0x%lx%08lx", |
| (unsigned long) CONST_DOUBLE_HIGH (x), |
| (unsigned long) CONST_DOUBLE_LOW (x)); |
| else |
| fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x)); |
| } |
| else |
| /* We can't handle floating point constants; |
| TARGET_PRINT_OPERAND must handle them. */ |
| output_operand_lossage ("floating constant misused"); |
| break; |
| |
| case PLUS: |
| /* Some assemblers need integer constants to appear first. */ |
| if (CONST_INT_P (XEXP (x, 0))) |
| { |
| output_pic_addr_const (file, XEXP (x, 0), code); |
| putc ('+', file); |
| output_pic_addr_const (file, XEXP (x, 1), code); |
| } |
| else |
| { |
| gcc_assert (CONST_INT_P (XEXP (x, 1))); |
| output_pic_addr_const (file, XEXP (x, 1), code); |
| putc ('+', file); |
| output_pic_addr_const (file, XEXP (x, 0), code); |
| } |
| break; |
| |
| case MINUS: |
| if (!TARGET_MACHO) |
| putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file); |
| output_pic_addr_const (file, XEXP (x, 0), code); |
| putc ('-', file); |
| output_pic_addr_const (file, XEXP (x, 1), code); |
| if (!TARGET_MACHO) |
| putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file); |
| break; |
| |
| case UNSPEC: |
| if (XINT (x, 1) == UNSPEC_STACK_CHECK) |
| { |
| bool f = i386_asm_output_addr_const_extra (file, x); |
| gcc_assert (f); |
| break; |
| } |
| |
| gcc_assert (XVECLEN (x, 0) == 1); |
| output_pic_addr_const (file, XVECEXP (x, 0, 0), code); |
| switch (XINT (x, 1)) |
| { |
| case UNSPEC_GOT: |
| fputs ("@GOT", file); |
| break; |
| case UNSPEC_GOTOFF: |
| fputs ("@GOTOFF", file); |
| break; |
| case UNSPEC_PLTOFF: |
| fputs ("@PLTOFF", file); |
| break; |
| case UNSPEC_PCREL: |
| fputs (ASSEMBLER_DIALECT == ASM_ATT ? |
| "(%rip)" : "[rip]", file); |
| break; |
| case UNSPEC_GOTPCREL: |
| fputs (ASSEMBLER_DIALECT == ASM_ATT ? |
| "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file); |
| break; |
| case UNSPEC_GOTTPOFF: |
| /* FIXME: This might be @TPOFF in Sun ld too. */ |
| fputs ("@gottpoff", file); |
| break; |
| case UNSPEC_TPOFF: |
| fputs ("@tpoff", file); |
| break; |
| case UNSPEC_NTPOFF: |
| if (TARGET_64BIT) |
| fputs ("@tpoff", file); |
| else |
| fputs ("@ntpoff", file); |
| break; |
| case UNSPEC_DTPOFF: |
| fputs ("@dtpoff", file); |
| break; |
| case UNSPEC_GOTNTPOFF: |
| if (TARGET_64BIT) |
| fputs (ASSEMBLER_DIALECT == ASM_ATT ? |
| "@gottpoff(%rip)": "@gottpoff[rip]", file); |
| else |
| fputs ("@gotntpoff", file); |
| break; |
| case UNSPEC_INDNTPOFF: |
| fputs ("@indntpoff", file); |
| break; |
| #if TARGET_MACHO |
| case UNSPEC_MACHOPIC_OFFSET: |
| putc ('-', file); |
| machopic_output_function_base_name (file); |
| break; |
| #endif |
| default: |
| output_operand_lossage ("invalid UNSPEC as operand"); |
| break; |
| } |
| break; |
| |
| default: |
| output_operand_lossage ("invalid expression as operand"); |
| } |
| } |
| |
| /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. |
| We need to emit DTP-relative relocations. */ |
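| /* For a 4-byte entry this emits, e.g., ".long foo@dtpoff"; an 8-byte entry |
|    additionally zero-pads the upper half by appending ", 0".  (Illustrative; |
|    ASM_LONG is the target's 32-bit data directive.)  */ |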
| |
| static void ATTRIBUTE_UNUSED |
| i386_output_dwarf_dtprel (FILE *file, int size, rtx x) |
| { |
| fputs (ASM_LONG, file); |
| output_addr_const (file, x); |
| fputs ("@dtpoff", file); |
| switch (size) |
| { |
| case 4: |
| break; |
| case 8: |
| fputs (", 0", file); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* Return true if X is a representation of the PIC register. This copes |
| with calls from ix86_find_base_term, where the register might have |
| been replaced by a cselib value. */ |
| |
| static bool |
| ix86_pic_register_p (rtx x) |
| { |
| if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x)) |
| return (pic_offset_table_rtx |
| && rtx_equal_for_cselib_p (x, pic_offset_table_rtx)); |
| else |
| return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM; |
| } |
| |
| /* Helper function for ix86_delegitimize_address. |
| Attempt to delegitimize TLS local-exec accesses. */ |
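| /* Such an access is addressed off the thread-pointer segment register, |
|    typically %gs:foo@ntpoff for 32-bit code or %fs:foo@tpoff for 64-bit |
|    code; the code below strips the segment and the UNSPEC_NTPOFF wrapper |
|    and rebuilds the address around the SYMBOL_REF itself.  */ |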
| |
| static rtx |
| ix86_delegitimize_tls_address (rtx orig_x) |
| { |
| rtx x = orig_x, unspec; |
| struct ix86_address addr; |
| |
| if (!TARGET_TLS_DIRECT_SEG_REFS) |
| return orig_x; |
| if (MEM_P (x)) |
| x = XEXP (x, 0); |
| if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode) |
| return orig_x; |
| if (ix86_decompose_address (x, &addr) == 0 |
| || addr.seg != DEFAULT_TLS_SEG_REG |
| || addr.disp == NULL_RTX |
| || GET_CODE (addr.disp) != CONST) |
| return orig_x; |
| unspec = XEXP (addr.disp, 0); |
| if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1))) |
| unspec = XEXP (unspec, 0); |
| if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF) |
| return orig_x; |
| x = XVECEXP (unspec, 0, 0); |
| gcc_assert (GET_CODE (x) == SYMBOL_REF); |
| if (unspec != XEXP (addr.disp, 0)) |
| x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1)); |
| if (addr.index) |
| { |
| rtx idx = addr.index; |
| if (addr.scale != 1) |
| idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale)); |
| x = gen_rtx_PLUS (Pmode, idx, x); |
| } |
| if (addr.base) |
| x = gen_rtx_PLUS (Pmode, addr.base, x); |
| if (MEM_P (orig_x)) |
| x = replace_equiv_address_nv (orig_x, x); |
| return x; |
| } |
| |
| /* In the name of slightly smaller debug output, and to cater to |
| general assembler lossage, recognize PIC+GOTOFF and turn it back |
| into a direct symbol reference. |
| |
| On Darwin, this is necessary to avoid a crash, because Darwin |
| has a different PIC label for each routine but the DWARF debugging |
| information is not associated with any particular routine, so it's |
| necessary to remove references to the PIC label from RTL stored by |
| the DWARF output code. */ |
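| /* For example, an -m32 reference of the form |
|      (plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))) |
|    is turned back into (symbol_ref "foo").  (Illustrative example.)  */ |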
| |
| static rtx |
| ix86_delegitimize_address (rtx x) |
| { |
| rtx orig_x = delegitimize_mem_from_attrs (x); |
| /* addend is NULL or some rtx if x is something+GOTOFF where |
| something doesn't include the PIC register. */ |
| rtx addend = NULL_RTX; |
| /* reg_addend is NULL or a multiple of some register. */ |
| rtx reg_addend = NULL_RTX; |
| /* const_addend is NULL or a const_int. */ |
| rtx const_addend = NULL_RTX; |
| /* This is the result, or NULL. */ |
| rtx result = NULL_RTX; |
| |
| x = orig_x; |
| |
| if (MEM_P (x)) |
| x = XEXP (x, 0); |
| |
| if (TARGET_64BIT) |
| { |
| if (GET_CODE (x) == CONST |
| && GET_CODE (XEXP (x, 0)) == PLUS |
| && GET_MODE (XEXP (x, 0)) == Pmode |
| && CONST_INT_P (XEXP (XEXP (x, 0), 1)) |
| && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC |
| && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL) |
| { |
| rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0); |
| x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2); |
| if (MEM_P (orig_x)) |
| x = replace_equiv_address_nv (orig_x, x); |
| return x; |
| } |
| |
| if (GET_CODE (x) == CONST |
| && GET_CODE (XEXP (x, 0)) == UNSPEC |
| && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL |
| || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL) |
| && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)) |
| { |
| x = XVECEXP (XEXP (x, 0), 0, 0); |
| if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x)) |
| { |
| x = simplify_gen_subreg (GET_MODE (orig_x), x, |
| GET_MODE (x), 0); |
| if (x == NULL_RTX) |
| return orig_x; |
| } |
| return x; |
| } |
| |
| if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC) |
| return ix86_delegitimize_tls_address (orig_x); |
| |
| /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic |
| and -mcmodel=medium -fpic. */ |
| } |
| |
| if (GET_CODE (x) != PLUS |
| || GET_CODE (XEXP (x, 1)) != CONST) |
| return ix86_delegitimize_tls_address (orig_x); |
| |
| if (ix86_pic_register_p (XEXP (x, 0))) |
| /* %ebx + GOT/GOTOFF */ |
| ; |
| else if (GET_CODE (XEXP (x, 0)) == PLUS) |
| { |
| /* %ebx + %reg * scale + GOT/GOTOFF */ |
| reg_addend = XEXP (x, 0); |
| if (ix86_pic_register_p (XEXP (reg_addend, 0))) |
| reg_addend = XEXP (reg_addend, 1); |
| else if (ix86_pic_register_p (XEXP (reg_addend, 1))) |
| reg_addend = XEXP (reg_addend, 0); |
| else |
| { |
| reg_addend = NULL_RTX; |
| addend = XEXP (x, 0); |
| } |
| } |
| else |
| addend = XEXP (x, 0); |
| |
| x = XEXP (XEXP (x, 1), 0); |
| if (GET_CODE (x) == PLUS |
| && CONST_INT_P (XEXP (x, 1))) |
| { |
| const_addend = XEXP (x, 1); |
| x = XEXP (x, 0); |
| } |
| |
| if (GET_CODE (x) == UNSPEC |
| && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend) |
| || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x)) |
| || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC |
| && !MEM_P (orig_x) && !addend))) |
| result = XVECEXP (x, 0, 0); |
| |
| if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x) |
| && !MEM_P (orig_x)) |
| result = XVECEXP (x, 0, 0); |
| |
| if (! result) |
| return ix86_delegitimize_tls_address (orig_x); |
| |
| if (const_addend) |
| result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend)); |
| if (reg_addend) |
| result = gen_rtx_PLUS (Pmode, reg_addend, result); |
| if (addend) |
| { |
| /* If the rest of original X doesn't involve the PIC register, add |
| addend and subtract pic_offset_table_rtx. This can happen e.g. |
| for code like: |
| leal (%ebx, %ecx, 4), %ecx |
| ... |
| movl foo@GOTOFF(%ecx), %edx |
| in which case we return (%ecx - %ebx) + foo. */ |
| if (pic_offset_table_rtx) |
| result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend), |
| pic_offset_table_rtx), |
| result); |
| else |
| return orig_x; |
| } |
| if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x)) |
| { |
| result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0); |
| if (result == NULL_RTX) |
| return orig_x; |
| } |
| return result; |
| } |
| |
| /* If X is a machine specific address (i.e. a symbol or label being |
| referenced as a displacement from the GOT implemented using an |
| UNSPEC), then return the base term. Otherwise return X. */ |
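| /* E.g. for x86-64, (const (plus (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL) |
|    (const_int 8))) yields (symbol_ref "foo") as the base term.  */ |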
| |
| rtx |
| ix86_find_base_term (rtx x) |
| { |
| rtx term; |
| |
| if (TARGET_64BIT) |
| { |
| if (GET_CODE (x) != CONST) |
| return x; |
| term = XEXP (x, 0); |
| if (GET_CODE (term) == PLUS |
| && (CONST_INT_P (XEXP (term, 1)) |
| || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE)) |
| term = XEXP (term, 0); |
| if (GET_CODE (term) != UNSPEC |
| || (XINT (term, 1) != UNSPEC_GOTPCREL |
| && XINT (term, 1) != UNSPEC_PCREL)) |
| return x; |
| |
| return XVECEXP (term, 0, 0); |
| } |
| |
| return ix86_delegitimize_address (x); |
| } |
| |
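| /* Print to FILE the one- or two-letter condition suffix (e.g. "e", "ne", |
|    "g", "b") that tests CODE in the flags mode MODE.  If REVERSE, print the |
|    suffix for the reversed condition instead.  FP selects the alternate |
|    spellings needed for fcmov (e.g. "nbe" rather than "a").  */ |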
| static void |
| put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse, |
| bool fp, FILE *file) |
| { |
| const char *suffix; |
| |
| if (mode == CCFPmode || mode == CCFPUmode) |
| { |
| code = ix86_fp_compare_code_to_integer (code); |
| mode = CCmode; |
| } |
| if (reverse) |
| code = reverse_condition (code); |
| |
| switch (code) |
| { |
| case EQ: |
| switch (mode) |
| { |
| case CCAmode: |
| suffix = "a"; |
| break; |
| |
| case CCCmode: |
| suffix = "c"; |
| break; |
| |
| case CCOmode: |
| suffix = "o"; |
| break; |
| |
| case CCSmode: |
| suffix = "s"; |
| break; |
| |
| default: |
| suffix = "e"; |
| } |
| break; |
| case NE: |
| switch (mode) |
| { |
| case CCAmode: |
| suffix = "na"; |
| break; |
| |
| case CCCmode: |
| suffix = "nc"; |
| break; |
| |
| case CCOmode: |
| suffix = "no"; |
| break; |
| |
| case CCSmode: |
| suffix = "ns"; |
| break; |
| |
| default: |
| suffix = "ne"; |
| } |
| break; |
| case GT: |
| gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode); |
| suffix = "g"; |
| break; |
| case GTU: |
| /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers. |
| Those same assemblers have the same but opposite lossage on cmov. */ |
| if (mode == CCmode) |
| suffix = fp ? "nbe" : "a"; |
| else |
| gcc_unreachable (); |
| break; |
| case LT: |
| switch (mode) |
| { |
| case CCNOmode: |
| case CCGOCmode: |
| suffix = "s"; |
| break; |
| |
| case CCmode: |
| case CCGCmode: |
| suffix = "l"; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| break; |
| case LTU: |
| if (mode == CCmode) |
| suffix = "b"; |
| else if (mode == CCCmode) |
| suffix = "c"; |
| else |
| gcc_unreachable (); |
| break; |
| case GE: |
| switch (mode) |
| { |
| case CCNOmode: |
| case CCGOCmode: |
| suffix = "ns"; |
| break; |
| |
| case CCmode: |
| case CCGCmode: |
| suffix = "ge"; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| break; |
| case GEU: |
| if (mode == CCmode) |
| suffix = fp ? "nb" : "ae"; |
| else if (mode == CCCmode) |
| suffix = "nc"; |
| else |
| gcc_unreachable (); |
| break; |
| case LE: |
| gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode); |
| suffix = "le"; |
| break; |
| case LEU: |
| if (mode == CCmode) |
| suffix = "be"; |
| else |
| gcc_unreachable (); |
| break; |
| case UNORDERED: |
| suffix = fp ? "u" : "p"; |
| break; |
| case ORDERED: |
| suffix = fp ? "nu" : "np"; |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| fputs (suffix, file); |
| } |
| |
| /* Print the name of register X to FILE based on its machine mode and number. |
| If CODE is 'w', pretend the mode is HImode. |
| If CODE is 'b', pretend the mode is QImode. |
| If CODE is 'k', pretend the mode is SImode. |
| If CODE is 'q', pretend the mode is DImode. |
| If CODE is 'x', pretend the mode is V4SFmode. |
| If CODE is 't', pretend the mode is V8SFmode. |
| If CODE is 'g', pretend the mode is V16SFmode. |
| If CODE is 'h', pretend the reg is the 'high' byte register. |
|    If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack register. |
| If CODE is 'd', duplicate the operand for AVX instruction. |
| */ |
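| /* E.g. in AT&T syntax, (reg:SI ax) with CODE 'b' prints as "%al", with |
|    CODE 'h' as "%ah", and with CODE 'q' on a 64-bit target as "%rax". |
|    (Illustrative examples.)  */ |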
| |
| void |
| print_reg (rtx x, int code, FILE *file) |
| { |
| const char *reg; |
| unsigned int regno; |
| bool duplicated = code == 'd' && TARGET_AVX; |
| |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| putc ('%', file); |
| |
| if (x == pc_rtx) |
| { |
| gcc_assert (TARGET_64BIT); |
| fputs ("rip", file); |
| return; |
| } |
| |
| regno = true_regnum (x); |
| gcc_assert (regno != ARG_POINTER_REGNUM |
| && regno != FRAME_POINTER_REGNUM |
| && regno != FLAGS_REG |
| && regno != FPSR_REG |
| && regno != FPCR_REG); |
| |
| if (code == 'w' || MMX_REG_P (x)) |
| code = 2; |
| else if (code == 'b') |
| code = 1; |
| else if (code == 'k') |
| code = 4; |
| else if (code == 'q') |
| code = 8; |
| else if (code == 'y') |
| code = 3; |
| else if (code == 'h') |
| code = 0; |
| else if (code == 'x') |
| code = 16; |
| else if (code == 't') |
| code = 32; |
| else if (code == 'g') |
| code = 64; |
| else |
| code = GET_MODE_SIZE (GET_MODE (x)); |
| |
|   /* Irritatingly, AMD extended registers use a different naming convention |
|      from the normal registers: "r%d[bwd]".  */ |
| if (REX_INT_REGNO_P (regno)) |
| { |
| gcc_assert (TARGET_64BIT); |
| putc ('r', file); |
| fprint_ul (file, regno - FIRST_REX_INT_REG + 8); |
| switch (code) |
| { |
| case 0: |
| error ("extended registers have no high halves"); |
| break; |
| case 1: |
| putc ('b', file); |
| break; |
| case 2: |
| putc ('w', file); |
| break; |
| case 4: |
| putc ('d', file); |
| break; |
| case 8: |
| /* no suffix */ |
| break; |
| default: |
| error ("unsupported operand size for extended register"); |
| break; |
| } |
| return; |
| } |
| |
| reg = NULL; |
| switch (code) |
| { |
| case 3: |
| if (STACK_TOP_P (x)) |
| { |
| reg = "st(0)"; |
| break; |
| } |
| /* FALLTHRU */ |
| case 8: |
| case 4: |
| case 12: |
| if (! ANY_FP_REG_P (x)) |
| putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file); |
| /* FALLTHRU */ |
| case 16: |
| case 2: |
| normal: |
| reg = hi_reg_name[regno]; |
| break; |
| case 1: |
| if (regno >= ARRAY_SIZE (qi_reg_name)) |
| goto normal; |
| reg = qi_reg_name[regno]; |
| break; |
| case 0: |
| if (regno >= ARRAY_SIZE (qi_high_reg_name)) |
| goto normal; |
| reg = qi_high_reg_name[regno]; |
| break; |
| case 32: |
| if (SSE_REG_P (x)) |
| { |
| gcc_assert (!duplicated); |
| putc ('y', file); |
| fputs (hi_reg_name[regno] + 1, file); |
| return; |
| } |
|       break; |
|     case 64: |
| if (SSE_REG_P (x)) |
| { |
| gcc_assert (!duplicated); |
| putc ('z', file); |
| fputs (hi_reg_name[REGNO (x)] + 1, file); |
| return; |
| } |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| |
| fputs (reg, file); |
| if (duplicated) |
| { |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| fprintf (file, ", %%%s", reg); |
| else |
| fprintf (file, ", %s", reg); |
| } |
| } |
| |
| /* Locate some local-dynamic symbol still in use by this function |
| so that we can print its name in some tls_local_dynamic_base |
| pattern. */ |
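| /* The helper below walks each insn's pattern with for_each_rtx, caching the |
|    first TLS_MODEL_LOCAL_DYNAMIC SYMBOL_REF name it finds in |
|    cfun->machine->some_ld_name so that repeated queries are cheap.  */ |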
| |
| static int |
| get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED) |
| { |
| rtx x = *px; |
| |
| if (GET_CODE (x) == SYMBOL_REF |
| && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC) |
| { |
| cfun->machine->some_ld_name = XSTR (x, 0); |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
| static const char * |
| get_some_local_dynamic_name (void) |
| { |
| rtx insn; |
| |
| if (cfun->machine->some_ld_name) |
| return cfun->machine->some_ld_name; |
| |
| for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) |
| if (NONDEBUG_INSN_P (insn) |
| && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) |
| return cfun->machine->some_ld_name; |
| |
| return NULL; |
| } |
| |
| /* Meaning of CODE: |
| L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. |
| C -- print opcode suffix for set/cmov insn. |
| c -- like C, but print reversed condition |
| F,f -- likewise, but for floating-point. |
| O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", |
| otherwise nothing |
|    R -- print embedded rounding and sae. |
| r -- print only sae. |
| z -- print the opcode suffix for the size of the current operand. |
| Z -- likewise, with special suffixes for x87 instructions. |
| * -- print a star (in certain assembler syntax) |
| A -- print an absolute memory reference. |
| E -- print address with DImode register names if TARGET_64BIT. |
| w -- print the operand as if it's a "word" (HImode) even if it isn't. |
|    s -- print a shift double count, followed by the assembler's argument |
| delimiter. |
| b -- print the QImode name of the register for the indicated operand. |
| %b0 would print %al if operands[0] is reg 0. |
| w -- likewise, print the HImode name of the register. |
| k -- likewise, print the SImode name of the register. |
| q -- likewise, print the DImode name of the register. |
| x -- likewise, print the V4SFmode name of the register. |
| t -- likewise, print the V8SFmode name of the register. |
| g -- likewise, print the V16SFmode name of the register. |
| h -- print the QImode name for a "high" register, either ah, bh, ch or dh. |
| y -- print "st(0)" instead of "st" as a register. |
| d -- print duplicated register operand for AVX instruction. |
| D -- print condition for SSE cmp instruction. |
| P -- if PIC, print an @PLT suffix. |
| p -- print raw symbol name. |
| X -- don't print any sort of PIC '@' suffix for a symbol. |
| & -- print some in-use local-dynamic symbol name. |
| H -- print a memory address offset by 8; used for sse high-parts |
| Y -- print condition for XOP pcom* instruction. |
| + -- print a branch hint as 'cs' or 'ds' prefix |
| ; -- print a semicolon (after prefixes due to bug in older gas). |
| ~ -- print "i" if TARGET_AVX2, "f" otherwise. |
| @ -- print a segment register of thread base pointer load |
| ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode |
|    ` -- print "nacl" prefix if TARGET_SFI_CFLOW_NACL1 |
| */ |
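| /* For example, in a (hypothetical) insn template "mov%z0\t{%1, %0|%0, %1}" |
|    the %z0 expands to the b/w/l/q suffix chosen from operand 0's mode, and |
|    "%b0" / "%h0" name the low and high byte registers of operand 0.  */ |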
| |
| void |
| ix86_print_operand (FILE *file, rtx x, int code) |
| { |
| if (code) |
| { |
| switch (code) |
| { |
| case 'A': |
| switch (ASSEMBLER_DIALECT) |
| { |
| case ASM_ATT: |
| if (!TARGET_SFI_CFLOW_NACL1) |
| putc ('*', file); |
| break; |
| |
| case ASM_INTEL: |
| 	      /* Intel syntax.  For absolute addresses, registers should not |
| 		 be surrounded by brackets.  */ |
| if (!REG_P (x)) |
| { |
| putc ('[', file); |
| ix86_print_operand (file, x, 0); |
| putc (']', file); |
| return; |
| } |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| ix86_print_operand (file, x, 0); |
| return; |
| |
| case 'E': |
| /* Wrap address in an UNSPEC to declare special handling. */ |
| if (TARGET_64BIT) |
| x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR); |
| |
| output_address (x); |
| return; |
| |
| case 'L': |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| putc ('l', file); |
| return; |
| |
| case 'W': |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| putc ('w', file); |
| return; |
| |
| case 'B': |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| putc ('b', file); |
| return; |
| |
| case 'Q': |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| putc ('l', file); |
| return; |
| |
| case 'S': |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| putc ('s', file); |
| return; |
| |
| case 'T': |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| putc ('t', file); |
| return; |
| |
| case 'O': |
| #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX |
| if (ASSEMBLER_DIALECT != ASM_ATT) |
| return; |
| |
| switch (GET_MODE_SIZE (GET_MODE (x))) |
| { |
| case 2: |
| putc ('w', file); |
| break; |
| |
| case 4: |
| putc ('l', file); |
| break; |
| |
| case 8: |
| putc ('q', file); |
| break; |
| |
| default: |
| output_operand_lossage |
| ("invalid operand size for operand code 'O'"); |
| return; |
| } |
| |
| putc ('.', file); |
| #endif |
| return; |
| |
| case 'z': |
| if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) |
| { |
| 	  /* Opcodes don't get size suffixes when using Intel syntax.  */ |
| if (ASSEMBLER_DIALECT == ASM_INTEL) |
| return; |
| |
| switch (GET_MODE_SIZE (GET_MODE (x))) |
| { |
| case 1: |
| putc ('b', file); |
| return; |
| |
| case 2: |
| putc ('w', file); |
| return; |
| |
| case 4: |
| putc ('l', file); |
| return; |
| |
| case 8: |
| putc ('q', file); |
| return; |
| |
| default: |
| output_operand_lossage |
| ("invalid operand size for operand code 'z'"); |
| return; |
| } |
| } |
| |
| if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) |
| warning |
| (0, "non-integer operand used with operand code 'z'"); |
| /* FALLTHRU */ |
| |
| case 'Z': |
| 	/* 387 opcodes don't get size suffixes when using Intel syntax.  */ |
| if (ASSEMBLER_DIALECT == ASM_INTEL) |
| return; |
| |
| if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) |
| { |
| switch (GET_MODE_SIZE (GET_MODE (x))) |
| { |
| case 2: |
| #ifdef HAVE_AS_IX86_FILDS |
| putc ('s', file); |
| #endif |
| return; |
| |
| case 4: |
| putc ('l', file); |
| return; |
| |
| case 8: |
| #ifdef HAVE_AS_IX86_FILDQ |
| putc ('q', file); |
| #else |
| fputs ("ll", file); |
| #endif |
| return; |
| |
| default: |
| break; |
| } |
| } |
| else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) |
| { |
| /* 387 opcodes don't get size suffixes |
| if the operands are registers. */ |
| if (STACK_REG_P (x)) |
| return; |
| |
| switch (GET_MODE_SIZE (GET_MODE (x))) |
| { |
| case 4: |
| putc ('s', file); |
| return; |
| |
| case 8: |
| putc ('l', file); |
| return; |
| |
| case 12: |
| case 16: |
| putc ('t', file); |
| return; |
| |
| default: |
| break; |
| } |
| } |
| else |
| { |
| output_operand_lossage |
| ("invalid operand type used with operand code 'Z'"); |
| return; |
| } |
| |
| output_operand_lossage |
| ("invalid operand size for operand code 'Z'"); |
| return; |
| |
| case 'd': |
| case 'b': |
| case 'w': |
| case 'k': |
| case 'q': |
| case 'h': |
| case 't': |
| case 'g': |
| case 'y': |
| case 'x': |
| case 'X': |
| case 'P': |
| case 'p': |
| break; |
| |
| case 's': |
| if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT) |
| { |
| ix86_print_operand (file, x, 0); |
| fputs (", ", file); |
| } |
| return; |
| |
| case 'Y': |
| switch (GET_CODE (x)) |
| { |
| case NE: |
| fputs ("neq", file); |
| break; |
| case EQ: |
| fputs ("eq", file); |
| break; |
| case GE: |
| case GEU: |
| fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file); |
| break; |
| case GT: |
| case GTU: |
| fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file); |
| break; |
| case LE: |
| case LEU: |
| fputs ("le", file); |
| break; |
| case LT: |
| case LTU: |
| fputs ("lt", file); |
| break; |
| case UNORDERED: |
| fputs ("unord", file); |
| break; |
| case ORDERED: |
| fputs ("ord", file); |
| break; |
| case UNEQ: |
| fputs ("ueq", file); |
| break; |
| case UNGE: |
| fputs ("nlt", file); |
| break; |
| case UNGT: |
| fputs ("nle", file); |
| break; |
| case UNLE: |
| fputs ("ule", file); |
| break; |
| case UNLT: |
| fputs ("ult", file); |
| break; |
| case LTGT: |
| fputs ("une", file); |
| break; |
| default: |
| output_operand_lossage ("operand is not a condition code, " |
| "invalid operand code 'Y'"); |
| return; |
| } |
| return; |
| |
| case 'D': |
| 	  /* Little bit of braindamage here.  The SSE compare instructions |
| 	     use completely different names for the comparisons than the |
| 	     fp conditional moves do.  */ |
| switch (GET_CODE (x)) |
| { |
| case UNEQ: |
| if (TARGET_AVX) |
| { |
| fputs ("eq_us", file); |
| break; |
| } |
| case EQ: |
| fputs ("eq", file); |
| break; |
| case UNLT: |
| if (TARGET_AVX) |
| { |
| fputs ("nge", file); |
| break; |
| } |
| case LT: |
| fputs ("lt", file); |
| break; |
| case UNLE: |
| if (TARGET_AVX) |
| { |
| fputs ("ngt", file); |
| break; |
| } |
| case LE: |
| fputs ("le", file); |
| break; |
| case UNORDERED: |
| fputs ("unord", file); |
| break; |
| case LTGT: |
| if (TARGET_AVX) |
| { |
| fputs ("neq_oq", file); |
| break; |
| } |
| case NE: |
| fputs ("neq", file); |
| break; |
| case GE: |
| if (TARGET_AVX) |
| { |
| fputs ("ge", file); |
| break; |
| } |
| case UNGE: |
| fputs ("nlt", file); |
| break; |
| case GT: |
| if (TARGET_AVX) |
| { |
| fputs ("gt", file); |
| break; |
| } |
| case UNGT: |
| fputs ("nle", file); |
| break; |
| case ORDERED: |
| fputs ("ord", file); |
| break; |
| default: |
| output_operand_lossage ("operand is not a condition code, " |
| "invalid operand code 'D'"); |
| return; |
| } |
| return; |
| |
| case 'F': |
| case 'f': |
| #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX |
| if (ASSEMBLER_DIALECT == ASM_ATT) |
| putc ('.', file); |
| #endif |
| |
| case 'C': |
| case 'c': |
| if (!COMPARISON_P (x)) |
| { |
| output_operand_lossage ("operand is not a condition code, " |
| "invalid operand code '%c'", code); |
| return; |
| } |
| put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), |
| code == 'c' || code == 'f', |
| code == 'F' || code == 'f', |
| file); |
| return; |
| |
| case 'H': |
| if (!offsettable_memref_p (x)) |
| { |
| output_operand_lossage ("operand is not an offsettable memory " |
| "reference, invalid operand code 'H'"); |
| return; |
| } |
| /* It doesn't actually matter what mode we use here, as we're |
| only going to use this for printing. */ |
| x = adjust_address_nv (x, DImode, 8); |
| /* Output 'qword ptr' for intel assembler dialect. */ |
| if (ASSEMBLER_DIALECT == ASM_INTEL) |
| code = 'q'; |
| break; |
| |
| case 'K': |
| gcc_assert (CONST_INT_P (x)); |
| |
| if (INTVAL (x) & IX86_HLE_ACQUIRE) |
| #ifdef HAVE_AS_IX86_HLE |
| fputs ("xacquire ", file); |
| #else |
|