blob: 17b2ae8db82fef8f49a07b84989d55b18d07d09d [file] [log] [blame]
/* Mulitversion Dispatch Pass.
Copyright (C) 2011 Free Software Foundation, Inc.
Contributed by Sriraman Tallam (tmsriram@google.com)
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
/* This pass processes __builtin_dispatch calls to call multi-versioned
functions. Only two versions are supported now. Example use :
int popcnt_sse4(unsigned int x) __attribute__((__target__("popcnt")));
int popcnt_sse4(unsigned int x)
{
int count = __builtin_popcount(x);
return count;
}
int popcnt(unsigned int x) __attribute__((__target__("no-popcnt")));
int popcnt(unsigned int x)
{
int count = __builtin_popcount(x);
return count;
}
int testsse() __attribute__((version_selector));
int main ()
{
...
ret = __builtin_dispatch (testsse, (void*)popcnt_sse4, (void*)popcnt, 25);
...
}
There are two passes that are run to achieve multi-versioning.
"pass_ipa_multiversion_dispatch" is an ipa pass that decides which functions
have to be cloned and hoists the feature-test calls appropriately. This
pass can be enabled with the flag "-fclone-hot-version-paths" and disabled
with "-fno-clone-hot-version-paths".
"pass_tree_convert_builtin_dispatch" does the lowering. It is a
function-level pass. Functions marked with attribute "version_selector" are
also handled by this pass. This pass is always on.
How to use __builtin_dispatch ?
-----------------------------
__builtin_dispatch takes 3 mandatory arguments :
__builtin_dispatch (arg1, arg2, arg3, <arg4>, <arg5>, ...);
arg1 is the pointer to the feature-test function.
arg2 is the ( void *) cast pointer to the versioned function that is
executed when the feature test returns 1.
arg3 is the ( void *) cast pointer to the versioned function that is
executed when the feature test returns 0.
arg4, arg5, ... are optional. They are the arguments to the versioned
functions. Both versions must accept the same number of arguments.
The __builtin_dispatch function returns the value returned by the
versioned function that gets executed. The versioned function arg2
is executed when the feature_test function arg1 returns 1 and arg3
is executed when the feature_test function arg1 returns 0. arg1
could be marked as a "version_selector" function if it is a pure
function with no side-effects, returns a constant at run-time and
can be evaluated at any point in the execution.
When to use the "version_selector" attribute ?
-----------------------------------------------
Functions are marked with attribute "version_selector" only if
they are run-time constants. Example of such functions would
be those that test if a specific feature is available on a
particular architecture. Such functions must return a positive
integer. For two-way functions, those that test if a feature
is present or not must return 1 or 0 respectively.
The code is organized into five parts. The first part has the functionality
to detect and handle functions marked with attribute "version_selector". The
second part is the analysis phase where we find calls to __builtin_dispatch
and mark all functions that are hot and have a call-graph path to a
__builtin_dispatch call. The third part decides which functions
to clone. This is based on the number of clones that have to be created for
the functions marked in the analysis phase. Only two clones are allowed for
a function currently. The fourth part is where the actual cloning happens.
The fifth part contains the implementation to lower the __builtin_dispatch
calls.
Flags : -fclone-hot-version-paths does function unswitching via cloning.
--param=num-mversn-clones=<num> allows to specify the number of
functions that should be cloned.
--param=mversn-clone-depth=<num> allows to specify the length of
the call graph path that should be cloned. num = 0 implies only
leaf node that contains the __builtin_dispatch statement must be
cloned. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "flags.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "toplev.h"
#include "timevar.h"
#include "params.h"
#include "fibheap.h"
#include "intl.h"
#include "tree-pass.h"
#include "hashtab.h"
#include "coverage.h"
#include "ggc.h"
#include "tree-flow.h"
#include "rtl.h"
#include "ipa-prop.h"
#include "basic-block.h"
#include "toplev.h"
#include "dbgcnt.h"
#include "tree-dump.h"
#include "output.h"
#include "vecprim.h"
#include "gimple-pretty-print.h"
#include "ipa-inline.h"
typedef struct cgraph_node* NODEPTR;
DEF_VEC_P (NODEPTR);
DEF_VEC_ALLOC_P (NODEPTR, heap);
/* Store the decl of __builtin_dispatch */
static tree builtin_function_decl = NULL;
/* Hash to map name to a decl. Used for variables and functions. */
static htab_t name_decl_htab = NULL;
/* Hashtable helpers for name_decl_htab. */
static hashval_t
name_decl_htab_hash_descriptor (const void *p)
{
const_tree t = (const_tree) p;
const char *name
= (IDENTIFIER_POINTER (DECL_NAME (t)));
return htab_hash_string(name);
}
/* Hashtable helper for name_decl_htab. */
static int
name_decl_htab_eq_descriptor (const void *p1, const void *p2)
{
const_tree t1 = (const_tree) p1;
const char *c1 = IDENTIFIER_POINTER (DECL_NAME (t1));
const char *c2 = (const char *)p2;
return (strcmp (c1, c2) == 0);
}
/* Return true if NODE is a hot function. It is a hot function
if its execution frequency is determined to be hot or
if any of its incoming or outgoing call-graph edges is hot. */
static bool
hot_function_p (struct cgraph_node *node)
{
struct cgraph_edge *edge;
if (node->frequency == NODE_FREQUENCY_HOT)
return true;
for (edge = node->callees; edge; edge = edge->next_callee)
if (cgraph_maybe_hot_edge_p (edge))
return true;
for (edge = node->callers; edge; edge = edge->next_caller)
if (cgraph_maybe_hot_edge_p (edge))
return true;
return false;
}
/* Return the number of arguments that a function has. */
static int
function_args_count (tree fntype)
{
function_args_iterator args_iter;
tree t;
int num = 0;
if (fntype)
{
FOREACH_FUNCTION_ARGS(fntype, t, args_iter)
{
num++;
}
}
return num;
}
/* Return the variable name (global/constructor) to use for the
version_selector function with name of DECL by appending SUFFIX. */
static char *
make_name (tree decl, const char *suffix)
{
char *global_var_name;
int name_len;
const char *name;
name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
name_len = strlen (name) + strlen (suffix) + 2;
global_var_name = (char *) xmalloc (name_len);
snprintf (global_var_name, name_len, "%s_%s", name, suffix);
return global_var_name;
}
/* Code for handling version_selector attribute functions. Such functions are
run-time constants and need to be executed only once. They are hoisted
to a static constructor and their result is stored in a global.
*/
/* This function returns the global variable / constructor name created
for feature-test functions marked with attribute "version_selector".
The name returned is the DECL name appended with
"version_selector_global" for the variable and
"version_selector_constructor" for the constructor. */
static char*
make_feature_test_global_name (tree decl, bool is_constructor)
{
if (is_constructor)
return make_name (decl, "version_selector_constructor");
return make_name (decl, "version_selector_global");
}
/* This function creates a new VAR_DECL with attributes set
using the parameters. PUBLIK corresponds to TREE_PUBLIC,
EXTERNAL corresponds to DECL_EXTERNAL and comdat is
for DECL_ONE_ONLY. The global variable will have the
same status as the version_selector function.*/
static tree
allocate_new_var (const char *name, int publik,
int external, int comdat)
{
tree new_global_var;
struct varpool_node *vnode;
new_global_var = build_decl (UNKNOWN_LOCATION,
VAR_DECL,
get_identifier (name),
integer_type_node);
DECL_EXTERNAL (new_global_var) = external;
TREE_STATIC (new_global_var) = 1;
TREE_PUBLIC (new_global_var) = publik;
DECL_INITIAL (new_global_var) = 0;
DECL_ARTIFICIAL (new_global_var) = 1;
DECL_PRESERVE_P (new_global_var) = 1;
if (comdat)
make_decl_one_only (new_global_var, DECL_ASSEMBLER_NAME (new_global_var));
assemble_variable (new_global_var, 0, 0, 0);
vnode = varpool_node (new_global_var);
gcc_assert (vnode != NULL);
/* Set finalized to 1, otherwise it asserts in function "write_symbol" in
lto-streamer-out.c. */
vnode->finalized = 1;
return new_global_var;
}
/* Make a new constructor function here to call a feature-test function
and set its body to CONSTRUCTOR_BODY. Its public and comdat
attributes are set from the parameters, PUBLIK, and COMDAT.
VERSION_SELECTOR_VAR is the global decl that saves the result of the
feature-test function in the constructor. */
static tree
make_constructor_function (char *name, gimple constructor_body, int publik,
int comdat, tree version_selector_var)
{
tree decl, type, t;
gimple_seq seq;
basic_block new_bb;
tree old_current_function_decl;
type = build_function_type_list (void_type_node, NULL_TREE);
if (dump_file)
fprintf (dump_file, "Name of new constructor function = %s\n", name);
decl = build_fn_decl (name, type);
DECL_NAME (decl) = get_identifier (name);
SET_DECL_ASSEMBLER_NAME (decl, DECL_NAME (decl));
gcc_assert (cgraph_get_create_node (decl) != NULL);
TREE_USED (decl) = 1;
DECL_ARTIFICIAL (decl) = 1;
DECL_IGNORED_P (decl) = 0;
TREE_PUBLIC (decl) = publik;
DECL_UNINLINABLE (decl) = 1;
DECL_EXTERNAL (decl) = 0;
DECL_CONTEXT (decl) = NULL_TREE;
DECL_INITIAL (decl) = make_node (BLOCK);
DECL_STATIC_CONSTRUCTOR (decl) = 1;
TREE_READONLY (decl) = 0;
DECL_PURE_P (decl) = 0;
if (comdat)
make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
/* Build result decl and add to function_decl. */
t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, void_type_node);
DECL_ARTIFICIAL (t) = 1;
DECL_IGNORED_P (t) = 1;
DECL_RESULT (decl) = t;
gimplify_function_tree (decl);
/* Build CFG for this function. */
old_current_function_decl = current_function_decl;
push_cfun (DECL_STRUCT_FUNCTION (decl));
current_function_decl = decl;
init_empty_tree_cfg_for_function (DECL_STRUCT_FUNCTION (decl));
cfun->curr_properties |=
(PROP_gimple_lcf | PROP_gimple_leh | PROP_cfg | PROP_referenced_vars |
PROP_ssa);
new_bb = create_empty_bb (ENTRY_BLOCK_PTR);
make_edge (ENTRY_BLOCK_PTR, new_bb, EDGE_FALLTHRU);
/* XXX: Not sure if the edge commented below is necessary. If I add this
edge, it fails in gimple_verify_flow_info in tree-cfg.c in condition :
" if (e->flags & EDGE_FALLTHRU)"
during -fprofile-generate.
Otherwise, it is fine. Deleting this edge does not break anything.
Commenting this so that it is clear I am intentionally not doing this.*/
/* make_edge (new_bb, EXIT_BLOCK_PTR, EDGE_FALLTHRU); */
seq = gimple_seq_alloc_with_stmt (constructor_body);
set_bb_seq (new_bb, seq);
gimple_set_bb (constructor_body, new_bb);
/* Set the lexical block of the constructor body. Fails the inliner
other wise. */
gimple_set_block (constructor_body, DECL_INITIAL (decl));
/* This call is very important if this pass runs when the IR is in
SSA form. It breaks things in strange ways otherwise. */
init_tree_ssa (DECL_STRUCT_FUNCTION (decl));
add_referenced_var (version_selector_var);
cgraph_add_new_function (decl, true);
cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
cgraph_mark_needed_node (cgraph_get_create_node (decl));
if (dump_file)
dump_function_to_file (decl, dump_file, TDF_BLOCKS);
pop_cfun ();
current_function_decl = old_current_function_decl;
return decl;
}
/* If the current function is marked with attribute
"version_selector" then it is the predicate (feature-test) function
for multi-versioning. Call this function in a constructor and assign
the return value to a global variable.
The constructor's name is the decl name suffixed
"version_selector_constructor" and the global variable's name is the
decl name suffixed with "version_selector_global"
For example, feature-test function isSSE4 marked with attribute
version_selector is converted to
void isSSE4_version_selector_constructor ()
{
isSSE4_version_selector_global = isSSE4 ();
}
This function returns the decl of the global variable.
THIS_DECL is the function decl of the "version_selector" function.
*/
static tree
handle_version_selector_attr_function (tree this_decl)
{
char *global_var_name;
tree version_selector_var = NULL;
void **slot;
gcc_assert (!flag_lto);
if (dump_file)
fprintf (dump_file, "Creating constructor/global for function %s\n",
IDENTIFIER_POINTER (DECL_NAME (this_decl)));
global_var_name = make_feature_test_global_name (this_decl,
false);
slot = htab_find_slot_with_hash (name_decl_htab, global_var_name,
htab_hash_string (global_var_name),
INSERT);
if (*slot == NULL)
{
if (dump_file)
fprintf (dump_file, "Creating global variable %s\n",
global_var_name);
*slot = allocate_new_var (global_var_name,
TREE_PUBLIC (this_decl),
DECL_EXTERNAL (this_decl),
DECL_ONE_ONLY (this_decl));
}
else
{
free (global_var_name);
return (tree) *slot;
}
version_selector_var = (tree) *slot;
/* If the feature-test function is not external, create a constructor and
call this function in the constructor. */
if (!DECL_EXTERNAL (this_decl))
{
char *constructor_name;
gimple constructor_body;
tree constructor_decl;
constructor_name
= make_feature_test_global_name (this_decl, true);
constructor_body = gimple_build_call (this_decl, 0);
gimple_call_set_lhs (constructor_body, version_selector_var);
if (dump_file)
print_gimple_stmt (dump_file, constructor_body, 0, TDF_VOPS);
constructor_decl =
make_constructor_function (constructor_name, constructor_body,
TREE_PUBLIC (this_decl),
DECL_ONE_ONLY (this_decl),
version_selector_var);
gcc_assert (constructor_decl != NULL_TREE);
free (constructor_name);
}
free (global_var_name);
return version_selector_var;
}
/* Start Analysis phase. Mark all functions that are hot and have a call-graph
path to a __builtin_dispatch call. */
/* This function returns the address of the feature test function.
If the address of the function is saved to a temporary,
this function traverses the gimple statements before BUILTIN_STMT
and finds an assignment whose rhs is the feature test function.
If the feature test function is specified as a function pointer
whose function value is unknown, this funcition returns NULL. */
static tree
find_version_selector_func_addr (gimple builtin_stmt)
{
tree cond_func_addr = NULL;
gimple def_stmt = NULL;
cond_func_addr = gimple_call_arg (builtin_stmt, 0);
gcc_assert (TREE_CODE (cond_func_addr) == ADDR_EXPR
|| TREE_CODE (cond_func_addr) == SSA_NAME);
if (TREE_CODE (cond_func_addr) == ADDR_EXPR)
return cond_func_addr;
/* TREE_CODE (cond_func_addr) == SSA_NAME
This means a new function pointer variable is created and assigned the
address of the feature-test function. Traverse the statements backwards
and find the assignment to get the RHS. */
def_stmt = SSA_NAME_DEF_STMT (cond_func_addr);
gcc_assert (def_stmt
&& gimple_assign_lhs (def_stmt) == cond_func_addr);
cond_func_addr = gimple_assign_rhs1 (def_stmt);
/* If the cond_func_addr is still not an ADDR_EXPR, it means that the
feature-test function is specified as a pointer. In this case, we
return NULL, since the feature-test function decl is not known. */
if (cond_func_addr == NULL
|| TREE_CODE (cond_func_addr) != ADDR_EXPR)
return NULL;
/* If the operand of the ADDR_EXPR is not a function_decl, return NULL
as this still means the feature-test function is specified as a
function pointer. */
if (TREE_CODE (TREE_OPERAND (cond_func_addr, 0)) != FUNCTION_DECL)
return NULL;
return cond_func_addr;
}
/* Finds the gimple calls to __builtin_dispatch in function pointed
to by the call graph NODE and populates the vector VEC. Returns
true if at least one statement was found where the feature test
function is marked as "version_selector". Otherwise, there is no
question of hoisting it. */
static bool
is_builtin_dispatch_stmt_present (struct cgraph_node *node,
VEC (tree,heap) **vec)
{
struct cgraph_edge *edge;
bool present = false;
gcc_assert (!flag_lto);
for (edge = node->callees; edge; edge = edge->next_callee)
{
if (edge->callee->decl == builtin_function_decl)
{
tree cond_func_decl;
tree cond_func_addr;
gcc_assert (*vec != NULL);
cond_func_addr = find_version_selector_func_addr (edge->call_stmt);
if (cond_func_addr == NULL)
continue;
cond_func_decl = TREE_OPERAND (cond_func_addr, 0);
/* Do not consider for hoisting if "version_selector" attribute is
not set. */
if (lookup_attribute ("version_selector",
DECL_ATTRIBUTES (cond_func_decl)) == NULL)
{
if (dump_file)
{
fprintf (dump_file, "Not hoisting builtin_dispatch as "
"feature_test function not version_selector :\n");
print_gimple_stmt (dump_file, edge->call_stmt, 0, TDF_VOPS);
}
continue;
}
present = true;
VEC_safe_push (tree, heap, *vec, cond_func_decl);
}
}
return present;
}
/* Updates the list of feature-test function decls reaching the cgraph
function NODE. */
static void
update_reachable_decls_list (struct cgraph_node *node,
VEC (tree, heap) *predicate_decls)
{
VEC (tree, heap) **decl_list = NULL;
tree cond_func_decl;
int ix;
if (node->aux == NULL)
{
decl_list = (VEC (tree, heap) **) xmalloc (sizeof (VEC (tree, heap) *));
*decl_list = VEC_alloc (tree, heap, 1);
node->aux = decl_list;
}
else
decl_list = (VEC (tree, heap) **) node->aux;
for (ix = 0; VEC_iterate (tree, predicate_decls, ix, cond_func_decl); ++ix)
VEC_safe_push (tree, heap, *decl_list, cond_func_decl);
}
/* Propagate the __builtin_dispatch stmt (s) called from node to its
callers, PREDICATE_DECLS is the decls list of the predicate functions. */
static unsigned int
mark_reachable_functions (struct cgraph_node *this_node,
VEC (tree, heap) *predicate_decls)
{
VEC (NODEPTR, heap) *work_list;
VEC (int, heap) *depth_list;
struct cgraph_edge *e;
htab_t node_htab = NULL;
void **slot = NULL;
/* Use a work-list style algorithm to mark functions in any call-graph
path to the current function. */
work_list = VEC_alloc (NODEPTR, heap, 8);
depth_list = VEC_alloc (int, heap, 8);
VEC_safe_push (NODEPTR, heap, work_list, this_node);
VEC_safe_push (int, heap, depth_list, 0);
node_htab = htab_create (10, htab_hash_pointer,
htab_eq_pointer, NULL);
slot = htab_find_slot (node_htab, this_node, INSERT);
gcc_assert (*slot == NULL);
*slot = this_node;
while (!VEC_empty (NODEPTR, work_list))
{
struct cgraph_node *node = VEC_pop (NODEPTR, work_list);
int depth = VEC_pop (int, depth_list);
if (dump_file)
fprintf (dump_file, "%s has a depth = %d callgraph path to %s\n",
cgraph_node_name (node), depth,
cgraph_node_name (this_node));
update_reachable_decls_list (node, predicate_decls);
gcc_assert (node->aux != NULL);
if (depth >= PARAM_VALUE (PARAM_MVERSN_CLONE_CGRAPH_DEPTH))
{
if (dump_file)
fprintf (dump_file, "Not propogating __builtin_dispatch... "
"maximum cloning depth = %d reached\n",
PARAM_VALUE (PARAM_MVERSN_CLONE_CGRAPH_DEPTH));
continue;
}
for (e = node->callers; e; e = e->next_caller)
{
slot = htab_find_slot (node_htab, e->caller, INSERT);
if (*slot != NULL)
continue;
*slot = e->caller;
if (!hot_function_p (e->caller))
continue;
VEC_safe_push (NODEPTR, heap, work_list, e->caller);
VEC_safe_push (int, heap, depth_list, (depth + 1));
}
}
htab_delete (node_htab);
VEC_free (NODEPTR, heap, work_list);
VEC_free (int, heap, depth_list);
return 0;
}
/* Scan the call graph and detect hot functions that have __builtin_dispatch
calls. Then, propogate this information to its callers. Returns true if
a suitable __builtin_dispatch was found. */
static bool
perform_analysis_phase (void)
{
struct cgraph_node *node;
VEC(tree, heap) *builtin_predicates_vec = NULL;
bool flag = false;
builtin_predicates_vec = VEC_alloc (tree, heap, 1);
for (node = cgraph_nodes; node; node = node->next)
{
/* if the body of this decl is from outside, do nothing. */
if (DECL_EXTERNAL (node->decl))
continue;
if (!hot_function_p (node))
continue;
if (!is_builtin_dispatch_stmt_present (node, &builtin_predicates_vec))
continue;
if (dump_file)
{
fprintf (dump_file, "%s calls __builtin_dispatch atleast once.\n",
cgraph_node_name (node));
fprintf (dump_file, "%s is a hot function, consider cloning ...\n",
cgraph_node_name (node));
}
flag = true;
mark_reachable_functions (node, builtin_predicates_vec);
VEC_truncate (tree, builtin_predicates_vec, 0);
}
VEC_free (tree, heap, builtin_predicates_vec);
return flag;
}
/* End Analysis phase. */
/* Decide Cloning Phase.
In this phase, we go through each function and decide if it should be
cloned or not. */
/* This function counts the number of unique decls in the DECL_LIST.*/
static int
count_predicate_functions (VEC (tree,heap) *decl_list)
{
int ix;
int count = 0;
tree cond_func_decl = NULL;
htab_t dup_decl_htab = NULL;
if (VEC_length (tree, decl_list) == 1)
return 1;
dup_decl_htab = htab_create (2, htab_hash_pointer, htab_eq_pointer, NULL);
for (ix = 0; VEC_iterate (tree, decl_list, ix, cond_func_decl); ++ix)
{
void **slot = NULL;
slot = htab_find_slot (dup_decl_htab, cond_func_decl, INSERT);
if (*slot != NULL)
continue;
count++;
*slot = cond_func_decl;
}
htab_delete (dup_decl_htab);
return count;
}
/* This function decides which functions to clone based on the number of
feature_test decls reaching it. Currently, only one feature_test decl
is allowed. */
static bool
decide_cloning_phase (void)
{
struct cgraph_node *node;
int count;
bool run_cloning_phase = false;
int num_funcs_cloned = 0;
for (node = cgraph_nodes; node; node = node->next)
{
tree cond_func_decl = NULL;
VEC (tree, heap) *vec;
if (node->aux == NULL)
continue;
if (num_funcs_cloned >= PARAM_VALUE (PARAM_NUMBER_OF_MVERSN_CLONES))
{
if (dump_file)
fprintf (dump_file, "Reached cloning limit specified "
"by \"num-mversn-clones\" for %s\n",
cgraph_node_name (node));
free (node->aux);
node->aux = NULL;
continue;
}
vec = *(VEC (tree,heap) **) node->aux;
count = count_predicate_functions (vec);
gcc_assert (count >= 1);
cond_func_decl = VEC_index (tree, vec, 0);
gcc_assert (cond_func_decl != NULL);
VEC_free (tree, heap, vec);
free (node->aux);
node->aux = NULL;
if (count > 1)
{
if (dump_file)
fprintf (dump_file, "%s has %d predicates, Not cloning for > 1\n",
cgraph_node_name (node), count);
continue;
}
/* Set the node's aux value to be that of the predicate decl. */
node->aux = cond_func_decl;
run_cloning_phase = true;
num_funcs_cloned++;
}
return run_cloning_phase;
}
/* End Decide Cloning Phase. */
/* Cloning Phase. */
/* Deletes all basic-blocks and leaves function with :
ENTRY_BLOCK ---> (new empty basic block) ---> EXIT_BLOCK
*/
static basic_block
empty_function_body (tree fndecl)
{
basic_block bb, new_bb;
edge e;
tree old_current_function_decl;
old_current_function_decl = current_function_decl;
push_cfun (DECL_STRUCT_FUNCTION (fndecl));
current_function_decl = fndecl;
clear_edges ();
for (bb = ENTRY_BLOCK_PTR; bb != NULL;)
{
basic_block bb_next;
bb_next = bb->next_bb;
if (bb != EXIT_BLOCK_PTR
&& bb != ENTRY_BLOCK_PTR)
{
if (bb_seq (bb) != NULL)
{
gimple_stmt_iterator i;
for (i = gsi_start_bb (bb); !gsi_end_p (i);)
{
gimple stmt = gsi_stmt (i);
unlink_stmt_vdef (stmt);
gsi_remove (&i, true);
release_defs (stmt);
}
}
bb->il.gimple = NULL;
bb->prev_bb = NULL;
bb->next_bb = NULL;
SET_BASIC_BLOCK (bb->index, NULL);
n_basic_blocks--;
}
bb = bb_next;
}
ENTRY_BLOCK_PTR->next_bb = EXIT_BLOCK_PTR;
new_bb = create_empty_bb (ENTRY_BLOCK_PTR);
e = make_edge (ENTRY_BLOCK_PTR, new_bb, EDGE_FALLTHRU);
gcc_assert (e != NULL);
/* XXX:Is this edge necessary ? */
e = make_edge (new_bb, EXIT_BLOCK_PTR, 0);
gcc_assert (e != NULL);
current_function_decl = old_current_function_decl;
pop_cfun ();
return new_bb;
}
/* Takes function with decl ORIG_FNDECL and clones it. The
name of the clone is the original name suffixed with
NAME_SUFFIX. Code is adapted from cgraph_function_versioning
in cgraphunit.c */
static tree
clone_function (tree orig_fndecl, const char *name_suffix)
{
tree new_decl;
char *new_name;
struct cgraph_node *new_version;
struct cgraph_node *old_version;
void **slot;
tree old_current_function_decl;
new_name = make_name (orig_fndecl, name_suffix);
new_decl = copy_node (orig_fndecl);
slot = htab_find_slot_with_hash (name_decl_htab, new_name,
htab_hash_string (new_name), INSERT);
gcc_assert (*slot == NULL);
*slot = new_decl;
/* Code adapted from cgraph_function_versioning in cgraphuinit.c */
new_version = cgraph_get_create_node (new_decl);
old_version = cgraph_get_create_node (orig_fndecl);
new_version->local = old_version->local;
new_version->global = old_version->global;
new_version->rtl = old_version->rtl;
new_version->reachable = true;
new_version->count = old_version->count;
/* Set the name of the new function. */
DECL_NAME (new_decl) = get_identifier (new_name);
SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
SET_DECL_RTL (new_decl, NULL);
tree_function_versioning (orig_fndecl, new_decl, NULL /*tree_map*/,
false, NULL /*args_to_skip*/,
false, /* skip return */
NULL /* blocks_to_copy */ ,
NULL /* new_entry */);
old_current_function_decl = current_function_decl;
push_cfun (DECL_STRUCT_FUNCTION (new_decl));
current_function_decl = new_decl;
TREE_READONLY (new_decl) = TREE_READONLY (orig_fndecl);
TREE_STATIC (new_decl) = TREE_STATIC (orig_fndecl);
TREE_USED (new_decl) = TREE_USED (orig_fndecl);
DECL_ARTIFICIAL (new_decl) = 1;
DECL_IGNORED_P (new_decl) = 0;
TREE_PUBLIC (new_decl) = TREE_PUBLIC (orig_fndecl);
DECL_CONTEXT (new_decl) = DECL_CONTEXT (orig_fndecl);
DECL_EXTERNAL (new_decl) = DECL_EXTERNAL (orig_fndecl);
DECL_COMDAT (new_decl) = DECL_COMDAT (orig_fndecl);
DECL_COMDAT_GROUP (new_decl) = DECL_COMDAT_GROUP (orig_fndecl);
DECL_VIRTUAL_P (new_decl) = DECL_VIRTUAL_P (orig_fndecl);
DECL_WEAK (new_decl) = DECL_WEAK (orig_fndecl);
/* Always inline the clones. Why are we cloning otherwise? */
DECL_DECLARED_INLINE_P (new_decl) = 1;
DECL_UNINLINABLE (new_decl) = 0;
new_version->local.externally_visible
= old_version->local.externally_visible;
new_version->local.local
= old_version->local.local;
new_version->analyzed = true;
new_version->lowered = true;
if (dump_file)
dump_function_to_file (new_decl, dump_file, TDF_BLOCKS);
cgraph_add_new_function (new_decl, true);
cgraph_call_function_insertion_hooks (new_version);
cgraph_mark_needed_node (new_version);
pop_cfun ();
current_function_decl = old_current_function_decl;
return new_decl;
}
/* This function populates the vector *VEC with the args in the gimple
call statement STMT. SKIP_ARGS is the number of args to skip.*/
static void
get_function_args (gimple stmt, int num_args, VEC (tree, heap) **vec,
int skip_args)
{
int i;
if (num_args == 0) return;
*vec = VEC_alloc (tree, heap, num_args);
/* The number of args in a function is 1 plus the actual number of
args. Also, there are 3 special args reserved, so the first arg
starts from 3. */
for (i = 0; i <= num_args - 2; ++i)
VEC_quick_push (tree, *vec, gimple_call_arg (stmt, (skip_args + i)));
}
/* Given ret = __builtin_dispatch (pred, fn1, fn2, arg1, ....)
get ret = fn1 (arg1, ...) or ret = fn2 (arg1, ....)
depending on the value of SIDE == 0 or 1. */
static gimple
make_specialized_call_from_builtin (gimple builtin_stmt, int side)
{
tree func_addr;
int num_func_args = 0;
VEC (tree, heap) *nargs = NULL;
tree lhs_stmt;
gimple specialized_call_stmt;
if (side == 0)
func_addr = gimple_call_arg (builtin_stmt, 1);
else
func_addr = gimple_call_arg (builtin_stmt, 2);
num_func_args
= function_args_count (TREE_TYPE (TREE_OPERAND (func_addr, 0)));
get_function_args (builtin_stmt, num_func_args, &nargs, 3);
specialized_call_stmt = gimple_build_call_vec (func_addr, nargs);
lhs_stmt = gimple_call_lhs (builtin_stmt);
if (lhs_stmt != NULL_TREE)
gimple_call_set_lhs (specialized_call_stmt, lhs_stmt);
if (nargs != NULL)
VEC_free (tree, heap, nargs);
return specialized_call_stmt;
}
/* Given a call (GENERIC_STMT) to a function that is cloned, substitute
with a call to the correct clone. */
static gimple
make_specialized_call_to_clone (gimple generic_stmt, int side)
{
tree new_decl;
char *new_name;
tree generic_fndecl;
gimple specialized_call_stmt;
void **slot;
int num_func_args;
tree lhs_stmt;
VEC (tree, heap) *nargs= NULL;
generic_fndecl = gimple_call_fndecl (generic_stmt);
gcc_assert (generic_fndecl != NULL);
if (side == 0)
new_name = make_name (generic_fndecl, "clone_0");
else
new_name = make_name (generic_fndecl, "clone_1");
slot = htab_find_slot_with_hash (name_decl_htab, new_name,
htab_hash_string (new_name), NO_INSERT);
gcc_assert (slot != NULL);
new_decl = (tree) *slot;
gcc_assert (new_decl);
num_func_args = function_args_count (TREE_TYPE (generic_fndecl));
get_function_args (generic_stmt, num_func_args, &nargs, 0);
specialized_call_stmt = gimple_build_call_vec (new_decl, nargs);
lhs_stmt = gimple_call_lhs (generic_stmt);
if (lhs_stmt != NULL_TREE)
gimple_call_set_lhs (specialized_call_stmt, lhs_stmt);
if (nargs != NULL)
VEC_free (tree, heap, nargs);
return specialized_call_stmt;
}
/* Returns true if STMT is a call to __builtin_dispatch and its
predicate feature-test function is marked with attribute
"version_selector". */
static bool
is_builtin_with_predicate_version_selector (gimple stmt)
{
tree cond_func_addr, cond_func_decl;
gcc_assert (!flag_lto);
if (gimple_call_fndecl (stmt) != builtin_function_decl)
return false;
cond_func_addr = find_version_selector_func_addr (stmt);
if (cond_func_addr == NULL)
return false;
cond_func_decl = TREE_OPERAND (cond_func_addr, 0);
if (lookup_attribute ("version_selector",
DECL_ATTRIBUTES (cond_func_decl)) != NULL)
return true;
return false;
}
/* Find calls to __builtin_dispatch or to functions that are versioned
in CLONE_DECL and substitute the call with the correct version based
on the value of SIDE. */
static void
specialize_call (tree clone_decl, int side)
{
basic_block bb;
tree old_current_function_decl;
old_current_function_decl = current_function_decl;
push_cfun (DECL_STRUCT_FUNCTION (clone_decl));
current_function_decl = clone_decl;
/* Iterate over call edges and find out if there is
a call to __builtin_dispatch or a cloned function.
We cannot iterate over call graph edges as there are
no edges for the clones yet. */
FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (clone_decl))
{
gimple_stmt_iterator gsi;
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
tree callee_decl;
struct cgraph_node *callee_node;
gimple specialized_call_stmt;
gimple stmt = gsi_stmt (gsi);
if (!is_gimple_call (stmt))
continue;
callee_decl = gimple_call_fndecl (stmt);
if (callee_decl == NULL)
continue;
callee_node = cgraph_get_create_node (callee_decl);
/* For a __builtin_dispatch stmt, only specialize if
version_selector attribute is set. Otherwise, it is
not hoisted, so no specialization. */
if (is_builtin_with_predicate_version_selector (stmt))
{
specialized_call_stmt =
make_specialized_call_from_builtin (stmt, side);
}
else if (callee_node->aux != NULL)
{
specialized_call_stmt =
make_specialized_call_to_clone (stmt, side);
}
else
continue;
if (dump_file)
{
fprintf (dump_file, "Specialize stmt : \n");
print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS);
fprintf (dump_file, "Specialized stmt : \n");
print_gimple_stmt (dump_file, specialized_call_stmt,
0, TDF_VOPS);
}
gimple_set_block (specialized_call_stmt, gimple_block (stmt));
gsi_insert_before_without_update (&gsi, specialized_call_stmt,
GSI_SAME_STMT);
unlink_stmt_vdef (stmt);
gsi_remove (&gsi, true);
mark_symbols_for_renaming (specialized_call_stmt);
/* After removing make sure gsi is set correctly to not skip
a statememt. */
gsi = gsi_for_stmt (specialized_call_stmt);
}
}
current_function_decl = old_current_function_decl;
pop_cfun ();
}
/* When a function is version cloned, its body is replaced to call one
of the versions with the feature-test function acting as a predicate.
This is done with __builtin_dispatch which is later expanded. */
static gimple
make_builtin_call_to_clones (tree orig_fndecl, tree clone_0_addr,
tree clone_1_addr, tree cond_func_addr)
{
gimple new_builtin_call;
VEC(tree, heap) *vargs = VEC_alloc (tree, heap, 4);
tree arg;
VEC_quick_push (tree, vargs, cond_func_addr);
VEC_quick_push (tree, vargs, clone_0_addr);
VEC_quick_push (tree, vargs, clone_1_addr);
for (arg = DECL_ARGUMENTS (orig_fndecl); arg; arg = TREE_CHAIN (arg))
{
VEC_safe_push (tree, heap, vargs, arg);
/* Again, this add_referenced_var is very very important. It broke
a build where a cloned function's arguments where never
referenced. Missing this statement in places asserts at
tree-dfa.c:589, in function referenced_var_lookup at
"gcc_assert (h || uid == 0);" and is very difficult to triage. */
add_referenced_var (arg);
}
new_builtin_call = gimple_build_call_vec (builtin_function_decl, vargs);
mark_symbols_for_renaming (new_builtin_call);
if (dump_file)
print_gimple_stmt (dump_file, new_builtin_call, 0, TDF_VOPS);
VEC_free (tree, heap, vargs);
return new_builtin_call;
}
/* This clones a dispatch function whose callee-graph path has a function
which calls __builtin_dispatch. This function is cloned and the
original function branches to the right clone. */
static int
clone_and_dispatch_function (struct cgraph_node *orig_node, tree *clone_0,
tree *clone_1)
{
tree clone_0_decl, clone_1_decl;
gimple new_builtin_call = NULL;
gimple new_return_stmt = NULL;
gimple_seq seq = NULL;
basic_block new_bb;
tree orig_fndecl;
tree return_var = NULL;
tree return_type;
tree old_current_function_decl;
old_current_function_decl = current_function_decl;
orig_fndecl = orig_node->decl;
push_cfun (DECL_STRUCT_FUNCTION (orig_fndecl));
current_function_decl = orig_fndecl;
/* Make 2 clones for true and false function. */
clone_0_decl = clone_function (orig_fndecl, "clone_0");
clone_1_decl = clone_function (orig_fndecl, "clone_1");
*clone_0 = clone_0_decl;
*clone_1 = clone_1_decl;
new_bb = empty_function_body (orig_fndecl);
new_builtin_call = make_builtin_call_to_clones (
orig_fndecl,
build_fold_addr_expr (clone_0_decl),
build_fold_addr_expr (clone_1_decl),
build_fold_addr_expr ((tree)orig_node->aux));
return_type = TREE_TYPE (TREE_TYPE (orig_fndecl));
if (!TREE_ADDRESSABLE (return_type) && COMPLETE_TYPE_P (return_type))
{
tree tmp_var;
tmp_var = create_tmp_var (return_type, NULL);
add_referenced_var (tmp_var);
return_var = make_ssa_name (tmp_var, new_builtin_call);
gimple_call_set_lhs (new_builtin_call, return_var);
}
mark_symbols_for_renaming (new_builtin_call);
new_return_stmt = gimple_build_return (return_var);
mark_symbols_for_renaming (new_return_stmt);
gimple_seq_add_stmt (&seq, new_builtin_call);
gimple_seq_add_stmt (&seq, new_return_stmt);
set_bb_seq (new_bb, seq);
gimple_set_bb (new_builtin_call, new_bb);
gimple_set_bb (new_return_stmt, new_bb);
gimple_set_block (new_builtin_call, DECL_INITIAL (orig_fndecl));
gimple_set_block (new_return_stmt, DECL_INITIAL (orig_fndecl));
if (dump_file)
dump_function_to_file (orig_fndecl, dump_file, TDF_BLOCKS);
/* This update_ssa is necessary here for the following reason. SSA uses
a global syms_to_rename bitmap that stores syms that must be renamed.
So, if we accumulate the syms from one function in IPA but move to
a different function without updating SSA, then we could be
accumulating syms from many functions. This would assert in
referenced_var_lookup because the hashtab storing the syms is
function local. This is horrible. gcc-4.6 makes this bitmap a
global. */
update_ssa (TODO_update_ssa);
compute_inline_parameters (cgraph_get_create_node (orig_fndecl), false);
DECL_DECLARED_INLINE_P (orig_fndecl) = 1;
DECL_UNINLINABLE (orig_fndecl) = 0;
current_function_decl = old_current_function_decl;
pop_cfun ();
return 0;
}
/* Clone all functions marked for cloning by the earlier phase. */
static void
perform_cloning_phase (void)
{
struct cgraph_node *node;
int ix;
VEC (tree, heap) *cloned_decl_list = NULL;
tree cloned_decl = NULL;
cloned_decl_list = VEC_alloc (tree, heap, 2);
/* First clone, then specialize the clones. */
for (node = cgraph_nodes; node; node = node->next)
{
tree clone_0_decl, clone_1_decl;
if (node->aux == NULL)
continue;
if (dump_file)
{
fprintf (dump_file, "%s will be cloned\n", cgraph_node_name (node));
dump_function_to_file (node->decl, dump_file, TDF_BLOCKS);
}
clone_and_dispatch_function (node, &clone_0_decl, &clone_1_decl);
VEC_safe_push (tree, heap, cloned_decl_list, clone_0_decl);
VEC_safe_push (tree, heap, cloned_decl_list, clone_1_decl);
continue;
}
/* Specialize the clones now. */
for (ix = 0; VEC_iterate (tree, cloned_decl_list, ix, cloned_decl); ++ix)
{
int which_clone = ix % 2;
specialize_call (cloned_decl, which_clone);
}
VEC_free (tree, heap, cloned_decl_list);
}
/* End Cloning phase. */
/* Checks if there is atleast one call to __builtin_dispatch. */
static bool
find_builtin_decl (void)
{
struct cgraph_node *node;
for (node = cgraph_nodes; node; node = node->next)
{
if (strstr (cgraph_node_name (node), "__builtin_dispatch") != NULL)
{
builtin_function_decl = node->decl;
return true;
}
}
return false;
}
/* Set the aux fields of all nodes and edges in the call graph to be NULL. */
static void
cleanup_aux_field (void)
{
struct cgraph_node *node;
struct cgraph_edge *edge;
for (node = cgraph_nodes; node; node = node->next)
{
node->aux = NULL;
for (edge = node->callees; edge; edge = edge->next_callee)
edge->aux = NULL;
}
}
/* Main driver function. It scans the __builtin_dispatch calls and
figures out which functions to clone. It then clones the functions. */
static unsigned int
builtin_dispatch_ipa_clone (void)
{
cleanup_aux_field ();
/* Allocate hashtab mapping name to decl. */
name_decl_htab = htab_create (10, name_decl_htab_hash_descriptor,
name_decl_htab_eq_descriptor, NULL);
/* Turn it on for O1 and above. At -O0, there is a SSA alias bug
with create_tmp_var. Cloning and hoisting is not necessary at
-O0 anyways. Also, guard it with the flag
"-fclone-hot-version-paths".
Disabled for LTO as it needs more work. */
if (optimize == 0
|| profile_arc_flag
|| !flag_clone_hot_version_paths
|| flag_lto)
return 0;
if (!find_builtin_decl ())
return 0;
gcc_assert (builtin_function_decl != NULL);
if (!perform_analysis_phase ())
{
cleanup_aux_field ();
return 0;
}
if (decide_cloning_phase ())
perform_cloning_phase ();
cleanup_aux_field ();
return 0;
}
static bool
gate_handle_builtin_dispatch (void)
{
return true;
}
struct simple_ipa_opt_pass pass_ipa_multiversion_dispatch =
{
{
SIMPLE_IPA_PASS,
"multiversion_dispatch", /* name */
gate_handle_builtin_dispatch, /* gate */
builtin_dispatch_ipa_clone, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_MVERSN_DISPATCH, /* tv_id */
0, /* properties_required */
PROP_cfg, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_dump_func | /* todo_flags_finish */
TODO_update_ssa
}
};
/* Lowering of the __builtin_dispatch calls. */
/* This function converts STMT which is a __builtin_dispatch
call of the form :
ret = __builtin_dispatch (predicate, foo, bar, arg1, ...)
into :
var_1 = predicate
if (var_1)
var_2 = foo (arg1, ...);
else
var_3 = bar (arg1, ...);
var_4 = phi (var_2, var_3)
ret = var_4
var_? are ssa names for variable var.
*/
static unsigned int
convert_builtin_dispatch (gimple stmt)
{
tree cond_func_addr, if_func_addr, else_func_addr;
tree cond_func_decl = NULL;
gimple if_part, else_part, if_else_stmt;
basic_block bb1, bb2, bb3, bb4;
gimple bb1end, bb2end, bb3end;
edge e12, e13, e23, e24, e34;
VEC(tree, heap) *nargs = NULL;
int num_func_args = 0, i;
tree version_selector_var;
tree lhs_result;
gimple_stmt_iterator gsi;
basic_block bb;
gimple feature_test_call = NULL;
tree tmp_var = NULL;
gimple init_stmt = NULL;
tree ssa_if_name, ssa_else_name;
gimple phinode = NULL;
tree tmp_result_var, ssa_result_var;
gsi = gsi_for_stmt (stmt);
bb = gsi_bb (gsi);
cond_func_addr = find_version_selector_func_addr (stmt);
if (cond_func_addr != NULL)
{
cond_func_decl = TREE_OPERAND (cond_func_addr, 0);
gcc_assert (cond_func_decl);
}
if (dump_file)
{
fprintf (dump_file, "Before Converting __builtin_dispatch :\n");
dump_function_to_file (current_function_decl, dump_file, TDF_BLOCKS);
}
if_func_addr = gimple_call_arg (stmt, 1);
else_func_addr = gimple_call_arg (stmt, 2);
tmp_result_var = create_tmp_var (integer_type_node, NULL);
add_referenced_var (tmp_result_var);
if (flag_lto
|| cond_func_decl == NULL
|| lookup_attribute ("version_selector",
DECL_ATTRIBUTES (cond_func_decl)) == NULL)
{
tree arg = gimple_call_arg (stmt, 0);
/* This means the feature-test function is not set with attribute
version_selector or it is a function pointer or in LTO. So,
explicitly call it. */
feature_test_call = gimple_build_call (arg, 0);
ssa_result_var = make_ssa_name (tmp_result_var, feature_test_call);
gimple_call_set_lhs (feature_test_call, ssa_result_var);
mark_symbols_for_renaming (feature_test_call);
version_selector_var = ssa_result_var;
}
else
{
/* Get the global corresponding to the "version_selector" function. */
version_selector_var
= handle_version_selector_attr_function (cond_func_decl);
gcc_assert (version_selector_var);
add_referenced_var (version_selector_var);
feature_test_call = gimple_build_assign (tmp_result_var,
version_selector_var);
ssa_result_var = make_ssa_name (tmp_result_var, feature_test_call);
gimple_assign_set_lhs (feature_test_call, ssa_result_var);
mark_symbols_for_renaming (feature_test_call);
version_selector_var = ssa_result_var;
}
if_else_stmt = gimple_build_cond (GT_EXPR,
version_selector_var,
integer_zero_node,
NULL_TREE, NULL_TREE);
mark_symbols_for_renaming (if_else_stmt);
num_func_args = function_args_count (
TREE_TYPE (TREE_OPERAND (if_func_addr, 0)));
nargs = VEC_alloc (tree, heap, num_func_args);
/* The arguments to the feature test function start from the 4th argument
in __builtin_dispatch. The first 3 arguments are mandatory. */
for (i = 0; i <= num_func_args - 2; ++i)
VEC_quick_push (tree, nargs,
gimple_call_arg (stmt, (3 + i)));
if_part = gimple_build_call_vec (if_func_addr, nargs);
else_part = gimple_build_call_vec (else_func_addr, nargs);
lhs_result = gimple_call_lhs (stmt);
if (lhs_result != NULL_TREE)
{
tree ssa_var;
tree return_type;
return_type = TREE_TYPE (lhs_result);
tmp_var = create_tmp_var (return_type, NULL);
add_referenced_var (tmp_var);
init_stmt = gimple_build_assign (tmp_var, build_zero_cst (return_type));
ssa_var = make_ssa_name (tmp_var, init_stmt);
gimple_assign_set_lhs (init_stmt, ssa_var);
mark_symbols_for_renaming (init_stmt);
ssa_if_name = make_ssa_name (tmp_var, init_stmt);
ssa_else_name = make_ssa_name (tmp_var, init_stmt);
gimple_call_set_lhs (if_part, ssa_if_name);
gimple_call_set_lhs (else_part, ssa_else_name);
}
mark_symbols_for_renaming (if_part);
mark_symbols_for_renaming (else_part);
/* Set the lexical block to be the same as the dispatch call. */
gcc_assert (feature_test_call);
gimple_set_block (feature_test_call, gimple_block (stmt));
if (init_stmt)
gimple_set_block (init_stmt, gimple_block (stmt));
gimple_set_block (if_else_stmt, gimple_block (stmt));
gimple_set_block (if_part, gimple_block (stmt));
gimple_set_block (else_part, gimple_block (stmt));
gsi_insert_before_without_update (&gsi, feature_test_call, GSI_SAME_STMT);
gimple_set_bb (feature_test_call, bb);
if (init_stmt)
{
gsi_insert_before_without_update (&gsi, init_stmt,
GSI_SAME_STMT);
gimple_set_bb (init_stmt, bb);
}
gsi_insert_before_without_update (&gsi, if_else_stmt, GSI_SAME_STMT);
gsi_insert_before_without_update (&gsi, if_part, GSI_SAME_STMT);
gsi_insert_before_without_update (&gsi, else_part, GSI_SAME_STMT);
/* Remove the builtin_dispatch call after the expansion. */
unlink_stmt_vdef (stmt);
gsi_remove (&gsi, true);
bb1end = if_else_stmt;
bb2end = if_part;
bb3end = else_part;
bb1 = bb;
e12 = split_block (bb1, bb1end);
bb2 = e12->dest;
e23 = split_block (bb2, bb2end);
bb3 = e23->dest;
e34 = split_block (bb3, bb3end);
bb4 = e34->dest;
e12->flags &= ~EDGE_FALLTHRU;
e12->flags |= EDGE_TRUE_VALUE;
e13 = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
gcc_assert (e13);
e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
gcc_assert (e24);
remove_edge (e23);
if (tmp_var)
{
gimple assign_stmt;
phinode = create_phi_node (tmp_var, bb4);
add_phi_arg (phinode, ssa_if_name, e24, UNKNOWN_LOCATION);
add_phi_arg (phinode, ssa_else_name, e34, UNKNOWN_LOCATION);
mark_symbols_for_renaming (phinode);
gcc_assert (lhs_result);
assign_stmt
= gimple_build_assign (lhs_result, gimple_phi_result (phinode));
mark_symbols_for_renaming (assign_stmt);
gsi = gsi_start_bb (bb4);
gsi_insert_before_without_update (&gsi, assign_stmt, GSI_SAME_STMT);
gimple_set_bb (assign_stmt, bb4);
}
if (dump_file)
{
fprintf (dump_file, "Converted __builtin_dispatch :\n");
dump_function_to_file (current_function_decl, dump_file, TDF_BLOCKS);
}
return 0;
}
/* This function does two things.
1) For a feature-test function marked with attribute "version_selector",
it creates a constructor that calls the feature-test function and a
global that holds the result. The global's result will be used
to lower any __builtin_dispatch statement that refers to this feature
test function. The __builtin_dispatch statement and the feature test
function can be in different modules.
2) It lowers __builtin_dispatch statements. */
static unsigned int
do_convert_builtin_dispatch (void)
{
basic_block bb;
gimple_stmt_iterator gsi;
VEC (gimple, heap) *builtin_stmt_list = NULL;
int ix;
gimple builtin_stmt;
/* Allocate hashtab mapping name to decl. */
if (name_decl_htab == NULL)
name_decl_htab = htab_create (10, name_decl_htab_hash_descriptor,
name_decl_htab_eq_descriptor, NULL);
/* Look for functions with attribute "version_selector" and make a
constructor which calls the function and saves the result in a
global. Disabled for LTO as it needs more work. */
if (!flag_lto
&& lookup_attribute ("version_selector",
DECL_ATTRIBUTES (current_function_decl)) != NULL)
{
if (dump_file)
fprintf (dump_file, "Function with version_selector attribute found :"
" %s. Making constructor for it.\n",
current_function_name ());
handle_version_selector_attr_function (current_function_decl);
/* Assume there are no __builtin_dispatch calls in feature test
functions. So it is safe to return. */
return 0;
}
/* Find and lower __builtin_dispatch calls. */
FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (current_function_decl))
{
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple stmt = gsi_stmt (gsi);
tree call_decl;
if (!is_gimple_call (stmt))
continue;
call_decl = gimple_call_fndecl (stmt);
if (call_decl == NULL)
continue;
if (DECL_NAME (call_decl) == NULL_TREE)
continue;
if (strstr (IDENTIFIER_POINTER (DECL_NAME (call_decl)),
"__builtin_dispatch") == NULL)
continue;
if (dump_file)
{
fprintf (dump_file, "Converting __builtin_dispatch stmt in:%s\n",
current_function_name ());
print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS);
}
if (builtin_stmt_list == NULL)
builtin_stmt_list = VEC_alloc (gimple, heap, 2);
gcc_assert (builtin_stmt_list != NULL);
VEC_safe_push (gimple, heap, builtin_stmt_list, stmt);
}
}
if (!builtin_stmt_list)
return 0;
for (ix = 0; VEC_iterate (gimple, builtin_stmt_list, ix, builtin_stmt);
++ix)
convert_builtin_dispatch (builtin_stmt);
compute_inline_parameters (cgraph_get_create_node (current_function_decl),
false);
VEC_free (gimple, heap, builtin_stmt_list);
return 0;
}
static bool
gate_convert_builtin_dispatch (void)
{
return true;
}
struct gimple_opt_pass pass_tree_convert_builtin_dispatch =
{
{
GIMPLE_PASS,
"convert_builtin_dispatch", /* name */
gate_convert_builtin_dispatch, /* gate */
do_convert_builtin_dispatch, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_MVERSN_DISPATCH, /* tv_id */
PROP_cfg, /* properties_required */
PROP_cfg, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_dump_func | /* todo_flags_finish */
TODO_cleanup_cfg | TODO_dump_cgraph |
TODO_update_ssa | TODO_verify_ssa
}
};