/* **********************************************************
* Copyright (c) 2012-2014 Google, Inc. All rights reserved.
* Copyright (c) 2000-2010 VMware, Inc. All rights reserved.
* **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
/* Copyright (c) 2003-2007 Determina Corp. */
/* Copyright (c) 2001-2003 Massachusetts Institute of Technology */
/* Copyright (c) 2000-2001 Hewlett-Packard Company */
/*
* emit.c - fragment code generation routines
*/
#include "globals.h"
#include "link.h"
#include "fragment.h"
#include "fcache.h"
#include "proc.h"
#include "instrlist.h"
#include "emit.h"
#include "instrlist.h"
#include "instr.h"
#include "monitor.h"
#include "translate.h"
#include <string.h> /* memcpy */
#ifdef DEBUG
# include "decode_fast.h" /* for decode_next_pc for stress_recreate_pc */
#endif
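/* Splits an fcache space statistic into shared vs. private and trace vs. bb
* buckets based on the fragment flags, so cache usage can be attributed to
* the correct cache.
*/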
#define STATS_FCACHE_ADD(flags, stat, val) DOSTATS({ \
if (TEST(FRAG_SHARED, (flags))) { \
if (IN_TRACE_CACHE(flags)) \
STATS_ADD(fcache_shared_trace_##stat, val);\
else \
STATS_ADD(fcache_shared_bb_##stat, val); \
} \
else if (IN_TRACE_CACHE(flags)) \
STATS_ADD(fcache_trace_##stat, val); \
else \
STATS_ADD(fcache_bb_##stat, val); \
})
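/* E.g., STATS_FCACHE_ADD(flags, bodies, offset) below adds the body size to
* one of fcache_shared_trace_bodies, fcache_shared_bb_bodies,
* fcache_trace_bodies, or fcache_bb_bodies, depending on the flags.
*/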
#ifdef INTERNAL
/* case 4344 - verify we can recreate the app pc in a fragment; returns the pc
* of the last instruction in the body of f */
static cache_pc
get_last_fragment_body_instr_pc(dcontext_t *dcontext, fragment_t *f)
{
cache_pc body_last_inst_pc;
linkstub_t *l;
/* Assumption: the exit cti of the last exit stub is the last instruction in
* the body. PR 215217 enforces this for CLIENT_INTERFACE as well. */
l = FRAGMENT_EXIT_STUBS(f);
/* never called on future fragments, so a stub should exist */
while (!LINKSTUB_FINAL(l))
l = LINKSTUB_NEXT_EXIT(l);
body_last_inst_pc = EXIT_CTI_PC(f, l);
return body_last_inst_pc;
}
void
stress_test_recreate(dcontext_t *dcontext, fragment_t *f,
instrlist_t *ilist)
{
cache_pc body_end_pc = get_last_fragment_body_instr_pc(dcontext, f);
app_pc recreated_pc;
LOG(THREAD, LOG_MONITOR, 2, "Testing recreating Fragment %d for tag "PFX" at "PFX"\n",
f->id, f->tag, f->start_pc);
DOLOG(3, LOG_INTERP, {
/* visualize translation info if it were to be recorded for every
* fragment, not just deleted ones -- for debugging only
*/
translation_info_t *info = record_translation_info(dcontext, f, NULL);
translation_info_print(info, f->start_pc, THREAD);
translation_info_free(dcontext, info);
/* handy reference of app code and fragment -- only 1st part of trace though */
DOLOG(3, LOG_INTERP, { disassemble_app_bb(dcontext, f->tag, THREAD); });
DOLOG(3, LOG_INTERP, { disassemble_fragment(dcontext, f, false); });
});
DOLOG(2, LOG_MONITOR, {
/* Translate them all.
* Useful when verifying manually, o/w we just ensure no asserts or crashes.
*/
cache_pc cpc = f->start_pc;
while (cpc <= body_end_pc) {
recreated_pc = recreate_app_pc(dcontext, cpc, NULL/*for full test*/);
LOG(THREAD, LOG_MONITOR, 2, "\ttranslated cache "PFX" => app "PFX"\n",
cpc, recreated_pc);
cpc = decode_next_pc(dcontext, cpc);
}
});
recreated_pc = recreate_app_pc(dcontext, body_end_pc, NULL/*for full test*/);
/* FIXME: we should figure out how to test each instruction, while knowing the app state */
LOG(THREAD, LOG_MONITOR, 2, "Testing recreating Fragment #%d recreated_pc="PFX"\n",
GLOBAL_STAT(num_fragments), recreated_pc);
ASSERT(recreated_pc != NULL);
if (INTERNAL_OPTION(stress_recreate_state) && ilist != NULL)
stress_test_recreate_state(dcontext, f, ilist);
}
#endif /* INTERNAL */
/* here instead of link.c b/c link.c doesn't deal w/ Instrs */
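/* Returns true if the final exit of ilist (an unconditional branch) can share
* the exit stub of the immediately preceding conditional-branch exit, in which
* case no separate stub is emitted for the final exit.
*/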
bool
final_exit_shares_prev_stub(dcontext_t *dcontext, instrlist_t *ilist, uint frag_flags)
{
/* if a cbr+ubr pair is the final exit, should they share a stub? */
if (INTERNAL_OPTION(cbr_single_stub) && !TEST(FRAG_COARSE_GRAIN, frag_flags)) {
/* don't need to expand since is_exit_cti will rule out level 0 */
instr_t *inst = instrlist_last(ilist);
/* FIXME: we could support code after the last cti (this is a ubr, so it
* would be out-of-line code) or between the cbr and ubr, but for
* simplicity of identifying exits for traces we don't
*/
if (instr_is_exit_cti(inst) && instr_is_ubr(inst)) {
/* don't need to expand since is_exit_cti will rule out level 0 */
instr_t *prev_cti = instr_get_prev(inst);
if (prev_cti != NULL && instr_is_exit_cti(prev_cti)
/* cti_loop is fine since cti points to loop instr, enabling
* our disambiguation to know which state to look at */
&& instr_is_cbr(prev_cti)
#ifdef CUSTOM_EXIT_STUBS
/* no custom code */
&& instr_exit_stub_code(prev_cti) == NULL
&& instr_exit_stub_code(inst) == NULL
#endif
#ifdef PROFILE_LINKCOUNT
/* no linkcount code, which is only present in traces */
&& (!TEST(FRAG_IS_TRACE, frag_flags) || !INTERNAL_OPTION(profile_counts))
#endif
/* no separate freeing */
&& ((TEST(FRAG_SHARED, frag_flags) &&
!DYNAMO_OPTION(unsafe_free_shared_stubs)) ||
(!TEST(FRAG_SHARED, frag_flags) &&
!DYNAMO_OPTION(free_private_stubs)))) {
return true;
}
}
}
return false;
}
/* Walks ilist and f's linkstubs, setting each linkstub_t's fields appropriately
* for the corresponding exit cti in ilist.
* If emit is true, also encodes each instr in ilist to f's cache slot,
* increments stats for new fragments, and returns the final pc after all encodings.
*/
cache_pc
set_linkstub_fields(dcontext_t *dcontext, fragment_t *f, instrlist_t *ilist,
uint num_direct_stubs, uint num_indirect_stubs, bool emit)
{
uint i;
bool frag_offs_at_end;
linkstub_t *l;
cache_pc pc;
instr_t *inst;
app_pc target;
DEBUG_DECLARE(instr_t *prev_cti = NULL;)
pc = FCACHE_ENTRY_PC(f);
l = FRAGMENT_EXIT_STUBS(f);
i = 0;
frag_offs_at_end = linkstub_frag_offs_at_end(f->flags, num_direct_stubs,
num_indirect_stubs);
for (inst = instrlist_first(ilist); inst; inst = instr_get_next(inst)) {
if (instr_is_exit_cti(inst)) {
/* l is currently zeroed out but otherwise uninitialized;
* the stub starts out as unlinked and never-been-linked
*/
ASSERT(l->flags == 0);
i++;
if (i == num_direct_stubs + num_indirect_stubs) {
/* set final flag */
l->flags |= LINK_END_OF_LIST;
}
if (frag_offs_at_end)
l->flags |= LINK_FRAG_OFFS_AT_END;
DODEBUG({
if (emit && is_exit_cti_patchable(dcontext, inst, f->flags)) {
uint off = patchable_exit_cti_align_offs(dcontext, inst, pc);
if (off > 0) {
ASSERT(!PAD_FRAGMENT_JMPS(f->flags));
STATS_PAD_JMPS_ADD(f->flags, unaligned_exits, 1);
STATS_PAD_JMPS_ADD(f->flags, unaligned_exit_bytes, off);
}
}
});
/* An alternative way of testing this is to match with
* is_return_lookup_routine() whenever we get that */
/* FIXME: doing the above is much easier now and it is more reliable
* than expecting the branch type flags to propagate through
*/
l->flags |= instr_exit_branch_type(inst);
target = instr_get_branch_target_pc(inst);
if (is_indirect_branch_lookup_routine(dcontext, (cache_pc)target)) {
ASSERT(IF_WINDOWS_ELSE_0(is_shared_syscall_routine(dcontext, target)) ||
is_ibl_routine_type(dcontext, (cache_pc)target,
extract_branchtype((ushort)instr_exit_branch_type(inst))));
/* this is a mangled form of an original indirect
* branch or is a mangled form of an indirect branch
* to a real native pc out of the fragment
*/
l->flags |= LINK_INDIRECT;
ASSERT(!LINKSTUB_DIRECT(l->flags));
ASSERT(!LINKSTUB_NORMAL_DIRECT(l->flags));
ASSERT(!LINKSTUB_CBR_FALLTHROUGH(l->flags));
ASSERT(LINKSTUB_INDIRECT(l->flags));
}
else {
DOSTATS({
if (emit) {
if (PTR_UINT_ABS(target - f->tag) > SHRT_MAX) {
if (num_indirect_stubs == 0 && num_direct_stubs == 2 &&
i == 2)
STATS_INC(num_bb_fallthru_far);
STATS_INC(num_bb_exit_tgt_far);
} else {
if (num_indirect_stubs == 0 && num_direct_stubs == 2 &&
i == 2)
STATS_INC(num_bb_fallthru_near);
STATS_INC(num_bb_exit_tgt_near);
}
}
});
if (LINKSTUB_FINAL(l) &&
use_cbr_fallthrough_short(f->flags, num_direct_stubs,
num_indirect_stubs)) {
/* this is how we mark a cbr fallthrough, w/ both
* LINK_DIRECT and LINK_INDIRECT
*/
l->flags |= LINK_DIRECT | LINK_INDIRECT;
/* ensure our macros are in synch */
ASSERT(LINKSTUB_DIRECT(l->flags));
ASSERT(!LINKSTUB_NORMAL_DIRECT(l->flags));
ASSERT(LINKSTUB_CBR_FALLTHROUGH(l->flags));
ASSERT(!LINKSTUB_INDIRECT(l->flags));
DOSTATS({
if (emit)
STATS_INC(num_bb_cbr_fallthru_shrink);
});
ASSERT(prev_cti != NULL && instr_is_cbr(prev_cti));
/* should always qualify for single stub */
ASSERT(!INTERNAL_OPTION(cbr_single_stub) ||
/* FIXME: this duplicates calc of final_cbr_single_stub
* bool cached in emit_fragment_common()
*/
(inst == instrlist_last(ilist) &&
final_exit_shares_prev_stub(dcontext, ilist, f->flags)));
} else {
direct_linkstub_t *dl = (direct_linkstub_t *) l;
l->flags |= LINK_DIRECT;
/* ensure our macros are in synch */
ASSERT(LINKSTUB_DIRECT(l->flags));
ASSERT(LINKSTUB_NORMAL_DIRECT(l->flags));
ASSERT(!LINKSTUB_CBR_FALLTHROUGH(l->flags));
ASSERT(!LINKSTUB_INDIRECT(l->flags));
dl->target_tag = target;
}
}
if (should_separate_stub(dcontext, target, f->flags)
#ifdef CUSTOM_EXIT_STUBS
/* we can't separate custom stubs */
&& instr_exit_stub_code(inst) == NULL
#endif
)
l->flags |= LINK_SEPARATE_STUB;
/* FIXME: we don't yet support !emit ctis: need to avoid patching
* the cti when emitting the exit stub */
ASSERT_NOT_IMPLEMENTED(!emit || instr_ok_to_emit(inst));
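/* cti_offset is overloaded: for a cbr fallthrough exit it holds the (signed)
* distance from the fragment tag to the fallthrough target; for all other
* exits it holds the offset of the exit cti from the fragment's start pc.
*/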
if (LINKSTUB_CBR_FALLTHROUGH(l->flags)) {
/* target is indicated via cti_offset */
ASSERT_TRUNCATE(l->cti_offset, short, target - f->tag);
l->cti_offset = (ushort) /* really a short */ (target - f->tag);
} else {
ASSERT_TRUNCATE(l->cti_offset, ushort, pc - f->start_pc);
l->cti_offset = (ushort) (pc - f->start_pc);
}
DOCHECK(1, {
/* ensure LINK_ flags were transferred via instr_exit_branch_type */
if (instr_branch_special_exit(inst)) {
ASSERT(!LINKSTUB_INDIRECT(l->flags) &&
TEST(LINK_SPECIAL_EXIT, l->flags));
}
});
#ifdef UNSUPPORTED_API
DOCHECK(1, {
if (instr_branch_targets_prefix(inst))
ASSERT(TEST(LINK_TARGET_PREFIX, l->flags));
});
#endif
if (!EXIT_HAS_STUB(l->flags, f->flags)) {
/* exit cti points straight at ibl routine */
instr_set_branch_target_pc(inst, get_unlinked_entry(dcontext, target));
} else {
/* HACK: set the branch target pc in inst to be its own pc -- this ensures
* that instr_encode will not fail due to address span problems -- the
* correct target (to the exit stub) will get patched in when the
* exit stub corresponding to this exit branch is emitted later */
instr_set_branch_target_pc(inst, pc);
}
/* PR 267260/PR 214962: keep this exit cti marked */
instr_set_our_mangling(inst, true);
LOG(THREAD, LOG_EMIT, dcontext == GLOBAL_DCONTEXT ||
dcontext->in_opnd_disassemble ? 5U : 3U,
"exit_branch_type=0x%x target="PFX" l->flags=0x%x\n",
instr_exit_branch_type(inst), target, l->flags);
DOCHECK(1, {
if (TEST(FRAG_COARSE_GRAIN, f->flags)) {
ASSERT(!frag_offs_at_end);
/* FIXME: indirect stubs should be separated
* eventually, but right now no good place to put them
* so keeping inline
*/
ASSERT(LINKSTUB_INDIRECT(l->flags) ||
TEST(LINK_SEPARATE_STUB, l->flags));
}
});
/* traversal depends on flags being set */
ASSERT(l->flags != 0);
ASSERT(i <= num_direct_stubs + num_indirect_stubs);
l = LINKSTUB_NEXT_EXIT(l);
DODEBUG({ prev_cti = inst; });
} /* exit cti */
if (instr_ok_to_emit(inst)) {
if (emit) {
pc = instr_encode(dcontext, inst, pc);
ASSERT(pc != NULL);
} else {
pc += instr_length(dcontext, inst);
}
}
}
return pc;
}
/* Emits code for ilist into the fcache, returns created fragment.
* It is not added to the fragment table, nor is it linked!
*/
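/* Overview of the passes below: a first walk over ilist computes the body
* size and the number and size of exit stubs; fragment_create then allocates
* the fragment; an optional pad_jmps walk inserts nops for patchable exits;
* set_linkstub_fields fills in each exit's linkstub and encodes the body;
* the exit stubs are then emitted and their ctis patched, any selfmod copy is
* appended, and finally the fragment is finalized, added to vm area lists,
* and optionally linked and added to the hashtable.
*/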
static fragment_t *
emit_fragment_common(dcontext_t *dcontext, app_pc tag,
instrlist_t *ilist, uint flags, void *vmlist,
bool link_fragment, bool add_to_htable,
fragment_t *replace_fragment)
{
fragment_t *f;
instr_t *inst;
cache_pc pc = 0;
app_pc target;
linkstub_t *l;
uint len;
uint offset = 0;
uint copy_sz = 0;
uint extra_jmp_padding_body = 0;
uint extra_jmp_padding_stubs = 0;
uint last_pad_offset = 0;
uint num_direct_stubs = 0;
uint num_indirect_stubs = 0;
uint stub_size_total = 0; /* those in fcache w/ fragment */
#ifdef CUSTOM_EXIT_STUBS
bool custom_stubs_present = false;
#endif
bool final_cbr_single_stub = false;
byte *prev_stub_pc = NULL;
uint stub_size = 0;
bool no_stub = false;
dr_isa_mode_t isa_mode;
uint mode_flags;
KSTART(emit);
/* we do entire cache b/c links may touch many units
* FIXME: change to lazier version triggered by segfaults or something?
*/
SELF_PROTECT_CACHE(dcontext, NULL, WRITABLE);
/* ensure some higher-level lock is held if f is shared */
ASSERT(!TEST(FRAG_SHARED, flags) || INTERNAL_OPTION(single_thread_in_DR) ||
!USE_BB_BUILDING_LOCK() ||
OWN_MUTEX(&bb_building_lock) || OWN_MUTEX(&trace_building_lock));
/* 1st walk through instr list:
* -- determine body size and number of exit stubs required;
* -- if not padding jmps sets offsets as well
*/
ASSERT(instrlist_first(ilist) != NULL);
isa_mode = instr_get_isa_mode(instrlist_first(ilist));
mode_flags = frag_flags_from_isa_mode(isa_mode);
if (mode_flags != 0)
flags |= mode_flags;
#if defined(X86) && defined(X64)
else if (dr_get_isa_mode(dcontext) == DR_ISA_IA32)
flags |= FRAG_X86_TO_X64;
#endif
for (inst = instrlist_first(ilist); inst; inst = instr_get_next(inst)) {
/* Since decode_fragment needs to be able to decode from the code
* cache, we require that each fragment has a single mode
* (xref PR 278329)
*/
IF_X64(CLIENT_ASSERT(instr_get_isa_mode(inst) == isa_mode,
"single fragment cannot mix x86 and x64 modes"));
if (!PAD_FRAGMENT_JMPS(flags)) {
/* we're going to skip the 2nd pass, save this instr's offset in
* the note field (used by instr_encode) */
instr_set_note(inst, (void *)(ptr_uint_t)offset);
}
if (instr_ok_to_emit(inst))
offset += instr_length(dcontext, inst);
ASSERT_NOT_IMPLEMENTED(!TEST(INSTR_HOT_PATCHABLE, inst->flags));
if (instr_is_exit_cti(inst)) {
target = instr_get_branch_target_pc(inst);
len = exit_stub_size(dcontext, (cache_pc)target, flags);
if (PAD_FRAGMENT_JMPS(flags) && instr_ok_to_emit(inst)) {
/* Most exits have only a single patchable jmp (it is difficult
* to handle all the races for more than one). Exceptions are
* usually where you have to patch the jmp in the body as well
* as in the stub and include inlined_indirect (without
* -atomic_inlined_linking), TRACE_HEAD_CACHE_INCR, or a
* custom exit stub with PROFILE_LINKCOUNT. All of these
* have issues with atomically linking/unlinking. Inlined
* indirect has special support for unlinking (but not linking
* hence can't use inlined_ibl on shared frags without
* -atomic_inlined_linking, but is otherwise ok). I suspect
* the other two exceptions are ok as well in practice (just
* racy as to whether the trace head count gets incremented or
* the custom code is executed or we exit cache unnecessarily).
*/
if (is_exit_cti_patchable(dcontext, inst, flags)) {
if (last_pad_offset == 0 ||
!WITHIN_PAD_REGION(last_pad_offset, offset)) {
last_pad_offset = offset - CTI_PATCH_OFFSET;
extra_jmp_padding_body += MAX_PAD_SIZE;
}
}
if (is_exit_cti_stub_patchable(dcontext, inst, flags)) {
extra_jmp_padding_stubs += MAX_PAD_SIZE;
}
}
if (is_indirect_branch_lookup_routine(dcontext, target)) {
num_indirect_stubs++;
STATS_INC(num_indirect_exit_stubs);
LOG(THREAD, LOG_EMIT, 3, "emit_fragment: %s use ibl <"PFX">\n",
TEST(FRAG_IS_TRACE, flags) ? "trace" : "bb", target);
stub_size_total += len;
STATS_FCACHE_ADD(flags, indirect_stubs, len);
} else {
num_direct_stubs++;
STATS_INC(num_direct_exit_stubs);
/* if a cbr+ubr pair is the final exit, should they share a stub? */
if (INTERNAL_OPTION(cbr_single_stub) &&
inst == instrlist_last(ilist) &&
final_exit_shares_prev_stub(dcontext, ilist, flags)) {
final_cbr_single_stub = true;
STATS_INC(num_cbr_single_stub);
} else if (!should_separate_stub(dcontext, target, flags)) {
stub_size_total += len;
STATS_FCACHE_ADD(flags, direct_stubs, len);
} else /* ensure have cti to jmp to separate stub! */
ASSERT(instr_ok_to_emit(inst));
}
#ifdef CUSTOM_EXIT_STUBS
if (!custom_stubs_present && instr_exit_stub_code(inst) != NULL)
custom_stubs_present = true;
#endif
}
}
DOSTATS({
if (!TEST(FRAG_IS_TRACE, flags)) {
if (num_indirect_stubs > 0) {
if (num_indirect_stubs == 1 && num_direct_stubs == 0)
STATS_INC(num_bb_one_indirect_exit);
else /* funny bb w/ mixture of ind and dir exits */
STATS_INC(num_bb_indirect_extra_exits);
} else {
if (num_direct_stubs == 1)
STATS_INC(num_bb_one_direct_exit);
else if (num_direct_stubs == 2)
STATS_INC(num_bb_two_direct_exits);
else
STATS_INC(num_bb_many_direct_exits);
}
if (TEST(FRAG_HAS_DIRECT_CTI, flags))
STATS_INC(num_bb_has_elided);
if (linkstub_frag_offs_at_end(flags, num_direct_stubs,
num_indirect_stubs))
STATS_INC(num_bb_fragment_offset);
}
});
#ifndef CLIENT_INTERFACE
/* (can't have ifdef inside DOSTATS so we separate it from above stats)
* in a product build we only expect certain kinds of bbs
*/
ASSERT_CURIOSITY(TEST(FRAG_IS_TRACE, flags) ||
(num_indirect_stubs == 1 && num_direct_stubs == 0) ||
(num_indirect_stubs == 0 && num_direct_stubs <= 2) ||
IF_UNIX((num_indirect_stubs == 0 && num_direct_stubs >= 2 &&
TEST(FRAG_HAS_SYSCALL, flags)) ||)
(num_indirect_stubs <= 1 && num_direct_stubs >= 1 &&
TEST(FRAG_SELFMOD_SANDBOXED, flags)));
#endif
#ifdef CUSTOM_EXIT_STUBS
if (custom_stubs_present) {
LOG(THREAD, LOG_EMIT, 3, "emit_fragment: custom stubs present\n");
/* separate walk just for custom exit stubs -- need to get offsets correct
*/
for (inst = instrlist_first(ilist); inst; inst = instr_get_next(inst)) {
if (instr_is_exit_cti(inst)) {
/* custom exit stub code */
instrlist_t *custom = instr_exit_stub_code(inst);
if (custom != NULL) {
instr_t *in;
for (in = instrlist_first(custom); in; in = instr_get_next(in)) {
ASSERT_NOT_IMPLEMENTED(!TEST(INSTR_HOT_PATCHABLE,
inst->flags));
if (!PAD_FRAGMENT_JMPS(flags)) {
/* we're going to skip the 2nd pass, save this
* instr's offset in the note field for use by
* instr_encode */
instr_set_note(in, offset);
}
offset += instr_length(dcontext, in);
}
}
target = instr_get_branch_target_pc(inst);
offset += exit_stub_size(dcontext, (cache_pc)target, flags);
}
}
offset -= stub_size_total;
}
#endif
STATS_PAD_JMPS_ADD(flags, body_bytes, extra_jmp_padding_body);
STATS_PAD_JMPS_ADD(flags, stub_bytes, extra_jmp_padding_stubs);
STATS_FCACHE_ADD(flags, bodies, offset);
STATS_FCACHE_ADD(flags, prefixes, fragment_prefix_size(flags));
if (TEST(FRAG_SELFMOD_SANDBOXED, flags)) {
/* We need a copy of the original app code at bottom of
* fragment. We count it as part of the fragment body size,
* and use a size field stored at the very end (whose storage
* is also included in the fragment body size) to distinguish
* the real body from the selfmod copy (storing it there
* rather than in fragment_t to save space in the common case).
*/
/* assume contiguous bb */
app_pc end_bb_pc;
ASSERT((flags & FRAG_HAS_DIRECT_CTI) == 0);
/* FIXME PR 215217: a client may have truncated or otherwise changed
* the code, but we assume no new code has been added. Thus, checking
* the original full range can only result in a false positive selfmod
* event, which is a performance issue only.
*/
end_bb_pc = find_app_bb_end(dcontext, tag, flags);
ASSERT(end_bb_pc > tag);
ASSERT_TRUNCATE(copy_sz, uint, (ptr_uint_t)(end_bb_pc - tag) + sizeof(uint));
copy_sz = (uint)(end_bb_pc - tag) + sizeof(uint);
/* ensure this doesn't push fragment size over limit */
ASSERT(offset + copy_sz <= MAX_FRAGMENT_SIZE);
offset += copy_sz;
STATS_FCACHE_ADD(flags, selfmod_copy, copy_sz);
}
/* FIXME on linux the signal fence exit before a syscall can trigger
* these ASSERTS. We need some way to mark that exit always unlinked so
* we don't need to pad for it or figure out a better way to remove nops
* for tracing. Xref PR 215179, we allow additional pads for CLIENT_INTERFACE
* and UNIX by marking the bb untraceable and inserting nops. */
#if !defined(UNIX) && !defined(CLIENT_INTERFACE)
# if !defined(PROFILE_LINKCOUNT) && !defined(TRACE_HEAD_CACHE_INCR)
/* bbs shouldn't need more than a single pad */
ASSERT((PAD_FRAGMENT_JMPS(flags) && TEST(FRAG_IS_TRACE, flags)) ||
extra_jmp_padding_body+extra_jmp_padding_stubs ==
(PAD_FRAGMENT_JMPS(flags) ? MAX_PAD_SIZE : 0U));
# else
/* no more than two pads should be needed for a bb with these defines */
ASSERT((PAD_FRAGMENT_JMPS(flags) && TEST(FRAG_IS_TRACE, flags)) ||
extra_jmp_padding_body+extra_jmp_padding_stubs <=
(PAD_FRAGMENT_JMPS(flags) ? 2*MAX_PAD_SIZE : 0U));
# endif
#endif
/* create a new fragment_t, or fill in the emit wrapper for coarse-grain */
/* FIXME: don't worry too much about whether padding should be requested in
* the stub or the body argument; fragment_create doesn't distinguish between
* the two */
f = fragment_create(dcontext, tag, offset+extra_jmp_padding_body,
num_direct_stubs, num_indirect_stubs,
stub_size_total+extra_jmp_padding_stubs, flags);
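/* The requested size includes worst-case nop padding for patchable jmps; any
* unused padding is either returned to the fcache below (with
* -pad_jmps_return_excess_padding) or filled with nops at the end of emit.
*/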
ASSERT(f != NULL);
DOSTATS({
/* PR 217008: avoid gcc warning from truncation assert in XSTATS_TRACK_MAX:
* "comparison is always true due to limited range of data type".
* To turn it off we would have to turn off Wextra; hopefully a future gcc
* will have a patch for Walways-true. */
int tmp_size = f->size;
STATS_TRACK_MAX(max_fragment_requested_size, tmp_size);
});
if (PAD_FRAGMENT_JMPS(flags)) {
uint start_shift;
/* 2nd (pad_jmps) walk through instr list:
* -- record offset of each instr from start of fragment body.
* -- insert any nops needed for patching alignment
* recreate needs to do this too, so we use a shared routine */
start_shift = nop_pad_ilist(dcontext, f, ilist, true /* emitting, set offset */);
fcache_shift_start_pc(dcontext, f, start_shift);
}
/* emit prefix */
insert_fragment_prefix(dcontext, f);
/* 3rd walk through instr list: (2nd if -no_pad_jmps)
* -- initialize and set fields in link stub for each exit cti;
* -- emit each instr into the fragment.
*/
pc = set_linkstub_fields(dcontext, f, ilist, num_direct_stubs, num_indirect_stubs,
true/*encode each instr*/);
/* pc should now be pointing to the beginning of the first exit stub */
/* emit the exit stub code */
#ifdef CUSTOM_EXIT_STUBS
/* need to re-walk the instrlist to get the custom code
* if we had another linkstub_t field we could store it there (used to
* put it in stub_pc but that's not available in indirect_linkstub_t anymore)
*/
inst = instrlist_first(ilist);
#endif
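/* For each exit linkstub: coarse-grain direct exits (which use entrance stubs
* created at link time) and stub-less exits are skipped; separately-allocated
* stubs are created and the exit cti is patched to point at them; inline stubs
* are emitted at pc, with a final cbr/ubr pair optionally sharing the
* preceding stub (final_cbr_single_stub).
*/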
for (l = FRAGMENT_EXIT_STUBS(f); l; l = LINKSTUB_NEXT_EXIT(l)) {
#ifdef CUSTOM_EXIT_STUBS
byte *old_pc;
/* find inst corresponding to l */
while (!instr_is_exit_cti(inst)) {
inst = instr_get_next(inst);
ASSERT(inst != NULL);
}
#endif
if (TEST(FRAG_COARSE_GRAIN, flags) && LINKSTUB_DIRECT(l->flags)) {
/* Coarse-grain fragments do not have direct exit stubs.
* Instead they have entrance stubs, created when linking.
*/
continue;
}
if (!EXIT_HAS_STUB(l->flags, flags)) {
/* there is no stub */
continue;
}
if (final_cbr_single_stub && LINKSTUB_FINAL(l)) {
#ifdef CUSTOM_EXIT_STUBS
ASSERT(instr_exit_stub_code(inst) == NULL);
#endif
no_stub = true;
if (!TEST(LINK_SEPARATE_STUB, l->flags)) {
/* still need to patch the cti, so set pc back to prev stub pc */
pc = prev_stub_pc;
}
LOG(THREAD, LOG_EMIT, 3,
"final exit sharing prev exit's stub @ "PFX"\n", prev_stub_pc);
}
if (TEST(LINK_SEPARATE_STUB, l->flags)) {
if (no_stub) {
if (LINKSTUB_NORMAL_DIRECT(l->flags)) {
direct_linkstub_t *dl = (direct_linkstub_t *) l;
dl->stub_pc = prev_stub_pc;
} else {
ASSERT(LINKSTUB_CBR_FALLTHROUGH(l->flags));
/* stub pc computation should return prev pc */
ASSERT(EXIT_STUB_PC(dcontext, f, l) == prev_stub_pc);
}
} else {
separate_stub_create(dcontext, f, l);
}
prev_stub_pc = EXIT_STUB_PC(dcontext, f, l);
ASSERT(prev_stub_pc != NULL);
/* the start of the stub is the unlink entry */
ASSERT(linkstub_unlink_entry_offset(dcontext, f, l) == 0);
patch_branch(EXIT_CTI_PC(f, l), EXIT_STUB_PC(dcontext, f, l), false);
#ifdef CUSTOM_EXIT_STUBS
/* we don't currently support separate custom stubs */
ASSERT(instr_exit_stub_code(inst) == NULL);
#endif
continue;
}
ASSERT(EXIT_HAS_LOCAL_STUB(l->flags, flags));
if (PAD_FRAGMENT_JMPS(flags)) {
uint custom_exit_length = 0;
#ifdef CUSTOM_EXIT_STUBS
/* need to figure out size to get right offset */
if (custom_stubs_present) {
/* inst is pointing at l's exit inst */
instrlist_t *custom = (instrlist_t *) instr_exit_stub_code(inst);
if (custom != NULL) {
instr_t *in;
ASSERT(!no_stub);
for (in = instrlist_first(custom); in; in = instr_get_next(in)) {
custom_exit_length += instr_length(dcontext, in);
}
}
}
#endif
pc = pad_for_exitstub_alignment(dcontext, l, f, pc+custom_exit_length);
}
#ifdef CUSTOM_EXIT_STUBS
old_pc = pc;
if (custom_stubs_present) {
/* inst is pointing at l's exit inst */
instrlist_t *custom = (instrlist_t *) instr_exit_stub_code(inst);
if (custom != NULL) {
instr_t *in;
ASSERT(!no_stub);
for (in = instrlist_first(custom); in; in = instr_get_next(in)) {
pc = instr_encode(dcontext, in, (void*)pc);
ASSERT(pc != NULL);
}
}
}
/* stub_pc is start of entire stub (== start of custom stub) for
* patching, fixed_stub_offset is start of fixed part of stub (beyond
* custom part) for things like linkcount that modify that
*/
if (LINKSTUB_NORMAL_DIRECT(l->flags)) {
direct_linkstub_t *dl = (direct_linkstub_t *) l;
ASSERT(!TEST(LINK_SEPARATE_STUB, l->flags));
dl->stub_pc = old_pc;
}
ASSERT_TRUNCATE(l->fixed_stub_offset, ushort, (pc - old_pc));
l->fixed_stub_offset = (ushort) (pc - old_pc);
/* relocate the exit branch target so it points to the stub */
patch_branch(EXIT_CTI_PC(f, l), old_pc, false);
#else
if (LINKSTUB_NORMAL_DIRECT(l->flags)) {
direct_linkstub_t *dl = (direct_linkstub_t *) l;
dl->stub_pc = pc;
}
/* relocate the exit branch target so it points to the unlink
* entry of the stub
*/
patch_branch(EXIT_CTI_PC(f, l),
pc + linkstub_unlink_entry_offset(dcontext, f, l), false);
LOG(THREAD, LOG_EMIT, 3,
"Exit cti "PFX" is targeting "PFX" + 0x%x => "PFX"\n",
EXIT_CTI_PC(f, l), pc, linkstub_unlink_entry_offset(dcontext, f, l),
pc + linkstub_unlink_entry_offset(dcontext, f, l));
#endif
/* FIXME: once bytes_for_exitstub_alignment is implemented for
* PROFILE_LINKCOUNT, remove this ifndef */
#ifndef PROFILE_LINKCOUNT
DODEBUG({
uint shift = bytes_for_exitstub_alignment(dcontext, l, f, pc);
if (shift > 0) {
ASSERT(!PAD_FRAGMENT_JMPS(flags));
STATS_PAD_JMPS_ADD(flags, unaligned_stubs, 1);
STATS_PAD_JMPS_ADD(flags, unaligned_stubs_bytes, shift);
}
});
#endif
/* insert an exit stub */
prev_stub_pc = pc;
if (!no_stub)
stub_size = insert_exit_stub(dcontext, f, l, pc);
/* note that we don't do proactive linking here since it may
* depend on whether this is a trace fragment, which is marked
* by the caller, who is responsible for calling link_new_fragment
*/
/* if no_stub we assume stub_size is still what it was for prev stub,
* and yes we do need to adjust it back to the end of the single stub
*/
pc += stub_size;
}
ASSERT(pc - f->start_pc <= f->size);
/* Give back extra space to fcache */
STATS_PAD_JMPS_ADD(flags, excess_bytes,
f->size - (pc - f->start_pc) - copy_sz);
if (PAD_FRAGMENT_JMPS(flags) &&
INTERNAL_OPTION(pad_jmps_return_excess_padding) &&
f->size - (pc - f->start_pc) - copy_sz > 0) {
/* will adjust size, must call before we copy the selfmod since we
* break abstraction by putting the copy space in the fcache
* extra field and fcache needs to read/modify the fields */
fcache_return_extra_space(dcontext, f,
f->size - (pc - f->start_pc) - copy_sz);
}
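/* Selfmod layout: the copy of the original app code occupies the last copy_sz
* bytes of the fragment, with copy_sz itself stored in the final uint so the
* copy can be located from the fragment's end.
*/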
if (TEST(FRAG_SELFMOD_SANDBOXED, flags)) {
/* put copy of the original app code at bottom of fragment */
cache_pc copy_pc;
ASSERT(f->size > copy_sz);
copy_pc = f->start_pc + f->size - copy_sz;
ASSERT(copy_pc == pc ||
(PAD_FRAGMENT_JMPS(flags) &&
!INTERNAL_OPTION(pad_jmps_return_excess_padding)));
/* size is stored at the end, but included in copy_sz */
memcpy(copy_pc, tag, copy_sz - sizeof(uint));
*((uint *)(copy_pc + copy_sz - sizeof(uint))) = copy_sz;
/* count copy as part of fragment */
pc = copy_pc + copy_sz;
}
ASSERT(pc - f->start_pc <= f->size);
STATS_TRACK_MAX(max_fragment_size, pc - f->start_pc);
STATS_PAD_JMPS_ADD(flags, sum_fragment_bytes_ever, pc - f->start_pc);
/* if we don't give the extra space back to fcache, need to nop out the
* rest of the memory to avoid problems with shifting fcache pointers */
if (PAD_FRAGMENT_JMPS(flags) && !INTERNAL_OPTION(pad_jmps_return_excess_padding)) {
/* these can never be reached, but will be decoded when shifting
* fcache pointers */
SET_TO_NOPS(pc, f->size - (pc - f->start_pc));
} else {
ASSERT(f->size - (pc - f->start_pc) == 0);
}
/* finalize the fragment
* that means filling in all offsets, etc. that weren't known at
* instrlist building time
*/
#ifdef PROFILE_RDTSC
if (dynamo_options.profile_times)
finalize_profile_call(dcontext, f);
#endif
#ifdef CHECK_RETURNS_SSE2
finalize_return_check(dcontext, f);
#endif
if ((flags & FRAG_IS_TRACE) != 0) {
/* trace-only finalization */
#ifdef SIDELINE
if (dynamo_options.sideline) {
finalize_sideline_prefix(dcontext, f);
}
#endif
} else {
/* bb-only finalization */
}
#ifdef X86
if ((flags & FRAG_SELFMOD_SANDBOXED) != 0) {
finalize_selfmod_sandbox(dcontext, f);
}
#endif
/* add fragment to vm area lists */
vm_area_add_fragment(dcontext, f, vmlist);
/* store translation info, if requested */
if (TEST(FRAG_HAS_TRANSLATION_INFO, f->flags)) {
ASSERT(!TEST(FRAG_COARSE_GRAIN, f->flags));
fragment_record_translation_info(dcontext, f, ilist);
}
/* if necessary, i-cache sync */
machine_cache_sync((void*)f->start_pc, (void*)(f->start_pc+f->size), true);
/* Future removal and replacement w/ the real fragment must be atomic
* wrt linking, so we hold the change_linking_lock across both (xref
* case 5474).
* We must grab the change_linking_lock even for private fragments
* if we have any shared fragments in the picture, to make atomic
* our future fragment additions and removals and the associated
* fragment and future fragment lookups.
* Optimization: we could do away with this and try to only
* grab it when a private fragment needs to create a shared
* future, redoing our lookup with the lock held.
*/
if (link_fragment || add_to_htable)
SHARED_RECURSIVE_LOCK(acquire, change_linking_lock);
if (link_fragment) {
/* link BEFORE adding to the hashtable, to reduce races, though we
* should be able to handle them :)
*/
if (replace_fragment)
shift_links_to_new_fragment(dcontext, replace_fragment, f);
else
link_new_fragment(dcontext, f);
}
if (add_to_htable) {
if (TEST(FRAG_COARSE_GRAIN, f->flags)) {
/* added in link_new_fragment */
} else
fragment_add(dcontext, f);
DOCHECK(1, {
if (TEST(FRAG_SHARED, flags))
ASSERT(fragment_lookup_future(dcontext, tag) == NULL);
else
ASSERT(fragment_lookup_private_future(dcontext, tag) == NULL);
});
}
if (link_fragment || add_to_htable)
SHARED_RECURSIVE_LOCK(release, change_linking_lock);
SELF_PROTECT_CACHE(dcontext, NULL, READONLY);
KSTOP(emit);
return f;
}
/* Emits code for ilist into the fcache, returns the created fragment.
* Does not add the fragment to the ftable, leaving it as an "invisible"
* fragment. This means it is the caller's responsibility to ensure
* it is properly disposed of when done with it.
* The fragment is also not linked, to give the caller more flexibility.
*/
fragment_t *
emit_invisible_fragment(dcontext_t *dcontext, app_pc tag,
instrlist_t *ilist, uint flags, void *vmlist)
{
return emit_fragment_common(dcontext, tag, ilist, flags, vmlist,
false /* don't link: up to caller */,
false /* don't add: it's invisible! */,
NULL /* not replacing */);
}
/* Emits code for ilist into the fcache, returns the created
* fragment. Adds the fragment to the fragment hashtable and,
* if link is true, links it as a new fragment.
*/
fragment_t *
emit_fragment(dcontext_t *dcontext, app_pc tag, instrlist_t *ilist, uint flags,
void *vmlist, bool link)
{
return emit_fragment_common(dcontext, tag, ilist, flags, vmlist,
link,
true /* add to htable */,
NULL /* not replacing */);
}
/* Emits code for ilist into the fcache, returns the created
* fragment. Adds the fragment to the fragment hashtable if visible
* is true, and links it as a new fragment if link is true.
*/
fragment_t *
emit_fragment_ex(dcontext_t *dcontext, app_pc tag, instrlist_t *ilist, uint flags,
void *vmlist, bool link, bool visible)
{
return emit_fragment_common(dcontext, tag, ilist, flags, vmlist,
link, visible, NULL /* not replacing */);
}
/* Emits code for ilist into the fcache, returns the created
* fragment. Adds the fragment to the fragment hashtable and
* links it as a new fragment by subsuming replace's links.
*/
fragment_t *
emit_fragment_as_replacement(dcontext_t *dcontext, app_pc tag, instrlist_t *ilist,
uint flags, void *vmlist, fragment_t *replace)
{
return emit_fragment_common(dcontext, tag, ilist, flags, vmlist,
true /* link it up */,
true /* add to htable */,
replace /* replace this fragment */);
}