| /* ********************************************************** |
| * Copyright (c) 2012-2014 Google, Inc. All rights reserved. |
| * Copyright (c) 2000-2010 VMware, Inc. All rights reserved. |
| * **********************************************************/ |
| |
| /* |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * * Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * * Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * * Neither the name of VMware, Inc. nor the names of its contributors may be |
| * used to endorse or promote products derived from this software without |
| * specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH |
| * DAMAGE. |
| */ |
| |
| /* Copyright (c) 2003-2007 Determina Corp. */ |
| /* Copyright (c) 2001-2003 Massachusetts Institute of Technology */ |
| /* Copyright (c) 2000-2001 Hewlett-Packard Company */ |
| |
| /* |
| * emit.c - fragment code generation routines |
| */ |
| |
| #include "globals.h" |
| #include "link.h" |
| #include "fragment.h" |
| #include "fcache.h" |
| #include "proc.h" |
| #include "instrlist.h" |
| #include "emit.h" |
| #include "instr.h" |
| #include "monitor.h" |
| #include "translate.h" |
| #include <string.h> /* memcpy */ |
| |
| #ifdef DEBUG |
| # include "decode_fast.h" /* for decode_next_pc for stress_recreate_pc */ |
| #endif |
| |
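| /* Adds val to the fcache statistic named by stat, routing it to one of the
| * four per-cache counters (shared vs. private crossed with trace vs. bb)
| * based on the fragment flags.
| */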
| #define STATS_FCACHE_ADD(flags, stat, val) DOSTATS({ \ |
| if (TEST(FRAG_SHARED, (flags))) { \ |
| if (IN_TRACE_CACHE(flags)) \ |
| STATS_ADD(fcache_shared_trace_##stat, val);\ |
| else \ |
| STATS_ADD(fcache_shared_bb_##stat, val); \ |
| } \ |
| else if (IN_TRACE_CACHE(flags)) \ |
| STATS_ADD(fcache_trace_##stat, val); \ |
| else \ |
| STATS_ADD(fcache_bb_##stat, val); \ |
| }) |
| |
| #ifdef INTERNAL |
| /* Returns the pc of the last instruction in the body of f.  Used by the
| * case 4344 stress test verifying that we can recreate app pcs within a
| * fragment. */
| static cache_pc |
| get_last_fragment_body_instr_pc(dcontext_t *dcontext, fragment_t *f) |
| { |
| cache_pc body_last_inst_pc; |
| linkstub_t *l; |
| |
| /* Assumption: the exit cti of the last exit stub is the last instruction in
| * the body. PR 215217 enforces this for CLIENT_INTERFACE as well. */
| l = FRAGMENT_EXIT_STUBS(f); |
| /* never called on future fragments, so a stub should exist */ |
| while (!LINKSTUB_FINAL(l)) |
| l = LINKSTUB_NEXT_EXIT(l); |
| |
| body_last_inst_pc = EXIT_CTI_PC(f, l); |
| return body_last_inst_pc; |
| } |
| |
| void |
| stress_test_recreate(dcontext_t *dcontext, fragment_t *f, |
| instrlist_t *ilist) |
| { |
| cache_pc body_end_pc = get_last_fragment_body_instr_pc(dcontext, f); |
| app_pc recreated_pc; |
| |
| LOG(THREAD, LOG_MONITOR, 2, "Testing recreating Fragment %d for tag "PFX" at "PFX"\n", |
| f->id, f->tag, f->start_pc); |
| |
| DOLOG(3, LOG_INTERP, { |
| /* visualize translation info if it were to be recorded for every |
| * fragment, not just deleted ones -- for debugging only |
| */ |
| translation_info_t *info = record_translation_info(dcontext, f, NULL); |
| translation_info_print(info, f->start_pc, THREAD); |
| translation_info_free(dcontext, info); |
| /* handy reference of app code and fragment -- only 1st part of trace though */ |
| DOLOG(3, LOG_INTERP, { disassemble_app_bb(dcontext, f->tag, THREAD); }); |
| DOLOG(3, LOG_INTERP, { disassemble_fragment(dcontext, f, false); }); |
| }); |
| |
| DOLOG(2, LOG_MONITOR, { |
| /* Translate every instruction in the body.
| * Useful when verifying manually; otherwise we just ensure there are no
| * asserts or crashes.
| */ |
| cache_pc cpc = f->start_pc; |
| while (cpc <= body_end_pc) { |
| recreated_pc = recreate_app_pc(dcontext, cpc, NULL/*for full test*/); |
| LOG(THREAD, LOG_MONITOR, 2, "\ttranslated cache "PFX" => app "PFX"\n", |
| cpc, recreated_pc); |
| cpc = decode_next_pc(dcontext, cpc); |
| } |
| }); |
| |
| recreated_pc = recreate_app_pc(dcontext, body_end_pc, NULL/*for full test*/); |
| /* FIXME: we should figure out how to test each instruction, while knowing the app state */ |
| LOG(THREAD, LOG_MONITOR, 2, "Testing recreating Fragment #%d recreated_pc="PFX"\n", |
| GLOBAL_STAT(num_fragments), recreated_pc); |
| |
| ASSERT(recreated_pc != NULL); |
| |
| if (INTERNAL_OPTION(stress_recreate_state) && ilist != NULL) |
| stress_test_recreate_state(dcontext, f, ilist); |
| } |
| #endif /* INTERNAL */ |
| |
| /* here instead of link.c b/c link.c doesn't deal w/ Instrs */ |
| bool |
| final_exit_shares_prev_stub(dcontext_t *dcontext, instrlist_t *ilist, uint frag_flags) |
| { |
| /* if the final exit is a cbr/ubr pair, should the two exits share a stub? */
| if (INTERNAL_OPTION(cbr_single_stub) && !TEST(FRAG_COARSE_GRAIN, frag_flags)) { |
| /* don't need to expand since is_exit_cti will rule out level 0 */ |
| instr_t *inst = instrlist_last(ilist); |
| /* FIXME: we could support code after the last cti (this is a ubr so it
| * would be out-of-line code) or between the cbr and ubr, but for
| * simplicity of identifying exits for traces we don't.
| */
| if (instr_is_exit_cti(inst) && instr_is_ubr(inst)) { |
| /* don't need to expand since is_exit_cti will rule out level 0 */ |
| instr_t *prev_cti = instr_get_prev(inst); |
| if (prev_cti != NULL && instr_is_exit_cti(prev_cti) |
| /* cti_loop is fine since cti points to loop instr, enabling |
| * our disambiguation to know which state to look at */ |
| && instr_is_cbr(prev_cti) |
| #ifdef CUSTOM_EXIT_STUBS |
| /* no custom code */ |
| && instr_exit_stub_code(prev_cti) == NULL |
| && instr_exit_stub_code(inst) == NULL |
| #endif |
| #ifdef PROFILE_LINKCOUNT |
| /* no linkcount code, which is only present in traces */ |
| && (!TEST(FRAG_IS_TRACE, frag_flags) || !INTERNAL_OPTION(profile_counts)) |
| #endif |
| /* no separate freeing */ |
| && ((TEST(FRAG_SHARED, frag_flags) && |
| !DYNAMO_OPTION(unsafe_free_shared_stubs)) || |
| (!TEST(FRAG_SHARED, frag_flags) && |
| !DYNAMO_OPTION(free_private_stubs)))) { |
| return true; |
| } |
| } |
| } |
| return false; |
| } |
| |
| /* Walks ilist and f's linkstubs, setting each linkstub_t's fields appropriately
| * for the corresponding exit cti in ilist.
| * If emit is true, also encodes each instr in ilist into f's cache slot and
| * increments stats for new fragments.
| * Returns the pc just past the last instruction, whether encoded or merely
| * measured.
| */
| cache_pc |
| set_linkstub_fields(dcontext_t *dcontext, fragment_t *f, instrlist_t *ilist, |
| uint num_direct_stubs, uint num_indirect_stubs, bool emit) |
| { |
| uint i; |
| bool frag_offs_at_end; |
| linkstub_t *l; |
| cache_pc pc; |
| instr_t *inst; |
| app_pc target; |
| DEBUG_DECLARE(instr_t *prev_cti = NULL;) |
| |
| pc = FCACHE_ENTRY_PC(f); |
| l = FRAGMENT_EXIT_STUBS(f); |
| i = 0; |
| frag_offs_at_end = linkstub_frag_offs_at_end(f->flags, num_direct_stubs, |
| num_indirect_stubs); |
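| /* Walk the ilist in lockstep with f's linkstub list: each exit cti fills in
| * the next linkstub_t, while pc tracks the encoded (or merely measured)
| * position within the fragment body.
| */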
| for (inst = instrlist_first(ilist); inst; inst = instr_get_next(inst)) { |
| if (instr_is_exit_cti(inst)) { |
| /* l is currently zeroed out but otherwise uninitialized;
| * the stub starts out as unlinked and never-been-linked
| */ |
| ASSERT(l->flags == 0); |
| i++; |
| if (i == num_direct_stubs + num_indirect_stubs) { |
| /* set final flag */ |
| l->flags |= LINK_END_OF_LIST; |
| } |
| if (frag_offs_at_end) |
| l->flags |= LINK_FRAG_OFFS_AT_END; |
| |
| DODEBUG({ |
| if (emit && is_exit_cti_patchable(dcontext, inst, f->flags)) { |
| uint off = patchable_exit_cti_align_offs(dcontext, inst, pc); |
| if (off > 0) { |
| ASSERT(!PAD_FRAGMENT_JMPS(f->flags)); |
| STATS_PAD_JMPS_ADD(f->flags, unaligned_exits, 1); |
| STATS_PAD_JMPS_ADD(f->flags, unaligned_exit_bytes, off); |
| } |
| } |
| }); |
| /* An alternative way of testing this is to match against
| * is_return_lookup_routine() whenever we get that.
| * FIXME: doing the above is much easier now, and it is more reliable
| * than expecting the branch type flags to propagate through.
| */
| l->flags |= instr_exit_branch_type(inst); |
| |
| target = instr_get_branch_target_pc(inst); |
| |
| if (is_indirect_branch_lookup_routine(dcontext, (cache_pc)target)) { |
| ASSERT(IF_WINDOWS_ELSE_0(is_shared_syscall_routine(dcontext, target)) || |
| is_ibl_routine_type(dcontext, (cache_pc)target, |
| extract_branchtype((ushort)instr_exit_branch_type(inst)))); |
| /* this is a mangled form of an original indirect |
| * branch or is a mangled form of an indirect branch |
| * to a real native pc out of the fragment |
| */ |
| l->flags |= LINK_INDIRECT; |
| ASSERT(!LINKSTUB_DIRECT(l->flags)); |
| ASSERT(!LINKSTUB_NORMAL_DIRECT(l->flags)); |
| ASSERT(!LINKSTUB_CBR_FALLTHROUGH(l->flags)); |
| ASSERT(LINKSTUB_INDIRECT(l->flags)); |
| } |
| else { |
| DOSTATS({ |
| if (emit) { |
| if (PTR_UINT_ABS(target - f->tag) > SHRT_MAX) { |
| if (num_indirect_stubs == 0 && num_direct_stubs == 2 && |
| i == 2) |
| STATS_INC(num_bb_fallthru_far); |
| STATS_INC(num_bb_exit_tgt_far); |
| } else { |
| if (num_indirect_stubs == 0 && num_direct_stubs == 2 && |
| i == 2) |
| STATS_INC(num_bb_fallthru_near); |
| STATS_INC(num_bb_exit_tgt_near); |
| } |
| } |
| }); |
| |
| if (LINKSTUB_FINAL(l) && |
| use_cbr_fallthrough_short(f->flags, num_direct_stubs, |
| num_indirect_stubs)) { |
| /* this is how we mark a cbr fallthrough, w/ both |
| * LINK_DIRECT and LINK_INDIRECT |
| */ |
| l->flags |= LINK_DIRECT | LINK_INDIRECT; |
| /* ensure our macros are in synch */ |
| ASSERT(LINKSTUB_DIRECT(l->flags)); |
| ASSERT(!LINKSTUB_NORMAL_DIRECT(l->flags)); |
| ASSERT(LINKSTUB_CBR_FALLTHROUGH(l->flags)); |
| ASSERT(!LINKSTUB_INDIRECT(l->flags)); |
| DOSTATS({ |
| if (emit) |
| STATS_INC(num_bb_cbr_fallthru_shrink); |
| }); |
| ASSERT(prev_cti != NULL && instr_is_cbr(prev_cti)); |
| /* should always qualify for single stub */ |
| ASSERT(!INTERNAL_OPTION(cbr_single_stub) || |
| /* FIXME: this duplicates calc of final_cbr_single_stub |
| * bool cached in emit_fragment_common() |
| */ |
| (inst == instrlist_last(ilist) && |
| final_exit_shares_prev_stub(dcontext, ilist, f->flags))); |
| } else { |
| direct_linkstub_t *dl = (direct_linkstub_t *) l; |
| l->flags |= LINK_DIRECT; |
| /* ensure our macros are in synch */ |
| ASSERT(LINKSTUB_DIRECT(l->flags)); |
| ASSERT(LINKSTUB_NORMAL_DIRECT(l->flags)); |
| ASSERT(!LINKSTUB_CBR_FALLTHROUGH(l->flags)); |
| ASSERT(!LINKSTUB_INDIRECT(l->flags)); |
| dl->target_tag = target; |
| } |
| } |
| |
| if (should_separate_stub(dcontext, target, f->flags) |
| #ifdef CUSTOM_EXIT_STUBS |
| /* we can't separate custom stubs */ |
| && instr_exit_stub_code(inst) == NULL |
| #endif |
| ) |
| l->flags |= LINK_SEPARATE_STUB; |
| |
| /* FIXME: we don't yet support !emit ctis: we need to avoid patching
| * the cti when emitting the exit stub */
| ASSERT_NOT_IMPLEMENTED(!emit || instr_ok_to_emit(inst)); |
| |
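| /* cti_offset is overloaded: for a cbr fallthrough it holds the offset of the
| * fallthrough target from the fragment tag, while for all other exits it
| * holds the offset of the exit cti from the fragment's start pc.
| */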
| if (LINKSTUB_CBR_FALLTHROUGH(l->flags)) { |
| /* target is indicated via cti_offset */ |
| ASSERT_TRUNCATE(l->cti_offset, short, target - f->tag); |
| l->cti_offset = (ushort) /* really a short */ (target - f->tag); |
| } else { |
| ASSERT_TRUNCATE(l->cti_offset, ushort, pc - f->start_pc); |
| l->cti_offset = (ushort) (pc - f->start_pc); |
| } |
| |
| DOCHECK(1, { |
| /* ensure LINK_ flags were transferred via instr_exit_branch_type */ |
| if (instr_branch_special_exit(inst)) { |
| ASSERT(!LINKSTUB_INDIRECT(l->flags) && |
| TEST(LINK_SPECIAL_EXIT, l->flags)); |
| } |
| }); |
| #ifdef UNSUPPORTED_API |
| DOCHECK(1, { |
| if (instr_branch_targets_prefix(inst)) |
| ASSERT(TEST(LINK_TARGET_PREFIX, l->flags)); |
| }); |
| #endif |
| |
| if (!EXIT_HAS_STUB(l->flags, f->flags)) { |
| /* exit cti points straight at ibl routine */ |
| instr_set_branch_target_pc(inst, get_unlinked_entry(dcontext, target)); |
| } else { |
| /* HACK: set the branch target pc in inst to be its own pc. This ensures
| * that instr_encode will not fail due to address span problems; the
| * correct target (the exit stub) will get patched in when the
| * exit stub corresponding to this exit branch is emitted later. */
| instr_set_branch_target_pc(inst, pc); |
| } |
| /* PR 267260/PR 214962: keep this exit cti marked */ |
| instr_set_our_mangling(inst, true); |
| |
| LOG(THREAD, LOG_EMIT, dcontext == GLOBAL_DCONTEXT || |
| dcontext->in_opnd_disassemble ? 5U : 3U, |
| "exit_branch_type=0x%x target="PFX" l->flags=0x%x\n", |
| instr_exit_branch_type(inst), target, l->flags); |
| |
| DOCHECK(1, { |
| if (TEST(FRAG_COARSE_GRAIN, f->flags)) { |
| ASSERT(!frag_offs_at_end); |
| /* FIXME: indirect stubs should be separated
| * eventually, but right now there is no good place to put them,
| * so we keep them inline
| */ |
| ASSERT(LINKSTUB_INDIRECT(l->flags) || |
| TEST(LINK_SEPARATE_STUB, l->flags)); |
| } |
| }); |
| |
| /* traversal depends on flags being set */ |
| ASSERT(l->flags != 0); |
| ASSERT(i <= num_direct_stubs + num_indirect_stubs); |
| l = LINKSTUB_NEXT_EXIT(l); |
| DODEBUG({ prev_cti = inst; }); |
| } /* exit cti */ |
| if (instr_ok_to_emit(inst)) { |
| if (emit) { |
| pc = instr_encode(dcontext, inst, pc); |
| ASSERT(pc != NULL); |
| } else { |
| pc += instr_length(dcontext, inst); |
| } |
| } |
| } |
| return pc; |
| } |
| |
| /* Emits code for ilist into the fcache and returns the created fragment.
| * Whether the fragment is linked and added to the fragment table is
| * controlled by the link_fragment and add_to_htable parameters.
| */
| static fragment_t * |
| emit_fragment_common(dcontext_t *dcontext, app_pc tag, |
| instrlist_t *ilist, uint flags, void *vmlist, |
| bool link_fragment, bool add_to_htable, |
| fragment_t *replace_fragment) |
| { |
| fragment_t *f; |
| instr_t *inst; |
| cache_pc pc = 0; |
| app_pc target; |
| linkstub_t *l; |
| uint len; |
| uint offset = 0; |
| uint copy_sz = 0; |
| uint extra_jmp_padding_body = 0; |
| uint extra_jmp_padding_stubs = 0; |
| uint last_pad_offset = 0; |
| uint num_direct_stubs = 0; |
| uint num_indirect_stubs = 0; |
| uint stub_size_total = 0; /* those in fcache w/ fragment */ |
| #ifdef CUSTOM_EXIT_STUBS |
| bool custom_stubs_present = false; |
| #endif |
| bool final_cbr_single_stub = false; |
| byte *prev_stub_pc = NULL; |
| uint stub_size = 0; |
| bool no_stub = false; |
| dr_isa_mode_t isa_mode; |
| uint mode_flags; |
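| /* final_cbr_single_stub, prev_stub_pc, stub_size, and no_stub track the
| * cbr/ubr stub-sharing case below: when the final ubr shares the preceding
| * cbr's stub, no new stub is emitted and the final exit's cti is pointed at
| * the previous stub instead.
| */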
| |
| KSTART(emit); |
| /* we unprotect the entire cache b/c links may touch many units
| * FIXME: change to a lazier version triggered by segfaults or something?
| */ |
| SELF_PROTECT_CACHE(dcontext, NULL, WRITABLE); |
| |
| /* ensure some higher-level lock is held if f is shared */ |
| ASSERT(!TEST(FRAG_SHARED, flags) || INTERNAL_OPTION(single_thread_in_DR) || |
| !USE_BB_BUILDING_LOCK() || |
| OWN_MUTEX(&bb_building_lock) || OWN_MUTEX(&trace_building_lock)); |
| |
| /* 1st walk through instr list: |
| * -- determine body size and number of exit stubs required; |
| * -- if not padding jmps, set each instr's offset (in its note field) as well
| */ |
| ASSERT(instrlist_first(ilist) != NULL); |
| isa_mode = instr_get_isa_mode(instrlist_first(ilist)); |
| mode_flags = frag_flags_from_isa_mode(isa_mode); |
| if (mode_flags != 0) |
| flags |= mode_flags; |
| #if defined(X86) && defined(X64) |
| else if (dr_get_isa_mode(dcontext) == DR_ISA_IA32) |
| flags |= FRAG_X86_TO_X64; |
| #endif |
| for (inst = instrlist_first(ilist); inst; inst = instr_get_next(inst)) { |
| /* Since decode_fragment needs to be able to decode from the code |
| * cache, we require that each fragment has a single mode |
| * (xref PR 278329) |
| */ |
| IF_X64(CLIENT_ASSERT(instr_get_isa_mode(inst) == isa_mode, |
| "single fragment cannot mix x86 and x64 modes")); |
| if (!PAD_FRAGMENT_JMPS(flags)) { |
| /* we're going to skip the 2nd pass, save this instr's offset in |
| * the note field (used by instr_encode) */ |
| instr_set_note(inst, (void *)(ptr_uint_t)offset); |
| } |
| if (instr_ok_to_emit(inst)) |
| offset += instr_length(dcontext, inst); |
| ASSERT_NOT_IMPLEMENTED(!TEST(INSTR_HOT_PATCHABLE, inst->flags)); |
| if (instr_is_exit_cti(inst)) { |
| target = instr_get_branch_target_pc(inst); |
| len = exit_stub_size(dcontext, (cache_pc)target, flags); |
| if (PAD_FRAGMENT_JMPS(flags) && instr_ok_to_emit(inst)) { |
| /* Most exits have only a single patchable jmp (it is difficult
| * to handle all the races for more than one). The exceptions are
| * usually cases where you have to patch the jmp in the body as well
| * as in the stub; these include inlined_indirect (without
| * -atomic_inlined_linking), TRACE_HEAD_CACHE_INCR, and a
| * custom exit stub with PROFILE_LINKCOUNT. All of these
| * have issues with atomically linking/unlinking. Inlined
| * indirect has special support for unlinking (but not linking,
| * hence we can't use inlined_ibl on shared frags without
| * -atomic_inlined_linking, but it is otherwise ok). I suspect
| * the other two exceptions are ok as well in practice (it is just
| * racy as to whether the trace head count gets incremented, the
| * custom code is executed, or we exit the cache unnecessarily).
| */
| if (is_exit_cti_patchable(dcontext, inst, flags)) { |
| if (last_pad_offset == 0 || |
| !WITHIN_PAD_REGION(last_pad_offset, offset)) { |
| last_pad_offset = offset - CTI_PATCH_OFFSET; |
| extra_jmp_padding_body += MAX_PAD_SIZE; |
| } |
| } |
| if (is_exit_cti_stub_patchable(dcontext, inst, flags)) { |
| extra_jmp_padding_stubs += MAX_PAD_SIZE; |
| } |
| } |
| if (is_indirect_branch_lookup_routine(dcontext, target)) { |
| num_indirect_stubs++; |
| STATS_INC(num_indirect_exit_stubs); |
| LOG(THREAD, LOG_EMIT, 3, "emit_fragment: %s use ibl <"PFX">\n", |
| TEST(FRAG_IS_TRACE, flags) ? "trace" : "bb", target); |
| stub_size_total += len; |
| STATS_FCACHE_ADD(flags, indirect_stubs, len); |
| } else { |
| num_direct_stubs++; |
| STATS_INC(num_direct_exit_stubs); |
| |
| /* if the final exit is a cbr/ubr pair, should the two exits share a stub? */
| if (INTERNAL_OPTION(cbr_single_stub) && |
| inst == instrlist_last(ilist) && |
| final_exit_shares_prev_stub(dcontext, ilist, flags)) { |
| final_cbr_single_stub = true; |
| STATS_INC(num_cbr_single_stub); |
| } else if (!should_separate_stub(dcontext, target, flags)) { |
| stub_size_total += len; |
| STATS_FCACHE_ADD(flags, direct_stubs, len); |
| } else /* ensure we have a cti to jmp to the separate stub! */
| ASSERT(instr_ok_to_emit(inst)); |
| } |
| #ifdef CUSTOM_EXIT_STUBS |
| if (!custom_stubs_present && instr_exit_stub_code(inst) != NULL) |
| custom_stubs_present = true; |
| #endif |
| } |
| } |
| |
| DOSTATS({ |
| if (!TEST(FRAG_IS_TRACE, flags)) { |
| if (num_indirect_stubs > 0) { |
| if (num_indirect_stubs == 1 && num_direct_stubs == 0) |
| STATS_INC(num_bb_one_indirect_exit); |
| else /* funny bb w/ mixture of ind and dir exits */ |
| STATS_INC(num_bb_indirect_extra_exits); |
| } else { |
| if (num_direct_stubs == 1) |
| STATS_INC(num_bb_one_direct_exit); |
| else if (num_direct_stubs == 2) |
| STATS_INC(num_bb_two_direct_exits); |
| else |
| STATS_INC(num_bb_many_direct_exits); |
| } |
| if (TEST(FRAG_HAS_DIRECT_CTI, flags)) |
| STATS_INC(num_bb_has_elided); |
| if (linkstub_frag_offs_at_end(flags, num_direct_stubs, |
| num_indirect_stubs)) |
| STATS_INC(num_bb_fragment_offset); |
| } |
| }); |
| #ifndef CLIENT_INTERFACE |
| /* (can't have ifdef inside DOSTATS so we separate it from above stats) |
| * in a product build we only expect certain kinds of bbs |
| */ |
| ASSERT_CURIOSITY(TEST(FRAG_IS_TRACE, flags) || |
| (num_indirect_stubs == 1 && num_direct_stubs == 0) || |
| (num_indirect_stubs == 0 && num_direct_stubs <= 2) || |
| IF_UNIX((num_indirect_stubs == 0 && num_direct_stubs >= 2 && |
| TEST(FRAG_HAS_SYSCALL, flags)) ||) |
| (num_indirect_stubs <= 1 && num_direct_stubs >= 1 && |
| TEST(FRAG_SELFMOD_SANDBOXED, flags))); |
| #endif |
| |
| #ifdef CUSTOM_EXIT_STUBS |
| if (custom_stubs_present) { |
| LOG(THREAD, LOG_EMIT, 3, "emit_fragment: custom stubs present\n"); |
| /* separate walk just for custom exit stubs -- need to get offsets correct |
| */ |
| for (inst = instrlist_first(ilist); inst; inst = instr_get_next(inst)) { |
| if (instr_is_exit_cti(inst)) { |
| /* custom exit stub code */ |
| instrlist_t *custom = instr_exit_stub_code(inst); |
| if (custom != NULL) { |
| instr_t *in; |
| for (in = instrlist_first(custom); in; in = instr_get_next(in)) { |
| ASSERT_NOT_IMPLEMENTED(!TEST(INSTR_HOT_PATCHABLE, |
| inst->flags)); |
| if (!PAD_FRAGMENT_JMPS(flags)) { |
| /* we're going to skip the 2nd pass, save this |
| * instr's offset in the note field for use by |
| * instr_encode */ |
| instr_set_note(in, (void *)(ptr_uint_t)offset);
| } |
| offset += instr_length(dcontext, in); |
| } |
| } |
| target = instr_get_branch_target_pc(inst); |
| offset += exit_stub_size(dcontext, (cache_pc)target, flags); |
| } |
| } |
| offset -= stub_size_total; |
| } |
| #endif |
| |
| STATS_PAD_JMPS_ADD(flags, body_bytes, extra_jmp_padding_body); |
| STATS_PAD_JMPS_ADD(flags, stub_bytes, extra_jmp_padding_stubs); |
| |
| STATS_FCACHE_ADD(flags, bodies, offset); |
| STATS_FCACHE_ADD(flags, prefixes, fragment_prefix_size(flags)); |
| |
| if (TEST(FRAG_SELFMOD_SANDBOXED, flags)) { |
| /* We need a copy of the original app code at bottom of |
| * fragment. We count it as part of the fragment body size, |
| * and use a size field stored at the very end (whose storage |
| * is also included in the fragment body size) to distinguish |
| * the real body from the selfmod copy (storing it there |
| * rather than in fragment_t to save space in the common case). |
| */ |
| /* assume contiguous bb */ |
| app_pc end_bb_pc; |
| ASSERT((flags & FRAG_HAS_DIRECT_CTI) == 0); |
| /* FIXME PR 215217: a client may have truncated or otherwise changed |
| * the code, but we assume no new code has been added. Thus, checking |
| * the original full range can only result in a false positive selfmod |
| * event, which is a performance issue only. |
| */ |
| end_bb_pc = find_app_bb_end(dcontext, tag, flags); |
| ASSERT(end_bb_pc > tag); |
| ASSERT_TRUNCATE(copy_sz, uint, (ptr_uint_t)(end_bb_pc - tag) + sizeof(uint)); |
| copy_sz = (uint)(end_bb_pc - tag) + sizeof(uint); |
| /* ensure this doesn't push fragment size over limit */ |
| ASSERT(offset + copy_sz <= MAX_FRAGMENT_SIZE); |
| offset += copy_sz; |
| STATS_FCACHE_ADD(flags, selfmod_copy, copy_sz); |
| } |
| |
| /* FIXME: on Linux the signal fence exit before a syscall can trigger
| * these ASSERTs. We need some way to mark that exit as always unlinked so
| * we don't need to pad for it, or to figure out a better way to remove nops
| * for tracing. Xref PR 215179: we allow additional pads for CLIENT_INTERFACE
| * and UNIX by marking the bb untraceable and inserting nops. */
| #if !defined(UNIX) && !defined(CLIENT_INTERFACE) |
| # if !defined(PROFILE_LINKCOUNT) && !defined(TRACE_HEAD_CACHE_INCR) |
| /* bbs shouldn't need more than a single pad */ |
| ASSERT((PAD_FRAGMENT_JMPS(flags) && TEST(FRAG_IS_TRACE, flags)) || |
| extra_jmp_padding_body+extra_jmp_padding_stubs == |
| (PAD_FRAGMENT_JMPS(flags) ? MAX_PAD_SIZE : 0U)); |
| # else |
| /* no more than two pads should be needed for a bb with these defines */ |
| ASSERT((PAD_FRAGMENT_JMPS(flags) && TEST(FRAG_IS_TRACE, flags)) || |
| extra_jmp_padding_body+extra_jmp_padding_stubs <= |
| (PAD_FRAGMENT_JMPS(flags) ? 2*MAX_PAD_SIZE : 0U)); |
| # endif |
| #endif |
| |
| /* create a new fragment_t, or fill in the emit wrapper for coarse-grain */ |
| /* FIXME: don't worry too much about whether padding should be requested in
| * the stub or the body argument; fragment_create doesn't distinguish between
| * the two */
| f = fragment_create(dcontext, tag, offset+extra_jmp_padding_body, |
| num_direct_stubs, num_indirect_stubs, |
| stub_size_total+extra_jmp_padding_stubs, flags); |
| ASSERT(f != NULL); |
| DOSTATS({ |
| /* PR 217008: avoid the gcc warning from the truncation assert in XSTATS_TRACK_MAX:
| * "comparison is always true due to limited range of data type".
| * To silence it we would have to turn off -Wextra; hopefully a future gcc
| * will incorporate the patch out there for -Walways-true. */
| int tmp_size = f->size; |
| STATS_TRACK_MAX(max_fragment_requested_size, tmp_size); |
| }); |
| |
| if (PAD_FRAGMENT_JMPS(flags)) { |
| uint start_shift; |
| /* 2nd (pad_jmps) walk through instr list:
| * -- record offset of each instr from start of fragment body;
| * -- insert any nops needed for patching alignment.
| * Recreation needs to do this too, so we use a shared routine. */
| start_shift = nop_pad_ilist(dcontext, f, ilist, true /* emitting, set offset */); |
| fcache_shift_start_pc(dcontext, f, start_shift); |
| } |
| |
| /* emit prefix */ |
| insert_fragment_prefix(dcontext, f); |
| |
| /* 3rd walk through instr list: (2nd if -no_pad_jmps) |
| * -- initialize and set fields in link stub for each exit cti; |
| * -- emit each instr into the fragment. |
| */ |
| pc = set_linkstub_fields(dcontext, f, ilist, num_direct_stubs, num_indirect_stubs, |
| true/*encode each instr*/); |
| /* pc should now be pointing to the beginning of the first exit stub */ |
| |
| /* emit the exit stub code */ |
| #ifdef CUSTOM_EXIT_STUBS |
| /* need to re-walk the instrlist to get the custom code;
| * if we had another linkstub_t field we could store it there (we used to
| * put it in stub_pc, but that's not available in indirect_linkstub_t anymore)
| */ |
| inst = instrlist_first(ilist); |
| #endif |
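| /* Walk the linkstubs, emitting the exit stubs themselves: coarse-grain
| * direct exits and stub-less exits are skipped, separate stubs are created
| * out of line, and inline stubs are encoded at pc, with each exit cti
| * patched to point at its stub.
| */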
| for (l = FRAGMENT_EXIT_STUBS(f); l; l = LINKSTUB_NEXT_EXIT(l)) { |
| #ifdef CUSTOM_EXIT_STUBS |
| byte *old_pc; |
| /* find inst corresponding to l */ |
| while (!instr_is_exit_cti(inst)) { |
| inst = instr_get_next(inst); |
| ASSERT(inst != NULL); |
| } |
| #endif |
| |
| if (TEST(FRAG_COARSE_GRAIN, flags) && LINKSTUB_DIRECT(l->flags)) { |
| /* Coarse-grain fragments do not have direct exit stubs. |
| * Instead they have entrance stubs, created when linking. |
| */ |
| continue; |
| } |
| |
| if (!EXIT_HAS_STUB(l->flags, flags)) { |
| /* there is no stub */ |
| continue; |
| } |
| |
| if (final_cbr_single_stub && LINKSTUB_FINAL(l)) { |
| #ifdef CUSTOM_EXIT_STUBS |
| ASSERT(instr_exit_stub_code(inst) == NULL); |
| #endif |
| no_stub = true; |
| if (!TEST(LINK_SEPARATE_STUB, l->flags)) { |
| /* still need to patch the cti, so set pc back to prev stub pc */ |
| pc = prev_stub_pc; |
| } |
| LOG(THREAD, LOG_EMIT, 3, |
| "final exit sharing prev exit's stub @ "PFX"\n", prev_stub_pc); |
| } |
| |
| if (TEST(LINK_SEPARATE_STUB, l->flags)) { |
| if (no_stub) { |
| if (LINKSTUB_NORMAL_DIRECT(l->flags)) { |
| direct_linkstub_t *dl = (direct_linkstub_t *) l; |
| dl->stub_pc = prev_stub_pc; |
| } else { |
| ASSERT(LINKSTUB_CBR_FALLTHROUGH(l->flags)); |
| /* stub pc computation should return prev pc */ |
| ASSERT(EXIT_STUB_PC(dcontext, f, l) == prev_stub_pc); |
| } |
| } else { |
| separate_stub_create(dcontext, f, l); |
| } |
| prev_stub_pc = EXIT_STUB_PC(dcontext, f, l); |
| ASSERT(prev_stub_pc != NULL); |
| /* the start of the stub is the unlink entry */
| ASSERT(linkstub_unlink_entry_offset(dcontext, f, l) == 0); |
| patch_branch(EXIT_CTI_PC(f, l), EXIT_STUB_PC(dcontext, f, l), false); |
| #ifdef CUSTOM_EXIT_STUBS |
| /* we don't currently support separate custom stubs */ |
| ASSERT(instr_exit_stub_code(inst) == NULL); |
| #endif |
| continue; |
| } |
| |
| ASSERT(EXIT_HAS_LOCAL_STUB(l->flags, flags)); |
| |
| if (PAD_FRAGMENT_JMPS(flags)) { |
| uint custom_exit_length = 0; |
| #ifdef CUSTOM_EXIT_STUBS |
| /* need to figure out size to get right offset */ |
| if (custom_stubs_present) { |
| /* inst is pointing at l's exit inst */ |
| instrlist_t *custom = (instrlist_t *) instr_exit_stub_code(inst); |
| if (custom != NULL) { |
| instr_t *in; |
| ASSERT(!no_stub); |
| for (in = instrlist_first(custom); in; in = instr_get_next(in)) { |
| custom_exit_length += instr_length(dcontext, in); |
| } |
| } |
| } |
| #endif |
| pc = pad_for_exitstub_alignment(dcontext, l, f, pc+custom_exit_length); |
| } |
| |
| #ifdef CUSTOM_EXIT_STUBS |
| old_pc = pc; |
| if (custom_stubs_present) { |
| /* inst is pointing at l's exit inst */ |
| instrlist_t *custom = (instrlist_t *) instr_exit_stub_code(inst); |
| if (custom != NULL) { |
| instr_t *in; |
| ASSERT(!no_stub); |
| for (in = instrlist_first(custom); in; in = instr_get_next(in)) { |
| pc = instr_encode(dcontext, in, (void*)pc); |
| ASSERT(pc != NULL); |
| } |
| } |
| } |
| /* stub_pc is the start of the entire stub (== start of the custom stub),
| * used for patching; fixed_stub_offset is the start of the fixed part of the
| * stub (beyond the custom part), for things like linkcount that modify it
| */ |
| if (LINKSTUB_NORMAL_DIRECT(l->flags)) { |
| direct_linkstub_t *dl = (direct_linkstub_t *) l; |
| ASSERT(!TEST(LINK_SEPARATE_STUB, l->flags)); |
| dl->stub_pc = old_pc; |
| } |
| ASSERT_TRUNCATE(l->fixed_stub_offset, ushort, (pc - old_pc)); |
| l->fixed_stub_offset = (ushort) (pc - old_pc); |
| /* relocate the exit branch target so it points to the stub */
| patch_branch(EXIT_CTI_PC(f, l), old_pc, false); |
| #else |
| if (LINKSTUB_NORMAL_DIRECT(l->flags)) { |
| direct_linkstub_t *dl = (direct_linkstub_t *) l; |
| dl->stub_pc = pc; |
| } |
| /* relocate the exit branch target so it points to the unlink
| * entry of the stub
| */ |
| patch_branch(EXIT_CTI_PC(f, l), |
| pc + linkstub_unlink_entry_offset(dcontext, f, l), false); |
| LOG(THREAD, LOG_EMIT, 3, |
| "Exit cti "PFX" is targeting "PFX" + 0x%x => "PFX"\n", |
| EXIT_CTI_PC(f, l), pc, linkstub_unlink_entry_offset(dcontext, f, l), |
| pc + linkstub_unlink_entry_offset(dcontext, f, l)); |
| #endif |
| |
| /* FIXME: once bytes_for_exitstub_alignment is implemented for
| * PROFILE_LINKCOUNT, remove this ifndef */
| #ifndef PROFILE_LINKCOUNT |
| DODEBUG({ |
| uint shift = bytes_for_exitstub_alignment(dcontext, l, f, pc); |
| if (shift > 0) { |
| ASSERT(!PAD_FRAGMENT_JMPS(flags)); |
| STATS_PAD_JMPS_ADD(flags, unaligned_stubs, 1); |
| STATS_PAD_JMPS_ADD(flags, unaligned_stubs_bytes, shift); |
| } |
| }); |
| #endif |
| |
| /* insert an exit stub */ |
| prev_stub_pc = pc; |
| if (!no_stub) |
| stub_size = insert_exit_stub(dcontext, f, l, pc); |
| /* note that we don't do proactive linking here since it may |
| * depend on whether this is a trace fragment, which is marked |
| * by the caller, who is responsible for calling link_new_fragment |
| */ |
| |
| /* if no_stub, we assume stub_size is still what it was for the prev stub,
| * and yes, we do need to advance pc to the end of that single shared stub
| */ |
| pc += stub_size; |
| } |
| |
| ASSERT(pc - f->start_pc <= f->size); |
| |
| /* Give back extra space to fcache */ |
| STATS_PAD_JMPS_ADD(flags, excess_bytes, |
| f->size - (pc - f->start_pc) - copy_sz); |
| if (PAD_FRAGMENT_JMPS(flags) && |
| INTERNAL_OPTION(pad_jmps_return_excess_padding) && |
| f->size - (pc - f->start_pc) - copy_sz > 0) { |
| /* this will adjust the fragment size; it must be called before we copy the
| * selfmod code since we break abstraction by putting the copy space in the
| * fcache extra field and fcache needs to read/modify the fields */
| fcache_return_extra_space(dcontext, f, |
| f->size - (pc - f->start_pc) - copy_sz); |
| } |
| |
| if (TEST(FRAG_SELFMOD_SANDBOXED, flags)) { |
| /* put copy of the original app code at bottom of fragment */ |
| cache_pc copy_pc; |
| |
| ASSERT(f->size > copy_sz); |
| copy_pc = f->start_pc + f->size - copy_sz; |
| ASSERT(copy_pc == pc || |
| (PAD_FRAGMENT_JMPS(flags) && |
| !INTERNAL_OPTION(pad_jmps_return_excess_padding))); |
| /* size is stored at the end, but included in copy_sz */ |
| memcpy(copy_pc, tag, copy_sz - sizeof(uint)); |
| *((uint *)(copy_pc + copy_sz - sizeof(uint))) = copy_sz; |
| /* count copy as part of fragment */ |
| pc = copy_pc + copy_sz; |
| } |
| |
| ASSERT(pc - f->start_pc <= f->size); |
| STATS_TRACK_MAX(max_fragment_size, pc - f->start_pc); |
| STATS_PAD_JMPS_ADD(flags, sum_fragment_bytes_ever, pc - f->start_pc); |
| |
| /* if we don't give the extra space back to fcache, we need to nop out the
| * rest of the memory to avoid problems with shifting fcache pointers */
| if (PAD_FRAGMENT_JMPS(flags) && !INTERNAL_OPTION(pad_jmps_return_excess_padding)) { |
| /* these can never be reached, but will be decoded when fcache
| * pointers are shifted */
| SET_TO_NOPS(pc, f->size - (pc - f->start_pc)); |
| } else { |
| ASSERT(f->size - (pc - f->start_pc) == 0); |
| } |
| |
| /* finalize the fragment:
| * fill in all offsets, etc., that weren't known at
| * instrlist-building time
| */ |
| #ifdef PROFILE_RDTSC |
| if (dynamo_options.profile_times) |
| finalize_profile_call(dcontext, f); |
| #endif |
| #ifdef CHECK_RETURNS_SSE2 |
| finalize_return_check(dcontext, f); |
| #endif |
| if ((flags & FRAG_IS_TRACE) != 0) { |
| /* trace-only finalization */ |
| #ifdef SIDELINE |
| if (dynamo_options.sideline) { |
| finalize_sideline_prefix(dcontext, f); |
| } |
| #endif |
| } else { |
| /* bb-only finalization */ |
| } |
| #ifdef X86 |
| if ((flags & FRAG_SELFMOD_SANDBOXED) != 0) { |
| finalize_selfmod_sandbox(dcontext, f); |
| } |
| #endif |
| /* add fragment to vm area lists */ |
| vm_area_add_fragment(dcontext, f, vmlist); |
| |
| /* store translation info, if requested */ |
| if (TEST(FRAG_HAS_TRANSLATION_INFO, f->flags)) { |
| ASSERT(!TEST(FRAG_COARSE_GRAIN, f->flags)); |
| fragment_record_translation_info(dcontext, f, ilist); |
| } |
| |
| /* if necessary, i-cache sync */ |
| machine_cache_sync((void*)f->start_pc, (void*)(f->start_pc+f->size), true); |
| |
| /* Future removal and replacement w/ the real fragment must be atomic |
| * wrt linking, so we hold the change_linking_lock across both (xref |
| * case 5474). |
| * We must grab the change_linking_lock even for private fragments |
| * if we have any shared fragments in the picture, to make atomic |
| * our future fragment additions and removals and the associated |
| * fragment and future fragment lookups. |
| * Optimization: we could do away with this and try to only |
| * grab it when a private fragment needs to create a shared |
| * future, redoing our lookup with the lock held. |
| */ |
| if (link_fragment || add_to_htable) |
| SHARED_RECURSIVE_LOCK(acquire, change_linking_lock); |
| |
| if (link_fragment) { |
| /* link BEFORE adding to the hashtable, to reduce races, though we |
| * should be able to handle them :) |
| */ |
| if (replace_fragment) |
| shift_links_to_new_fragment(dcontext, replace_fragment, f); |
| else |
| link_new_fragment(dcontext, f); |
| } |
| |
| if (add_to_htable) { |
| if (TEST(FRAG_COARSE_GRAIN, f->flags)) { |
| /* added in link_new_fragment */ |
| } else |
| fragment_add(dcontext, f); |
| |
| DOCHECK(1, { |
| if (TEST(FRAG_SHARED, flags)) |
| ASSERT(fragment_lookup_future(dcontext, tag) == NULL); |
| else |
| ASSERT(fragment_lookup_private_future(dcontext, tag) == NULL); |
| }); |
| } |
| |
| if (link_fragment || add_to_htable) |
| SHARED_RECURSIVE_LOCK(release, change_linking_lock); |
| |
| SELF_PROTECT_CACHE(dcontext, NULL, READONLY); |
| |
| KSTOP(emit); |
| return f; |
| } |
| |
| /* Emits code for ilist into the fcache, returns the created fragment. |
| * Does not add the fragment to the ftable, leaving it as an "invisible" |
| * fragment. This means it is the caller's responsibility to ensure |
| * it is properly disposed of when the caller is done with it.
| * The fragment is also not linked, to give the caller more flexibility. |
| */ |
| fragment_t * |
| emit_invisible_fragment(dcontext_t *dcontext, app_pc tag, |
| instrlist_t *ilist, uint flags, void *vmlist) |
| { |
| return emit_fragment_common(dcontext, tag, ilist, flags, vmlist, |
| false /* don't link: up to caller */, |
| false /* don't add: it's invisible! */, |
| NULL /* not replacing */); |
| } |
| |
| /* Emits code for ilist into the fcache, returns the created
| * fragment. Adds the fragment to the fragment hashtable and,
| * if link is true, links it as a new fragment.
| */ |
| fragment_t * |
| emit_fragment(dcontext_t *dcontext, app_pc tag, instrlist_t *ilist, uint flags, |
| void *vmlist, bool link) |
| { |
| return emit_fragment_common(dcontext, tag, ilist, flags, vmlist, |
| link, |
| true /* add to htable */, |
| NULL /* not replacing */); |
| } |
| |
| /* Emits code for ilist into the fcache, returns the created
| * fragment. Adds the fragment to the fragment hashtable only if
| * visible is true, and links it as a new fragment only if link is true.
| */ |
| fragment_t * |
| emit_fragment_ex(dcontext_t *dcontext, app_pc tag, instrlist_t *ilist, uint flags, |
| void *vmlist, bool link, bool visible) |
| { |
| return emit_fragment_common(dcontext, tag, ilist, flags, vmlist, |
| link, visible, NULL /* not replacing */); |
| } |
| |
| /* Emits code for ilist into the fcache, returns the created |
| * fragment. Adds the fragment to the fragment hashtable and |
| * links it as a new fragment by subsuming replace's links. |
| */ |
| fragment_t * |
| emit_fragment_as_replacement(dcontext_t *dcontext, app_pc tag, instrlist_t *ilist, |
| uint flags, void *vmlist, fragment_t *replace) |
| { |
| return emit_fragment_common(dcontext, tag, ilist, flags, vmlist, |
| true /* link it up */, |
| true /* add to htable */, |
| replace /* replace this fragment */); |
| } |
| |