| /* ********************************************************** |
| * Copyright (c) 2016-2023 Google, Inc. All rights reserved. |
| * **********************************************************/ |
| |
| /* |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * * Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * * Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * * Neither the name of Google, Inc. nor the names of its contributors may be |
| * used to endorse or promote products derived from this software without |
| * specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH |
| * DAMAGE. |
| */ |
| |
| /* instru_offline: inserts instrumentation for offline traces. |
| */ |
| |
| #include <stddef.h> /* for offsetof */ |
| #include <string.h> /* for strlen */ |
| #include <sys/types.h> |
| |
| #include <atomic> |
| #include <cstdint> |
| #include <new> |
| |
| #include "dr_api.h" |
| #include "drcovlib.h" |
| #include "drmgr.h" |
| #include "drreg.h" |
| #include "drutil.h" |
| #include "drvector.h" |
| #include "trace_entry.h" |
| #include "utils.h" |
| #include "instru.h" |
| |
| namespace dynamorio { |
| namespace drmemtrace { |
| |
// Upper bound on the per-block instr count we emit; instrument_instr() also uses
// MAX_INSTR_COUNT + 1 as a sentinel meaning "already emitted for this block".
static const uint MAX_INSTR_COUNT = 64 * 1024;

// Optional user callback invoked for each module segment load (see
// custom_module_data()).
void *(*offline_instru_t::user_load_)(module_data_t *module, int seg_idx);
// Optional user callback that prints the user's custom data into the module list.
int (*offline_instru_t::user_print_)(void *data, char *dst, size_t max_len);
// Optional user callback that frees the user's custom data.
void (*offline_instru_t::user_free_)(void *data);
// Base address of the vdso pseudo-module once seen (0 until then); vdso code is
// treated as generated code whose encodings we record ourselves.
std::atomic<uintptr_t> offline_instru_t::vdso_modbase_;
| |
// This constructor is for use in post-processing when we just need the
// elision utility functions.
offline_instru_t::offline_instru_t()
    : instru_t(nullptr, nullptr, sizeof(offline_entry_t))
    , write_file_func_(nullptr)
{
    // We can't use drmgr in standalone mode, but for post-processing it's just us,
    // so we just pick a note value.
    elide_memref_note_ = 1;
    // Marks this instance so the destructor skips the teardown (encoding buffer,
    // module-table dump) that only the tracing-time constructor sets up.
    standalone_ = true;
}
| |
// Tracing-time constructor: initializes the module table (drmodtrack), reserves
// a note value for elision labels, and allocates the encoding buffer, writing
// the encoding file's version and file-type header words into it.
offline_instru_t::offline_instru_t(
    void (*insert_load_buf)(void *, instrlist_t *, instr_t *, reg_id_t),
    drvector_t *reg_vector,
    ssize_t (*write_file)(file_t file, const void *data, size_t count),
    file_t module_file, file_t encoding_file, bool disable_optimizations,
    bool instrs_are_separate, void (*log)(uint level, const char *fmt, ...))
    : instru_t(insert_load_buf, reg_vector, sizeof(offline_entry_t),
               disable_optimizations)
    , write_file_func_(write_file)
    , modfile_(module_file)
    , log_(log)
    , encoding_file_(encoding_file)
    , instrs_are_separate_(instrs_are_separate)
{
    drcovlib_status_t res = drmodtrack_init();
    DR_ASSERT(res == DRCOVLIB_SUCCESS);
    DR_ASSERT(write_file != NULL);
    // Ensure every compiler is packing our struct how we want:
    DR_ASSERT(sizeof(offline_entry_t) == 8);

    // Attach our vdso-detection (and the user's pass-through data) to each
    // module-list entry.
    res = drmodtrack_add_custom_data(load_custom_module_data, print_custom_module_data,
                                     NULL, free_custom_module_data);
    DR_ASSERT(res == DRCOVLIB_SUCCESS);

    if (!drmgr_init())
        DR_ASSERT(false);
    // Reserve a label note value used to mark elidable memref operands.
    elide_memref_note_ = drmgr_reserve_note_range(1);
    DR_ASSERT(elide_memref_note_ != DRMGR_NOTE_NONE);

    // Bound the worst-case encoding size of one block by DR's block-length limit.
    uint64 max_bb_instrs;
    if (!dr_get_integer_option("max_bb_instrs", &max_bb_instrs))
        max_bb_instrs = 256; /* current default */
    max_block_encoding_size_ = static_cast<int>(max_bb_instrs) * MAX_INSTR_LENGTH;
    encoding_lock_ = dr_mutex_create();
    // Buffer several blocks' worth of encodings between file flushes.
    encoding_buf_sz_ = ALIGN_FORWARD(max_block_encoding_size_ * 10, dr_page_size());
    encoding_buf_start_ = reinterpret_cast<byte *>(
        dr_raw_mem_alloc(encoding_buf_sz_, DR_MEMPROT_READ | DR_MEMPROT_WRITE, nullptr));
    encoding_buf_ptr_ = encoding_buf_start_;
    // Write out the encoding file header.
    // 64-bit version.
    *reinterpret_cast<uint64_t *>(encoding_buf_ptr_) = ENCODING_FILE_VERSION;
    encoding_buf_ptr_ += sizeof(uint64_t);
    // 64-bit file type.
    uint64_t encoding_file_type =
        static_cast<uint64_t>(encoding_file_type_t::ENCODING_FILE_TYPE_DEFAULT);
    if (instrs_are_separate_) {
        encoding_file_type |= static_cast<uint64_t>(
            encoding_file_type_t::ENCODING_FILE_TYPE_SEPARATE_NON_MOD_INSTRS);
    }
    *reinterpret_cast<uint64_t *>(encoding_buf_ptr_) = encoding_file_type;
    encoding_buf_ptr_ += sizeof(uint64_t);
}
| |
offline_instru_t::~offline_instru_t()
{
    // The post-processing-only constructor did no file or buffer setup:
    // nothing to tear down.
    if (standalone_)
        return;

    // Flush any buffered encodings to the encoding file and free the buffer.
    dr_mutex_lock(encoding_lock_);
    flush_instr_encodings();
    dr_raw_mem_free(encoding_buf_start_, encoding_buf_sz_);
    dr_mutex_unlock(encoding_lock_);
    dr_mutex_destroy(encoding_lock_);
    log_(1, "Wrote " UINT64_FORMAT_STRING " bytes to encoding file\n",
         encoding_bytes_written_);

    // Dump the module list, doubling the buffer until drmodtrack reports it fits.
    drcovlib_status_t res;
    size_t size = 8192;
    char *buf;
    size_t wrote;
    do {
        buf = (char *)dr_global_alloc(size);
        res = drmodtrack_dump_buf(buf, size, &wrote);
        if (res == DRCOVLIB_SUCCESS) {
            ssize_t written = write_file_func_(modfile_, buf, wrote - 1 /*no null*/);
            DR_ASSERT(written == (ssize_t)wrote - 1);
        }
        dr_global_free(buf, size);
        size *= 2;
    } while (res == DRCOVLIB_ERROR_BUF_TOO_SMALL);
    res = drmodtrack_exit();
    DR_ASSERT(res == DRCOVLIB_SUCCESS);
    drmgr_exit();
}
| |
| void * |
| offline_instru_t::load_custom_module_data(module_data_t *module, int seg_idx) |
| { |
| void *user_data = nullptr; |
| if (user_load_ != nullptr) |
| user_data = (*user_load_)(module, seg_idx); |
| const char *name = dr_module_preferred_name(module); |
| // We used to store the vdso contents, but we now use separate block encodings |
| // for vdso code. So we just find the vdso here, and pass through the user's data |
| // for all modules. |
| if (seg_idx == 0 && |
| ((name != nullptr && |
| (strstr(name, "linux-gate.so") == name || |
| strstr(name, "linux-vdso.so") == name)) || |
| (module->names.file_name != NULL && strcmp(name, "[vdso]") == 0))) { |
| DR_ASSERT(vdso_modbase_.load(std::memory_order_acquire) == 0 || |
| vdso_modbase_.load(std::memory_order_acquire) == |
| reinterpret_cast<uintptr_t>(module->start)); |
| vdso_modbase_.store(reinterpret_cast<uintptr_t>(module->start), |
| std::memory_order_release); |
| } |
| if (user_data != nullptr) { |
| void *alloc = dr_global_alloc(sizeof(custom_module_data_t)); |
| return new (alloc) custom_module_data_t(nullptr, 0, user_data); |
| } |
| return nullptr; |
| } |
| |
| int |
| offline_instru_t::print_module_data_fields( |
| char *dst, size_t max_len, const void *custom_data, size_t custom_size, |
| int (*user_print_cb)(void *data, char *dst, size_t max_len), void *user_cb_data) |
| { |
| char *cur = dst; |
| int len = dr_snprintf(dst, max_len, "v#%d,%zu,", CUSTOM_MODULE_VERSION, custom_size); |
| if (len < 0) |
| return -1; |
| cur += len; |
| if (cur - dst + custom_size > max_len) |
| return -1; |
| if (custom_size > 0) { |
| memcpy(cur, custom_data, custom_size); |
| cur += custom_size; |
| } |
| if (user_print_cb != nullptr) { |
| int res = (*user_print_cb)(user_cb_data, cur, max_len - (cur - dst)); |
| if (res == -1) |
| return -1; |
| cur += res; |
| } |
| return (int)(cur - dst); |
| } |
| |
| int |
| offline_instru_t::print_custom_module_data(void *data, char *dst, size_t max_len) |
| { |
| custom_module_data_t *custom = (custom_module_data_t *)data; |
| // We use ascii for the size to keep the module list human-readable except |
| // for the few modules like vdso that have a binary blob. |
| // We include a version #. |
| if (custom == nullptr) { |
| return dr_snprintf(dst, max_len, "v#%d,0,", CUSTOM_MODULE_VERSION); |
| } |
| return print_module_data_fields(dst, max_len, custom->base, custom->size, user_print_, |
| custom->user_data); |
| } |
| |
| void |
| offline_instru_t::free_custom_module_data(void *data) |
| { |
| custom_module_data_t *custom = (custom_module_data_t *)data; |
| if (custom == nullptr) |
| return; |
| if (user_free_ != nullptr) |
| (*user_free_)(custom->user_data); |
| custom->~custom_module_data_t(); |
| dr_global_free(custom, sizeof(*custom)); |
| } |
| |
| bool |
| offline_instru_t::custom_module_data(void *(*load_cb)(module_data_t *module, int seg_idx), |
| int (*print_cb)(void *data, char *dst, |
| size_t max_len), |
| void (*free_cb)(void *data)) |
| { |
| user_load_ = load_cb; |
| user_print_ = print_cb; |
| user_free_ = free_cb; |
| return true; |
| } |
| |
| trace_type_t |
| offline_instru_t::get_entry_type(byte *buf_ptr) const |
| { |
| offline_entry_t *entry = (offline_entry_t *)buf_ptr; |
| switch (entry->addr.type) { |
| case OFFLINE_TYPE_MEMREF: return TRACE_TYPE_READ; |
| case OFFLINE_TYPE_MEMREF_HIGH: return TRACE_TYPE_READ; |
| case OFFLINE_TYPE_PC: return TRACE_TYPE_INSTR; |
| case OFFLINE_TYPE_THREAD: return TRACE_TYPE_THREAD; |
| case OFFLINE_TYPE_PID: return TRACE_TYPE_PID; |
| case OFFLINE_TYPE_TIMESTAMP: return TRACE_TYPE_THREAD; // Closest. |
| case OFFLINE_TYPE_IFLUSH: return TRACE_TYPE_INSTR_FLUSH; |
| case OFFLINE_TYPE_EXTENDED: return TRACE_TYPE_MARKER; // Closest. |
| } |
| DR_ASSERT(false); |
| return TRACE_TYPE_THREAD_EXIT; // Unknown: returning rarest entry. |
| } |
| |
| size_t |
| offline_instru_t::get_entry_size(byte *buf_ptr) const |
| { |
| // We don't know it: the post-processor adds it. |
| return 0; |
| } |
| |
| int |
| offline_instru_t::get_instr_count(byte *buf_ptr) const |
| { |
| offline_entry_t *entry = (offline_entry_t *)buf_ptr; |
| if (entry->addr.type != OFFLINE_TYPE_PC) |
| return 0; |
| // TODO i#3995: We should *not* count "non-fetched" instrs so we'll match |
| // hardware performance counters. |
| // Xref i#4948 and i#4915 on getting rid of "non-fetched" instrs. |
| return entry->pc.instr_count; |
| } |
| |
| addr_t |
| offline_instru_t::get_entry_addr(void *drcontext, byte *buf_ptr) const |
| { |
| offline_entry_t *entry = (offline_entry_t *)buf_ptr; |
| if (entry->addr.type == OFFLINE_TYPE_PC) { |
| // XXX i#4014: Use caching to avoid lookup for last queried modbase. |
| app_pc modbase; |
| if (drmodtrack_lookup_pc_from_index(drcontext, entry->pc.modidx, &modbase) != |
| DRCOVLIB_SUCCESS) |
| return 0; |
| return reinterpret_cast<addr_t>(modbase) + static_cast<addr_t>(entry->pc.modoffs); |
| } |
| return entry->addr.addr; |
| } |
| |
| void |
| offline_instru_t::set_entry_addr(byte *buf_ptr, addr_t addr) |
| { |
| offline_entry_t *entry = (offline_entry_t *)buf_ptr; |
| entry->addr.addr = addr; |
| } |
| |
| int |
| offline_instru_t::append_pid(byte *buf_ptr, process_id_t pid) |
| { |
| offline_entry_t *entry = (offline_entry_t *)buf_ptr; |
| entry->pid.type = OFFLINE_TYPE_PID; |
| entry->pid.pid = pid; |
| return sizeof(offline_entry_t); |
| } |
| |
| int |
| offline_instru_t::append_tid(byte *buf_ptr, thread_id_t tid) |
| { |
| offline_entry_t *entry = (offline_entry_t *)buf_ptr; |
| entry->tid.type = OFFLINE_TYPE_THREAD; |
| entry->tid.tid = tid; |
| return sizeof(offline_entry_t); |
| } |
| |
| int |
| offline_instru_t::append_thread_exit(byte *buf_ptr, thread_id_t tid) |
| { |
| offline_entry_t *entry = (offline_entry_t *)buf_ptr; |
| entry->extended.type = OFFLINE_TYPE_EXTENDED; |
| entry->extended.ext = OFFLINE_EXT_TYPE_FOOTER; |
| entry->extended.valueA = 0; |
| entry->extended.valueB = 0; |
| return sizeof(offline_entry_t); |
| } |
| |
// Writes a marker entry of the given type with payload "val" at buf_ptr.
// On x64, a value too wide for the valueA bitfield is split across a leading
// TRACE_MARKER_TYPE_SPLIT_VALUE entry holding the upper 32 bits.
// Returns the total bytes written (one or two entries).
int
offline_instru_t::append_marker(byte *buf_ptr, trace_marker_type_t type, uintptr_t val)
{
    int extra_size = 0;
#ifdef X64
    if ((unsigned long long)val >= 1ULL << EXT_VALUE_A_BITS) {
        // We need two entries.
        // XXX: What we should do is change these types to signed so we can avoid
        // two entries for small negative numbers.  That requires a version bump
        // though which adds complexity for backward compatibility.
        DR_ASSERT(type != TRACE_MARKER_TYPE_SPLIT_VALUE);
        extra_size = append_marker(buf_ptr, TRACE_MARKER_TYPE_SPLIT_VALUE, val >> 32);
        buf_ptr += extra_size;
        // Keep only the low 32 bits for the primary entry.
        val = (uint)val;
    }
#else
    // XXX i#5634: We're truncating timestamps and other values by limiting to
    // pointer-sized payloads: what we should do is use multiple markers (need up to 3)
    // to support 64-bit values in 32-bit builds.  However, this means we need an
    // analysis-tool-visible extended-payload marker type, or maybe make the reader
    // hide that from the user.
#endif
    offline_entry_t *entry = (offline_entry_t *)buf_ptr;
    entry->extended.valueA = val;
    // Verify the value fits in the bitfield (no silent truncation).
    DR_ASSERT(entry->extended.valueA == val);
    entry->extended.type = OFFLINE_TYPE_EXTENDED;
    entry->extended.ext = OFFLINE_EXT_TYPE_MARKER;
    DR_ASSERT((uint)type < 1 << EXT_VALUE_B_BITS);
    entry->extended.valueB = type;
    return sizeof(offline_entry_t) + extra_size;
}
| |
| int |
| offline_instru_t::append_iflush(byte *buf_ptr, addr_t start, size_t size) |
| { |
| offline_entry_t *entry = (offline_entry_t *)buf_ptr; |
| entry->addr.type = OFFLINE_TYPE_IFLUSH; |
| entry->addr.addr = start; |
| ++entry; |
| entry->addr.type = OFFLINE_TYPE_IFLUSH; |
| entry->addr.addr = start + size; |
| return 2 * sizeof(offline_entry_t); |
| } |
| |
// Writes the per-thread file header: an extended HEADER entry carrying the file
// type and version, then tid and pid entries, then cache-line-size and
// page-size markers (plus the initial vector length on SVE-capable AArch64).
// Returns the total bytes written.
int
offline_instru_t::append_thread_header(byte *buf_ptr, thread_id_t tid,
                                       offline_file_type_t file_type)
{
    byte *new_buf = buf_ptr;
    offline_entry_t *entry = (offline_entry_t *)new_buf;
    entry->extended.type = OFFLINE_TYPE_EXTENDED;
    entry->extended.ext = OFFLINE_EXT_TYPE_HEADER;
    entry->extended.valueA = file_type;
    entry->extended.valueB = OFFLINE_FILE_VERSION;
    new_buf += sizeof(*entry);
    new_buf += append_tid(new_buf, tid);
    new_buf += append_pid(new_buf, dr_get_process_id());
    new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_CACHE_LINE_SIZE,
                             proc_get_cache_line_size());
    new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_PAGE_SIZE, dr_page_size());
#if defined(AARCH64)
    // TRACE_MARKER_TYPE_VECTOR_LENGTH is emitted in the thread header to establish the
    // initial vector length for the thread, but the marker can also be emitted again
    // later if the app changes the vector length.
    if (proc_has_feature(FEATURE_SVE)) {
        new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_VECTOR_LENGTH,
                                 proc_get_vector_length_bytes());
    }
#endif
    return (int)(new_buf - buf_ptr);
}
| |
| int |
| offline_instru_t::append_thread_header(byte *buf_ptr, thread_id_t tid) |
| { |
| return append_thread_header(buf_ptr, tid, OFFLINE_FILE_TYPE_DEFAULT); |
| } |
| |
| int |
| offline_instru_t::append_unit_header(byte *buf_ptr, thread_id_t tid, ptr_int_t window) |
| { |
| byte *new_buf = buf_ptr; |
| new_buf += append_timestamp(new_buf); |
| if (window >= 0) |
| new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_WINDOW_ID, (uintptr_t)window); |
| new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_CPU_ID, instru_t::get_cpu_id()); |
| return (int)(new_buf - buf_ptr); |
| } |
| |
| int |
| offline_instru_t::append_timestamp(byte *buf_ptr) |
| { |
| offline_entry_t *entry = (offline_entry_t *)buf_ptr; |
| entry->timestamp.type = OFFLINE_TYPE_TIMESTAMP; |
| uint64 frozen = frozen_timestamp_.load(std::memory_order_acquire); |
| entry->timestamp.usec = frozen != 0 ? frozen : instru_t::get_timestamp(); |
| return sizeof(*entry); |
| } |
| |
| bool |
| offline_instru_t::clamp_unit_header_timestamp(byte *buf_ptr, uint64 min_timestamp) |
| { |
| offline_entry_t *stamp = reinterpret_cast<offline_entry_t *>(buf_ptr); |
| DR_ASSERT(stamp->timestamp.type == OFFLINE_TYPE_TIMESTAMP); |
| if (stamp->timestamp.usec < min_timestamp) { |
| log_(2, "%s: replacing " UINT64_FORMAT_STRING " with " UINT64_FORMAT_STRING "\n", |
| __FUNCTION__, stamp->timestamp.usec, min_timestamp); |
| stamp->timestamp.usec = min_timestamp; |
| return true; |
| } |
| return false; |
| } |
| |
// Inserts instrumentation to store the 8-byte "entry" at reg_ptr+adjust,
// materializing its immediate value in "scratch".  On 32-bit builds the value
// is stored in two 4-byte halves.  Returns the entry size in bytes.
int
offline_instru_t::insert_save_entry(void *drcontext, instrlist_t *ilist, instr_t *where,
                                    reg_id_t reg_ptr, reg_id_t scratch, int adjust,
                                    offline_entry_t *entry)
{
    int disp = adjust;
#ifdef X64
    // Single pointer-sized store of the whole entry.
    instrlist_insert_mov_immed_ptrsz(drcontext, (ptr_int_t)entry->combined_value,
                                     opnd_create_reg(scratch), ilist, where, NULL, NULL);
    MINSERT(ilist, where,
            XINST_CREATE_store(drcontext, OPND_CREATE_MEMPTR(reg_ptr, disp),
                               opnd_create_reg(scratch)));
#else
    // Low 32 bits first, then the high 32 bits at disp+4.
    instrlist_insert_mov_immed_ptrsz(drcontext, (int)entry->combined_value,
                                     opnd_create_reg(scratch), ilist, where, NULL, NULL);
    MINSERT(ilist, where,
            XINST_CREATE_store(drcontext, OPND_CREATE_MEMPTR(reg_ptr, disp),
                               opnd_create_reg(scratch)));
    instrlist_insert_mov_immed_ptrsz(drcontext, (int)(entry->combined_value >> 32),
                                     opnd_create_reg(scratch), ilist, where, NULL, NULL);
    MINSERT(ilist, where,
            XINST_CREATE_store(drcontext, OPND_CREATE_MEMPTR(reg_ptr, disp + 4),
                               opnd_create_reg(scratch)));
#endif
    return sizeof(offline_entry_t);
}
| |
| // Caller must hold the encoding_lock. |
| void |
| offline_instru_t::flush_instr_encodings() |
| { |
| DR_ASSERT(dr_mutex_self_owns(encoding_lock_)); |
| size_t size = encoding_buf_ptr_ - encoding_buf_start_; |
| if (size == 0) |
| return; |
| ssize_t written = write_file_func_(encoding_file_, encoding_buf_start_, size); |
| log_(2, "%s: Wrote %zu/%zu bytes to encoding file\n", __FUNCTION__, written, size); |
| DR_ASSERT(written == static_cast<ssize_t>(size)); |
| encoding_buf_ptr_ = encoding_buf_start_; |
| encoding_bytes_written_ += written; |
| } |
| |
// Records the encodings of every app instruction in "ilist" into the shared
// encoding buffer (flushing to the file first if this block might not fit),
// assigning the block a fresh encoding id stored into per_block.  For emulated
// sequences, the original (emulated) instruction is recorded instead of the
// expansion.  If the block contains no app instrs, nothing is recorded.
void
offline_instru_t::record_instr_encodings(void *drcontext, app_pc tag_pc,
                                         per_block_t *per_block, instrlist_t *ilist)
{
    dr_mutex_lock(encoding_lock_);
    log_(3, "%s: new block id " UINT64_FORMAT_STRING " for %p\n", __FUNCTION__,
         encoding_id_, tag_pc);
    per_block->id = encoding_id_++;
    // Snapshot the running encoding length so modoffs for separate instrs can
    // be computed relative to this block's start.
    per_block->encoding_length_start = encoding_length_;

    // Flush first if the worst-case encoding for one block might overflow.
    if (encoding_buf_ptr_ + max_block_encoding_size_ >=
        encoding_buf_start_ + encoding_buf_sz_) {
        flush_instr_encodings();
    }
    byte *buf_start = encoding_buf_ptr_;
    byte *buf = buf_start;
    // Reserve space for the encoding_entry_t header; filled in below.
    buf += sizeof(encoding_entry_t);

    bool in_emulation_region = false;
    for (instr_t *instr = instrlist_first(ilist); instr != NULL;
         instr = instr_get_next(instr)) {
        instr_t *to_copy = nullptr;
        emulated_instr_t emulation_info = { sizeof(emulation_info), 0 };
        if (in_emulation_region) {
            // Skip the expansion instrs; we already recorded the emulated instr.
            if (drmgr_is_emulation_end(instr))
                in_emulation_region = false;
        } else if (drmgr_is_emulation_start(instr)) {
            bool ok = drmgr_get_emulated_instr_data(instr, &emulation_info);
            DR_ASSERT(ok);
            to_copy = emulation_info.instr;
            in_emulation_region = true;
        } else if (instr_is_app(instr)) {
            to_copy = instr;
        }
        if (to_copy == nullptr)
            continue;
        // To handle application code hooked by DR we cannot just copy from
        // instr_get_app_pc(): we have to encode.  Nearly all the time this
        // will be a pure memcpy so this only incurs an actual encoding walk
        // for the hooked level 4 instrs.
        byte *end_pc =
            instr_encode_to_copy(drcontext, to_copy, buf, instr_get_app_pc(to_copy));
        DR_ASSERT(end_pc != nullptr);
        buf = end_pc;
        DR_ASSERT(buf < encoding_buf_start_ + encoding_buf_sz_);
    }

    DR_ASSERT(buf >= buf_start + sizeof(encoding_entry_t));
    if (buf == buf_start + sizeof(encoding_entry_t)) {
        // If the given ilist has no app instr, we skip writing anything to the
        // encoding file.
        dr_mutex_unlock(encoding_lock_);
        return;
    }
    encoding_entry_t *enc = reinterpret_cast<encoding_entry_t *>(buf_start);
    enc->length = buf - buf_start;
    enc->id = per_block->id;
    // We put the ARM vs Thumb mode into the modoffs to ensure proper decoding.
    enc->start_pc = reinterpret_cast<uint64_t>(
        dr_app_pc_as_jump_target(instr_get_isa_mode(instrlist_first(ilist)), tag_pc));
    log_(2, "%s: Recorded %zu bytes for id " UINT64_FORMAT_STRING " @ %p\n", __FUNCTION__,
         enc->length, enc->id, tag_pc);
    encoding_length_ += (enc->length - sizeof(encoding_entry_t));
    encoding_buf_ptr_ += enc->length;
    dr_mutex_unlock(encoding_lock_);
}
| |
| bool |
| offline_instru_t::does_pc_require_encoding(void *drcontext, app_pc pc, uint *modidx_out, |
| app_pc *modbase_out) |
| { |
| uint modidx; |
| app_pc modbase; |
| bool res = drmodtrack_lookup(drcontext, pc, &modidx, &modbase) != DRCOVLIB_SUCCESS || |
| // We treat the VDSO as generated code, storing its encodings. |
| reinterpret_cast<uintptr_t>(modbase) == |
| vdso_modbase_.load(std::memory_order_acquire); |
| if (modidx_out != nullptr) |
| *modidx_out = modidx; |
| if (modbase_out != nullptr) |
| *modbase_out = modbase; |
| return res; |
| } |
| |
// Inserts instrumentation to store a PC entry for "pc" with "instr_count".
// For unmodified module code we store modidx+modoffs (with the ARM/Thumb mode
// encoded into modoffs); for generated/vdso code we store PC_MODIDX_INVALID
// plus either the offset into this block's recorded encodings (when instrs are
// separate) or the block's encoding id.  Returns the entry size in bytes.
int
offline_instru_t::insert_save_pc(void *drcontext, instrlist_t *ilist, instr_t *where,
                                 reg_id_t reg_ptr, reg_id_t scratch, int adjust,
                                 app_pc pc, uint instr_count, per_block_t *per_block)
{
    offline_entry_t entry;
    entry.pc.type = OFFLINE_TYPE_PC;
    app_pc modbase;
    uint modidx;
    uint64_t modoffs;
    if (!does_pc_require_encoding(drcontext, pc, &modidx, &modbase)) {
        // TODO i#2062: We need to also identify modified library code and record
        // its encodings.  The plan is to augment drmodtrack to track this for us;
        // for now we will incorrectly use the original bits in the trace.
        //
        // We put the ARM vs Thumb mode into the modoffs to ensure proper decoding.
        modoffs = dr_app_pc_as_jump_target(instr_get_isa_mode(where), pc) - modbase;
        DR_ASSERT(modidx != PC_MODIDX_INVALID);
    } else {
        modidx = PC_MODIDX_INVALID;
        // For generated code we store the id for matching with the encodings recorded
        // into the encoding file.
        if (instrs_are_separate_) {
            DR_ASSERT(pc >= per_block->start_pc);
            modoffs = pc - per_block->start_pc + per_block->encoding_length_start;
        } else {
            modoffs = per_block->id;
        }
    }
    // Check that the values we want to assign to the bitfields in offline_entry_t do not
    // overflow.  In i#2956 we observed an overflow for the modidx field.
    DR_ASSERT(modoffs < uint64_t(1) << PC_MODOFFS_BITS);
    DR_ASSERT(modidx < uint64_t(1) << PC_MODIDX_BITS);
    DR_ASSERT(instr_count < uint64_t(1) << PC_INSTR_COUNT_BITS);
    entry.pc.modoffs = modoffs;
    entry.pc.modidx = modidx;
    entry.pc.instr_count = instr_count;
    return insert_save_entry(drcontext, ilist, where, reg_ptr, scratch, adjust, &entry);
}
| |
| int |
| offline_instru_t::insert_save_type_and_size(void *drcontext, instrlist_t *ilist, |
| instr_t *where, reg_id_t reg_ptr, |
| reg_id_t scratch, int adjust, instr_t *app, |
| opnd_t ref, bool write) |
| { |
| ushort type = (ushort)(write ? TRACE_TYPE_WRITE : TRACE_TYPE_READ); |
| ushort size = (ushort)drutil_opnd_mem_size_in_bytes(ref, app); |
| if (instr_is_prefetch(app)) { |
| type = instru_t::instr_to_prefetch_type(app); |
| // Prefetch instruction may have zero sized mem reference. |
| size = 1; |
| } else if (instr_is_flush(app)) { |
| type = instru_t::instr_to_flush_type(app); |
| } |
| offline_entry_t entry; |
| entry.extended.type = OFFLINE_TYPE_EXTENDED; |
| entry.extended.ext = OFFLINE_EXT_TYPE_MEMINFO; |
| entry.extended.valueB = type; |
| entry.extended.valueA = size; |
| return insert_save_entry(drcontext, ilist, where, reg_ptr, scratch, adjust, &entry); |
| } |
| |
// Returns whether "memop" is a near base+disp operand whose displacement can be
// elided at tracing time (we store just the base register and raw2trace re-adds
// the displacement): a plain base register with no index register, subject to
// per-arch restrictions, and only when optimizations are enabled.
bool
offline_instru_t::opnd_disp_is_elidable(opnd_t memop)
{
    return !disable_optimizations_ && opnd_is_near_base_disp(memop) &&
        opnd_get_base(memop) != DR_REG_NULL &&
        opnd_get_index(memop) == DR_REG_NULL
#ifdef AARCH64
        /* On AArch64 we cannot directly store SP to memory. */
        && opnd_get_base(memop) != DR_REG_SP
#elif defined(AARCH32)
        /* Avoid complexities with PC bases which are completely elided separately. */
        && opnd_get_base(memop) != DR_REG_PC
#endif
        ;
}
| |
| int |
| offline_instru_t::insert_save_addr(void *drcontext, instrlist_t *ilist, instr_t *where, |
| reg_id_t reg_ptr, int adjust, opnd_t ref, bool write) |
| { |
| int disp = adjust; |
| reg_id_t reg_addr = DR_REG_NULL; |
| bool reserved = false; |
| bool have_addr = false; |
| drreg_status_t res; |
| if (opnd_disp_is_elidable(ref)) { |
| /* Optimization: to avoid needing a scratch reg to lea into, we simply |
| * store the base reg directly and add the disp during post-processing. |
| */ |
| reg_addr = opnd_get_base(ref); |
| if (opnd_get_base(ref) == reg_ptr) { |
| /* Here we do need a scratch reg, and raw2trace can't identify this case: |
| * so we set disp to 0 and use the regular path below. |
| */ |
| opnd_set_disp(&ref, 0); |
| } else |
| have_addr = true; |
| } |
| if (!have_addr) { |
| res = drreg_reserve_register(drcontext, ilist, where, reg_vector_, ®_addr); |
| DR_ASSERT(res == DRREG_SUCCESS); // Can't recover. |
| reserved = true; |
| bool reg_ptr_used; |
| insert_obtain_addr(drcontext, ilist, where, reg_addr, reg_ptr, ref, |
| ®_ptr_used); |
| if (reg_ptr_used) { |
| // Re-load because reg_ptr was clobbered. |
| insert_load_buf_ptr_(drcontext, ilist, where, reg_ptr); |
| } |
| reserved = true; |
| } |
| MINSERT(ilist, where, |
| XINST_CREATE_store(drcontext, OPND_CREATE_MEMPTR(reg_ptr, disp), |
| opnd_create_reg(reg_addr))); |
| if (reserved) { |
| res = drreg_unreserve_register(drcontext, ilist, where, reg_addr); |
| DR_ASSERT(res == DRREG_SUCCESS); // Can't recover. |
| } |
| return sizeof(offline_entry_t); |
| } |
| |
| // The caller should already have verified that either instr_reads_memory() or |
| // instr_writes_memory(). |
| bool |
| offline_instru_t::instr_has_multiple_different_memrefs(instr_t *instr) |
| { |
| int count = 0; |
| opnd_t first_memref = opnd_create_null(); |
| for (int i = 0; i < instr_num_srcs(instr); i++) { |
| opnd_t op = instr_get_src(instr, i); |
| if (opnd_is_memory_reference(op)) { |
| if (count == 0) |
| first_memref = op; |
| else if (!opnd_same(op, first_memref)) |
| return true; |
| ++count; |
| } |
| } |
| for (int i = 0; i < instr_num_dsts(instr); i++) { |
| opnd_t op = instr_get_dst(instr, i); |
| if (opnd_is_memory_reference(op)) { |
| if (count == 0) |
| first_memref = op; |
| else if (!opnd_same(op, first_memref)) |
| return true; |
| ++count; |
| } |
| } |
| return false; |
| } |
| |
// Inserts instrumentation recording the address (and, with full info, also the
// PC and possibly type+size) of memref "ref" of app instr "app".  Memrefs that
// were marked elidable by a preceding label insert nothing.  Returns the
// updated buffer offset "adjust".
int
offline_instru_t::instrument_memref(void *drcontext, void *bb_field, instrlist_t *ilist,
                                    instr_t *where, reg_id_t reg_ptr, int adjust,
                                    instr_t *app, opnd_t ref, int ref_index, bool write,
                                    dr_pred_type_t pred, bool memref_needs_full_info)
{
    // Check whether we can elide this address.
    // We expect our labels to be at "where" due to drbbdup's handling of block-final
    // instrs, but for exclusive store post-instr insertion we make sure we walk
    // across that app instr.
    for (instr_t *prev = instr_get_prev(where);
         prev != nullptr && (!instr_is_app(prev) || instr_is_exclusive_store(prev));
         prev = instr_get_prev(prev)) {
        int elided_index;
        bool elided_is_store;
        if (label_marks_elidable(prev, &elided_index, nullptr, &elided_is_store,
                                 nullptr) &&
            elided_index == ref_index && elided_is_store == write) {
            // This memref is reconstructed in post-processing: skip it.
            return adjust;
        }
    }
    // Post-processor distinguishes read, write, prefetch, flush, and finds size.
    if (!memref_needs_full_info) // For full info we skip this for !pred
        instrlist_set_auto_predicate(ilist, pred);
    // We allow either 0 or all 1's as the type so no need to write anything else,
    // unless a filter is in place in which case we need a PC entry.
    if (memref_needs_full_info) {
        per_block_t *per_block = reinterpret_cast<per_block_t *>(bb_field);
        reg_id_t reg_tmp;
        drreg_status_t res =
            drreg_reserve_register(drcontext, ilist, where, reg_vector_, &reg_tmp);
        DR_ASSERT(res == DRREG_SUCCESS); // Can't recover.
        adjust += insert_save_pc(drcontext, ilist, where, reg_ptr, reg_tmp, adjust,
                                 instr_get_app_pc(app), 0, per_block);
        if (instr_has_multiple_different_memrefs(app)) {
            // i#2756: post-processing can't determine which memref this is, so we
            // insert a type entry.  (For instrs w/ identical memrefs, like an ALU
            // operation, the addresses are the same and the load will pass the
            // filter first and be found first in post-processing.)
            adjust += insert_save_type_and_size(drcontext, ilist, where, reg_ptr, reg_tmp,
                                                adjust, app, ref, write);
        }
        res = drreg_unreserve_register(drcontext, ilist, where, reg_tmp);
        DR_ASSERT(res == DRREG_SUCCESS); // Can't recover.
    }
    adjust += insert_save_addr(drcontext, ilist, where, reg_ptr, adjust, ref, write);
    // Restore unconditional insertion for subsequent instrumentation.
    instrlist_set_auto_predicate(ilist, DR_PRED_NONE);
    return adjust;
}
| |
// We stored the instr count in bb_field==per_block_t in bb_analysis().
// Inserts the block's PC entry: once per block when not filtering (covering
// instr_count instrs), or one per instr (count 1) when full info is needed.
// Returns the updated buffer offset "adjust".
int
offline_instru_t::instrument_instr(void *drcontext, void *tag, void *bb_field,
                                   instrlist_t *ilist, instr_t *where, reg_id_t reg_ptr,
                                   int adjust, instr_t *app, bool memref_needs_full_info,
                                   uintptr_t mode)
{
    per_block_t *per_block = reinterpret_cast<per_block_t *>(bb_field);
    app_pc pc;
    reg_id_t reg_tmp;
    if (!memref_needs_full_info) {
        // We write just once per bb, if not filtering.
        // A count above MAX_INSTR_COUNT is the sentinel meaning "already written".
        if (per_block->instr_count > MAX_INSTR_COUNT)
            return adjust;
        pc = dr_fragment_app_pc(tag);
    } else {
        DR_ASSERT(instr_is_app(app));
        pc = instr_get_app_pc(app);
    }
    drreg_status_t res =
        drreg_reserve_register(drcontext, ilist, where, reg_vector_, &reg_tmp);
    DR_ASSERT(res == DRREG_SUCCESS); // Can't recover.
    adjust += insert_save_pc(
        drcontext, ilist, where, reg_ptr, reg_tmp, adjust, pc,
        memref_needs_full_info ? 1 : static_cast<uint>(per_block->instr_count),
        per_block);
    if (!memref_needs_full_info)
        // Mark the per-bb entry as emitted (see the sentinel check above).
        per_block->instr_count = MAX_INSTR_COUNT + 1;
    res = drreg_unreserve_register(drcontext, ilist, where, reg_tmp);
    DR_ASSERT(res == DRREG_SUCCESS); // Can't recover.
    return adjust;
}
| |
| int |
| offline_instru_t::instrument_ibundle(void *drcontext, instrlist_t *ilist, instr_t *where, |
| reg_id_t reg_ptr, int adjust, instr_t **delay_instrs, |
| int num_delay_instrs) |
| { |
| // The post-processor fills in all instr info other than our once-per-bb entry. |
| return adjust; |
| } |
| |
| int |
| offline_instru_t::instrument_instr_encoding(void *drcontext, void *tag, void *bb_field, |
| instrlist_t *ilist, instr_t *where, |
| reg_id_t reg_ptr, int adjust, instr_t *app) |
| { |
| // We emit non-module-code or modified-module-code encodings separately in |
| // record_instr_encodings(). Encodings for static code are added in the |
| // post-processor. |
| return adjust; |
| } |
| |
| int |
| offline_instru_t::instrument_rseq_entry(void *drcontext, instrlist_t *ilist, |
| instr_t *where, instr_t *rseq_label, |
| reg_id_t reg_ptr, int adjust) |
| { |
| dr_instr_label_data_t *data = instr_get_label_data_area(rseq_label); |
| reg_id_t reg_tmp; |
| drreg_status_t res = |
| drreg_reserve_register(drcontext, ilist, where, reg_vector_, ®_tmp); |
| DR_ASSERT(res == DRREG_SUCCESS); // Can't recover. |
| // We may need 2 entries for our marker. We write the entry marker with payload |
| // data[0]==rseq end. We do not use a separate marker to write data[1]==rseq |
| // handler as an abort marker will have the handler. |
| static constexpr int RSEQ_LABEL_END_PC_INDEX = 0; |
| offline_entry_t entries[2]; |
| int size = append_marker((byte *)entries, TRACE_MARKER_TYPE_RSEQ_ENTRY, |
| data->data[RSEQ_LABEL_END_PC_INDEX]); |
| DR_ASSERT(size % sizeof(offline_entry_t) == 0); |
| size /= sizeof(offline_entry_t); |
| DR_ASSERT(size <= static_cast<int>(sizeof(entries))); |
| for (int i = 0; i < size; i++) { |
| adjust += insert_save_entry(drcontext, ilist, where, reg_ptr, reg_tmp, adjust, |
| &entries[i]); |
| } |
| res = drreg_unreserve_register(drcontext, ilist, where, reg_tmp); |
| DR_ASSERT(res == DRREG_SUCCESS); // Can't recover. |
| return adjust; |
| } |
| |
| void |
| offline_instru_t::bb_analysis(void *drcontext, void *tag, void **bb_field, |
| instrlist_t *ilist, bool repstr_expanded, |
| bool memref_needs_full_info) |
| { |
| per_block_t *per_block = |
| reinterpret_cast<per_block_t *>(dr_thread_alloc(drcontext, sizeof(*per_block))); |
| *bb_field = per_block; |
| |
| per_block->instr_count = instru_t::count_app_instrs(ilist); |
| |
| app_pc tag_pc = dr_fragment_app_pc(tag); |
| per_block->start_pc = tag_pc; |
| |
| identify_elidable_addresses(drcontext, ilist, OFFLINE_FILE_VERSION, |
| memref_needs_full_info); |
| |
| if (does_pc_require_encoding(drcontext, tag_pc, nullptr, nullptr)) { |
| // For (unmodified) library code we do not need to record encodings as we |
| // rely on access to the binary during post-processing. |
| // |
| // TODO i#2062: We need to also identify modified library code and record |
| // its encodings. The plan is to augment drmodtrack to track this for us; |
| // for now we will incorrectly use the original bits in the trace. |
| record_instr_encodings(drcontext, tag_pc, per_block, ilist); |
| } |
| } |
| |
void
offline_instru_t::bb_analysis_cleanup(void *drcontext, void *bb_field)
{
    // Frees the per_block_t allocated by bb_analysis() (stored in the bb field).
    dr_thread_free(drcontext, bb_field, sizeof(per_block_t));
}
| |
| bool |
| offline_instru_t::opnd_is_elidable(opnd_t memop, DR_PARAM_OUT reg_id_t &base, int version) |
| { |
| if (version <= OFFLINE_FILE_VERSION_NO_ELISION) |
| return false; |
| // When adding new elision cases, be sure to check "version" to keep backward |
| // compatibility. For OFFLINE_FILE_VERSION_ELIDE_UNMOD_BASE we elide a |
| // base register that has not changed since a prior stored address (with no |
| // index register). We include rip-relative in this category. |
| // Here we look for rip-relative and no-index operands: opnd_check_elidable() |
| // checks for an unchanged prior instance. |
| if (IF_REL_ADDRS(opnd_is_near_rel_addr(memop) ||) opnd_is_near_abs_addr(memop)) { |
| base = DR_REG_NULL; |
| return true; |
| } |
| if (!opnd_is_near_base_disp(memop) || |
| // We're assuming displacements are all factored out, such that we can share |
| // a base across all uses without subtracting the original disp. |
| // TODO(i#4898): This is blocking elision of SP bases on AArch64. We should |
| // add disp subtraction by storing the disp along with reg_vals in raw2trace |
| // for AArch64. |
| !opnd_disp_is_elidable(memop) || |
| (opnd_get_base(memop) != DR_REG_NULL && opnd_get_index(memop) != DR_REG_NULL)) |
| return false; |
| base = opnd_get_base(memop); |
| if (base == DR_REG_NULL) |
| base = opnd_get_index(memop); |
| return true; |
| } |
| |
| void |
| offline_instru_t::opnd_check_elidable(void *drcontext, instrlist_t *ilist, instr_t *instr, |
| opnd_t memop, int op_index, int memop_index, |
| bool write, int version, reg_id_set_t &saw_base) |
| { |
| // We elide single-register (base or index) operands that only differ in |
| // displacement, as well as rip-relative or absolute-address operands. |
| reg_id_t base; |
| if (!opnd_is_elidable(memop, base, version)) |
| return; |
| // When adding new elision cases, be sure to check "version" to keep backward |
| // compatibility. See the opnd_is_elidable() notes. Here we insert a label if |
| // we find a base that has not changed or a rip-relative operand. |
| if (base == DR_REG_NULL || saw_base.find(base) != saw_base.end()) { |
| instr_t *note = INSTR_CREATE_label(drcontext); |
| instr_set_note(note, (void *)elide_memref_note_); |
| dr_instr_label_data_t *data = instr_get_label_data_area(note); |
| data->data[LABEL_DATA_ELIDED_INDEX] = op_index; |
| data->data[LABEL_DATA_ELIDED_MEMOP_INDEX] = memop_index; |
| data->data[LABEL_DATA_ELIDED_IS_WRITE] = write; |
| data->data[LABEL_DATA_ELIDED_NEEDS_BASE] = (base != DR_REG_NULL); |
| MINSERT(ilist, instr, note); |
| } else |
| saw_base.insert(base); |
| } |
| |
| bool |
| offline_instru_t::label_marks_elidable(instr_t *instr, DR_PARAM_OUT int *opnd_index, |
| DR_PARAM_OUT int *memopnd_index, |
| DR_PARAM_OUT bool *is_write, |
| DR_PARAM_OUT bool *needs_base) |
| { |
| if (!instr_is_label(instr)) |
| return false; |
| if (instr_get_note(instr) != (void *)elide_memref_note_) |
| return false; |
| dr_instr_label_data_t *data = instr_get_label_data_area(instr); |
| if (opnd_index != nullptr) |
| *opnd_index = static_cast<int>(data->data[LABEL_DATA_ELIDED_INDEX]); |
| if (memopnd_index != nullptr) |
| *memopnd_index = static_cast<int>(data->data[LABEL_DATA_ELIDED_MEMOP_INDEX]); |
| // The !! is to work around MSVC's warning C4800 about casting int to bool. |
| if (is_write != nullptr) |
| *is_write = static_cast<bool>(!!data->data[LABEL_DATA_ELIDED_IS_WRITE]); |
| if (needs_base != nullptr) |
| *needs_base = static_cast<bool>(!!data->data[LABEL_DATA_ELIDED_NEEDS_BASE]); |
| return true; |
| } |
| |
| void |
| offline_instru_t::identify_elidable_addresses(void *drcontext, instrlist_t *ilist, |
| int version, bool memref_needs_full_info) |
| { |
| // Analysis for eliding redundant addresses we can reconstruct during |
| // post-processing. |
| if (disable_optimizations_) |
| return; |
| // We can't elide when doing filtering. |
| if (memref_needs_full_info) |
| return; |
| reg_id_set_t saw_base; |
| for (instr_t *instr = instrlist_first(ilist); instr != NULL; |
| instr = instr_get_next(instr)) { |
| // XXX: We turn off address elision for bbs containing emulation sequences |
| // or instrs that are expanded into emulation sequences like scatter/gather |
| // and rep stringop. As instru_offline and raw2trace see different instrs in |
| // these bbs (expanded seq vs original app instr), there may be mismatches in |
| // identifying elision opportunities. We can possibly provide a consistent |
| // view by expanding the instr in raw2trace (e.g. using |
| // drx_expand_scatter_gather) when building the ilist. |
| if (drutil_instr_is_stringop_loop(instr) |
| IF_X86_OR_AARCH64(|| instr_is_scatter(instr) || instr_is_gather(instr))) { |
| return; |
| } |
| if (drmgr_is_emulation_start(instr) || drmgr_is_emulation_end(instr)) { |
| return; |
| } |
| } |
| for (instr_t *instr = instrlist_first_app(ilist); instr != NULL; |
| instr = instr_get_next_app(instr)) { |
| // For now we bail at predication. |
| if (instr_get_predicate(instr) != DR_PRED_NONE) { |
| saw_base.clear(); |
| continue; |
| } |
| // Use instr_{reads,writes}_memory() to rule out LEA and NOP. |
| if (instr_reads_memory(instr) || instr_writes_memory(instr)) { |
| int mem_count = 0; |
| for (int i = 0; i < instr_num_srcs(instr); i++) { |
| if (opnd_is_memory_reference(instr_get_src(instr, i))) { |
| opnd_check_elidable(drcontext, ilist, instr, instr_get_src(instr, i), |
| i, mem_count, false, version, saw_base); |
| ++mem_count; |
| } |
| } |
| // Rule out sharing with any dest if the base is written to. The ISA |
| // does not specify the ordering of multiple dests. |
| auto reg_it = saw_base.begin(); |
| while (reg_it != saw_base.end()) { |
| if (instr_writes_to_reg(instr, *reg_it, DR_QUERY_INCLUDE_COND_DSTS)) |
| reg_it = saw_base.erase(reg_it); |
| else |
| ++reg_it; |
| } |
| mem_count = 0; |
| for (int i = 0; i < instr_num_dsts(instr); i++) { |
| if (opnd_is_memory_reference(instr_get_dst(instr, i))) { |
| opnd_check_elidable(drcontext, ilist, instr, instr_get_dst(instr, i), |
| i, mem_count, true, version, saw_base); |
| ++mem_count; |
| } |
| } |
| } |
| // Rule out sharing with subsequent instrs if the base is written to. |
| // TODO(i#2001): Add special support for eliding the xsp base of push+pop |
| // instructions. |
| auto reg_it = saw_base.begin(); |
| while (reg_it != saw_base.end()) { |
| if (instr_writes_to_reg(instr, *reg_it, DR_QUERY_INCLUDE_COND_DSTS)) |
| reg_it = saw_base.erase(reg_it); |
| else |
| ++reg_it; |
| } |
| } |
| } |
| |
| } // namespace drmemtrace |
| } // namespace dynamorio |