i#6662 public traces, part 5: func_id_filter_t (#6820)

Adds a new filter, `func_id_filter_t`, to record_filter. It filters
TRACE_MARKER_TYPE_FUNC_ markers based on the function ID.

The filter is enabled by `-filter_keep_func_ids` followed by a list of
integers that represent the function IDs bound to
TRACE_MARKER_TYPE_FUNC_ markers to keep in the trace.
Specifically, whenever we encounter a TRACE_MARKER_TYPE_FUNC_ID marker
whose marker value is in the list we set a per-shard flag to indicate
that all TRACE_MARKER_TYPE_FUNC_[ID | ARG | RETVAL | RETADDR] markers
related to that function ID need to be preserved. We remove the
TRACE_MARKER_TYPE_FUNC_ markers related to functions whose ID is not in
the list.

This filter can be invoked with:
```
drrun -t drmemtrace -tool record_filter -filter_keep_func_ids 1,2,3,4 -indir path/to/input/trace -outdir path/to/output/trace
```
This preserves the TRACE_MARKER_TYPE_FUNC_ markers related to functions
with IDs 1, 2, 3, and 4, and removes the TRACE_MARKER_TYPE_FUNC_ markers
for all other ID values.

We use this filter to preserve markers related to SYS_futex functions in
the public release of traces.

Issue #6662
diff --git a/api/docs/release.dox b/api/docs/release.dox
index 5b2efe5..d1fde92 100644
--- a/api/docs/release.dox
+++ b/api/docs/release.dox
@@ -241,6 +241,10 @@
    trace analysis tool framework.
  - Added "-t drmemtrace" as the preferred launcher for the drmemtrace/drcachesim
    trace analysis tool framework.
+ - Added func_id_filter_t to #dynamorio::drmemtrace::record_filter_t to filter function
+   markers based on the function ID. This filter is enabled by "-filter_keep_func_ids"
+   followed by a comma-separated list of function IDs to preserve in the output trace.
+   All function markers whose ID is not in the list are removed.
 
 **************************************************
 <hr>
diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt
index 0f78e4a..9b955f6 100644
--- a/clients/drcachesim/CMakeLists.txt
+++ b/clients/drcachesim/CMakeLists.txt
@@ -198,6 +198,7 @@
   tools/filter/cache_filter.cpp
   tools/filter/type_filter.h
   tools/filter/encodings2regdeps_filter.h
+  tools/filter/func_id_filter.h
   tools/filter/null_filter.h)
 target_link_libraries(drmemtrace_record_filter drmemtrace_simulator)
 configure_DynamoRIO_standalone(drmemtrace_record_filter)
diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp
index 5cbbace..c0a3cb5 100644
--- a/clients/drcachesim/analyzer_multi.cpp
+++ b/clients/drcachesim/analyzer_multi.cpp
@@ -334,7 +334,7 @@
             op_filter_cache_size.get_value(), op_filter_trace_types.get_value(),
             op_filter_marker_types.get_value(), op_trim_before_timestamp.get_value(),
             op_trim_after_timestamp.get_value(), op_encodings2regdeps.get_value(),
-            op_verbose.get_value());
+            op_filter_func_ids.get_value(), op_verbose.get_value());
     }
     ERRMSG("Usage error: unsupported record analyzer type \"%s\".  Only " RECORD_FILTER
            " is supported.\n",
diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp
index e6d5f16..6d2c51e 100644
--- a/clients/drcachesim/common/options.cpp
+++ b/clients/drcachesim/common/options.cpp
@@ -980,9 +980,21 @@
 droption_t<bool> op_encodings2regdeps(
     DROPTION_SCOPE_FRONTEND, "filter_encodings2regdeps", false,
     "Enable converting the encoding of instructions to synthetic ISA DR_ISA_REGDEPS.",
-    "This option is for -simulator_type " RECORD_FILTER ". When present, it converts "
+    "This option is for -tool " RECORD_FILTER ". When present, it converts "
     "the encoding of instructions from a real ISA to the DR_ISA_REGDEPS synthetic ISA.");
 
+/* XXX i#6369: we should partition our options by tool. This one should belong to the
+ * record_filter partition. For now we add the filter_ prefix to options that should be
+ * used in conjunction with record_filter.
+ */
+droption_t<std::string>
+    op_filter_func_ids(DROPTION_SCOPE_FRONTEND, "filter_keep_func_ids", "",
+                       "Comma-separated integers of function IDs to keep.",
+                       "This option is for -tool " RECORD_FILTER ". It preserves "
+                       "TRACE_MARKER_TYPE_FUNC_[ID | ARG | RETVAL | RETADDR] markers "
+                       "for the listed function IDs and removes those belonging to "
+                       "unlisted function IDs.");
+
 droption_t<uint64_t> op_trim_before_timestamp(
     DROPTION_SCOPE_ALL, "trim_before_timestamp", 0, 0,
     (std::numeric_limits<uint64_t>::max)(),
diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h
index 9603ba0..20286b9 100644
--- a/clients/drcachesim/common/options.h
+++ b/clients/drcachesim/common/options.h
@@ -216,6 +216,7 @@
 extern dynamorio::droption::droption_t<std::string> op_filter_trace_types;
 extern dynamorio::droption::droption_t<std::string> op_filter_marker_types;
 extern dynamorio::droption::droption_t<bool> op_encodings2regdeps;
+extern dynamorio::droption::droption_t<std::string> op_filter_func_ids;
 extern dynamorio::droption::droption_t<uint64_t> op_trim_before_timestamp;
 extern dynamorio::droption::droption_t<uint64_t> op_trim_after_timestamp;
 extern dynamorio::droption::droption_t<bool> op_abort_on_invariant_error;
diff --git a/clients/drcachesim/tests/record_filter_unit_tests.cpp b/clients/drcachesim/tests/record_filter_unit_tests.cpp
index 0e60555..9b09e43 100644
--- a/clients/drcachesim/tests/record_filter_unit_tests.cpp
+++ b/clients/drcachesim/tests/record_filter_unit_tests.cpp
@@ -43,9 +43,11 @@
 #include "tools/filter/trim_filter.h"
 #include "tools/filter/type_filter.h"
 #include "tools/filter/encodings2regdeps_filter.h"
+#include "tools/filter/func_id_filter.h"
 #include "trace_entry.h"
 #include "zipfile_ostream.h"
 
+#include <cstdint>
 #include <inttypes.h>
 #include <fstream>
 #include <set>
@@ -290,6 +292,8 @@
     return true;
 }
 
+/* Test changes in instruction encodings.
+ */
 static bool
 test_encodings2regdeps_filter()
 {
@@ -401,12 +405,12 @@
         { { TRACE_TYPE_FOOTER, 0, { 0x0 } }, true, { true } },
     };
 
-    /* Construct encodings2regdeps_filter.
+    /* Construct encodings2regdeps_filter_t.
      */
     std::vector<std::unique_ptr<record_filter_func_t>> filters;
     auto encodings2regdeps_filter = std::unique_ptr<record_filter_func_t>(
         new dynamorio::drmemtrace::encodings2regdeps_filter_t());
-    if (encodings2regdeps_filter->get_error_string() != "") {
+    if (!encodings2regdeps_filter->get_error_string().empty()) {
         fprintf(stderr, "Couldn't construct a encodings2regdeps_filter %s",
                 encodings2regdeps_filter->get_error_string().c_str());
         return false;
@@ -427,6 +431,175 @@
     return true;
 }
 
+/* Test preservation of function-related markers (TRACE_MARKER_TYPE_FUNC_[ID | RETADDR |
+ * ARG | RETVAL]) based on function ID (marker value of TRACE_MARKER_TYPE_FUNC_ID).
+ */
+static bool
+test_func_id_filter()
+{
+
+    constexpr addr_t SYS_FUTEX = 202;
+    constexpr addr_t SYS_FSYNC = 74;
+    constexpr addr_t SYSCALL_BASE =
+        static_cast<addr_t>(func_trace_t::TRACE_FUNC_ID_SYSCALL_BASE);
+    constexpr addr_t SYSCALL_FUTEX_ID = SYS_FUTEX + SYSCALL_BASE;
+    constexpr addr_t SYSCALL_FSYNC_ID = SYS_FSYNC + SYSCALL_BASE;
+    constexpr addr_t FUNC_ID_TO_KEEP = 7;
+    constexpr addr_t FUNC_ID_TO_REMOVE = 8;
+    constexpr addr_t PC = 0x7f6fdd3ec360;
+    constexpr addr_t ENCODING = 0xe78948;
+    std::vector<test_case_t> entries = {
+        /* Trace shard header.
+         */
+        { { TRACE_TYPE_HEADER, 0, { 0x1 } }, true, { true } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION, { 0x2 } }, true, { true } },
+        { { TRACE_TYPE_MARKER,
+            TRACE_MARKER_TYPE_FILETYPE,
+            { OFFLINE_FILE_TYPE_ARCH_X86_64 | OFFLINE_FILE_TYPE_ENCODINGS |
+              OFFLINE_FILE_TYPE_SYSCALL_NUMBERS | OFFLINE_FILE_TYPE_BLOCKING_SYSCALLS } },
+          true,
+          { true } },
+        { { TRACE_TYPE_THREAD, 0, { 0x4 } }, true, { true } },
+        { { TRACE_TYPE_PID, 0, { 0x5 } }, true, { true } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 0x6 } },
+          true,
+          { true } },
+
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, true, { true } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } },
+        /* We need at least one instruction with encodings to make record_filter output
+         * the trace.
+         */
+        { { TRACE_TYPE_ENCODING, 3, { ENCODING } }, true, { true } },
+        { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ID, { SYSCALL_FUTEX_ID } },
+          true,
+          { true } },
+        /* We don't care about the arg values, we just care that they are preserved.
+         * We use some non-zero values to make sure we're not creating new, uninitialized
+         * markers.
+         * Note: SYS_futex has 6 args.
+         */
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ARG, { 0x1 } }, true, { true } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ARG, { 0x2 } }, true, { true } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ARG, { 0x3 } }, true, { true } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ARG, { 0x4 } }, true, { true } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ARG, { 0x5 } }, true, { true } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ARG, { 0x6 } }, true, { true } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ID, { SYSCALL_FUTEX_ID } },
+          true,
+          { true } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_RETVAL, { 0x789 } },
+          true,
+          { true } },
+        /* Test that func_id_filter_t doesn't output any
+         * TRACE_MARKER_TYPE_FUNC_ for functions that are not SYS_futex.
+         * We use SYS_fsync in this test.
+         */
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ID, { SYSCALL_FSYNC_ID } },
+          true,
+          { false } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ARG, { 0x1 } }, true, { false } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ID, { SYSCALL_FSYNC_ID } },
+          true,
+          { false } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_RETVAL, { 0x234 } },
+          true,
+          { false } },
+
+        /* Nested functions. Keep outer, remove inner.
+         */
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ID, { FUNC_ID_TO_KEEP } },
+          true,
+          { true } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_RETADDR, { 0xbeef } },
+          true,
+          { true } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ARG, { 0x1 } }, true, { true } },
+
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ID, { FUNC_ID_TO_REMOVE } },
+          true,
+          { false } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_RETADDR, { 0xdead } },
+          true,
+          { false } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ARG, { 0x1 } }, true, { false } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ID, { FUNC_ID_TO_REMOVE } },
+          true,
+          { false } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_RETVAL, { 0x234 } },
+          true,
+          { false } },
+
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ID, { FUNC_ID_TO_KEEP } },
+          true,
+          { true } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_RETVAL, { 0x234 } },
+          true,
+          { true } },
+
+        /* Nested functions. Remove outer, keep inner.
+         */
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ID, { FUNC_ID_TO_REMOVE } },
+          true,
+          { false } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_RETADDR, { 0xdead } },
+          true,
+          { false } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ARG, { 0x1 } }, true, { false } },
+
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ID, { FUNC_ID_TO_KEEP } },
+          true,
+          { true } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_RETADDR, { 0xbeef } },
+          true,
+          { true } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ARG, { 0x1 } }, true, { true } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ID, { FUNC_ID_TO_KEEP } },
+          true,
+          { true } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_RETVAL, { 0x234 } },
+          true,
+          { true } },
+
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_ID, { FUNC_ID_TO_REMOVE } },
+          true,
+          { false } },
+        { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FUNC_RETVAL, { 0x234 } },
+          true,
+          { false } },
+
+        { { TRACE_TYPE_FOOTER, 0, { 0x0 } }, true, { true } },
+    };
+
+    /* Construct func_id_filter_t.
+     */
+    std::vector<uint64_t> func_ids_to_keep = { static_cast<uint64_t>(SYSCALL_FUTEX_ID),
+                                               static_cast<uint64_t>(FUNC_ID_TO_KEEP) };
+    std::vector<std::unique_ptr<record_filter_func_t>> filters;
+    auto func_id_filter = std::unique_ptr<record_filter_func_t>(
+        new dynamorio::drmemtrace::func_id_filter_t(func_ids_to_keep));
+    if (!func_id_filter->get_error_string().empty()) {
+        fprintf(stderr, "Couldn't construct a func_id_filter %s",
+                func_id_filter->get_error_string().c_str());
+        return false;
+    }
+    filters.push_back(std::move(func_id_filter));
+
+    /* Construct record_filter_t.
+     */
+    auto record_filter = std::unique_ptr<test_record_filter_t>(
+        new test_record_filter_t(std::move(filters), 0, /*write_archive=*/true));
+
+    /* Run the test.
+     */
+    if (!process_entries_and_check_result(record_filter.get(), entries, 0))
+        return false;
+
+    fprintf(stderr, "test_func_id_filter passed\n");
+    return true;
+}
+
 static bool
 test_cache_and_type_filter()
 {
@@ -1247,7 +1420,8 @@
                     droption_parser_t::usage_short(DROPTION_SCOPE_ALL).c_str());
     }
     if (!test_cache_and_type_filter() || !test_chunk_update() || !test_trim_filter() ||
-        !test_null_filter() || !test_wait_filter() || !test_encodings2regdeps_filter())
+        !test_null_filter() || !test_wait_filter() || !test_encodings2regdeps_filter() ||
+        !test_func_id_filter())
         return 1;
     fprintf(stderr, "All done!\n");
     return 0;
diff --git a/clients/drcachesim/tools/filter/func_id_filter.h b/clients/drcachesim/tools/filter/func_id_filter.h
new file mode 100644
index 0000000..0850182
--- /dev/null
+++ b/clients/drcachesim/tools/filter/func_id_filter.h
@@ -0,0 +1,140 @@
+/* **********************************************************
+ * Copyright (c) 2022-2024 Google, Inc.  All rights reserved.
+ * **********************************************************/
+
+/*
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Google, Inc. nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without
+ *   specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#ifndef _FUNC_ID_FILTER_H_
+#define _FUNC_ID_FILTER_H_ 1
+
+#include "record_filter.h"
+#include "trace_entry.h"
+#include "utils.h"
+
+#include <cstring>
+#include <unordered_set>
+#include <vector>
+
+namespace dynamorio {
+namespace drmemtrace {
+
+/* This filter takes a list of function IDs for which it preserves the related
+ * TRACE_MARKER_TYPE_FUNC_[ID | ARG | RETVAL | RETADDR] markers. It removes all the other
+ * TRACE_MARKER_TYPE_FUNC_ markers related to function IDs that are not in the list.
+ */
+class func_id_filter_t : public record_filter_t::record_filter_func_t {
+public:
+    func_id_filter_t(std::vector<uint64_t> keep_func_ids_list)
+    {
+        keep_func_ids_set_.insert(keep_func_ids_list.cbegin(), keep_func_ids_list.cend());
+    }
+
+    void *
+    parallel_shard_init(memtrace_stream_t *shard_stream,
+                        bool partial_trace_filter) override
+    {
+        per_shard_t *per_shard = new per_shard_t;
+        per_shard->output_func_markers = false;
+        return per_shard;
+    }
+
+    bool
+    parallel_shard_filter(
+        trace_entry_t &entry, void *shard_data,
+        record_filter_t::record_filter_info_t &record_filter_info) override
+    {
+        /* Get per_shard private data.
+         */
+        per_shard_t *per_shard = reinterpret_cast<per_shard_t *>(shard_data);
+
+        trace_type_t entry_type = static_cast<trace_type_t>(entry.type);
+        /* Output any trace_entry_t that is not a marker.
+         */
+        if (entry_type != TRACE_TYPE_MARKER)
+            return true;
+
+        trace_marker_type_t marker_type = static_cast<trace_marker_type_t>(entry.size);
+        switch (marker_type) {
+        case TRACE_MARKER_TYPE_FUNC_ID: {
+            /* Function markers follow this sequence:
+             * TRACE_MARKER_TYPE_FUNC_ID
+             * [TRACE_MARKER_TYPE_FUNC_RETADDR]
+             * [TRACE_MARKER_TYPE_FUNC_ARG]*
+             *
+             * [entries (instructions, other function markers, etc.)]*
+             *
+             * TRACE_MARKER_TYPE_FUNC_ID
+             * TRACE_MARKER_TYPE_FUNC_RETVAL
+             *
+             * ([] = 0 or 1, []* = 0 or more)
+             *
+             * Because TRACE_MARKER_TYPE_FUNC_ID always precedes the remaining
+             * function-related markers, we can simply set
+             * per_shard->output_func_markers based on the TRACE_MARKER_TYPE_FUNC_ID
+             * marker value to handle nested functions.
+             */
+            uint64_t func_id = static_cast<uint64_t>(entry.addr);
+            per_shard->output_func_markers =
+                keep_func_ids_set_.find(func_id) != keep_func_ids_set_.end();
+            return per_shard->output_func_markers;
+        }
+        case TRACE_MARKER_TYPE_FUNC_ARG:
+        case TRACE_MARKER_TYPE_FUNC_RETVAL:
+        case TRACE_MARKER_TYPE_FUNC_RETADDR:
+            /* Output these markers only if they belong to a function whose ID we want
+             * to keep.
+             */
+            return per_shard->output_func_markers;
+        /* In func_id_filter_t we only handle TRACE_MARKER_TYPE_FUNC_ID,
+         * TRACE_MARKER_TYPE_FUNC_ARG, TRACE_MARKER_TYPE_FUNC_RETVAL,
+         * TRACE_MARKER_TYPE_FUNC_RETADDR. By default we output all other markers.
+         */
+        default: return true;
+        }
+    }
+
+    bool
+    parallel_shard_exit(void *shard_data) override
+    {
+        per_shard_t *per_shard = reinterpret_cast<per_shard_t *>(shard_data);
+        delete per_shard;
+        return true;
+    }
+
+private:
+    struct per_shard_t {
+        bool output_func_markers; // Whether the latest FUNC_ID seen is in the keep set.
+    };
+
+    std::unordered_set<uint64_t> keep_func_ids_set_;
+};
+
+} // namespace drmemtrace
+} // namespace dynamorio
+#endif /* _FUNC_ID_FILTER_H_ */
diff --git a/clients/drcachesim/tools/filter/record_filter.cpp b/clients/drcachesim/tools/filter/record_filter.cpp
index f248ae3..2c317dd 100644
--- a/clients/drcachesim/tools/filter/record_filter.cpp
+++ b/clients/drcachesim/tools/filter/record_filter.cpp
@@ -61,6 +61,7 @@
 #include "trim_filter.h"
 #include "type_filter.h"
 #include "encodings2regdeps_filter.h"
+#include "func_id_filter.h"
 
 #undef VPRINT
 #ifdef DEBUG
@@ -107,7 +108,8 @@
                           int cache_filter_size, const std::string &remove_trace_types,
                           const std::string &remove_marker_types,
                           uint64_t trim_before_timestamp, uint64_t trim_after_timestamp,
-                          bool encodings2regdeps, unsigned int verbose)
+                          bool encodings2regdeps, const std::string &keep_func_ids,
+                          unsigned int verbose)
 {
     std::vector<
         std::unique_ptr<dynamorio::drmemtrace::record_filter_t::record_filter_func_t>>
@@ -142,6 +144,12 @@
             std::unique_ptr<dynamorio::drmemtrace::record_filter_t::record_filter_func_t>(
                 new dynamorio::drmemtrace::encodings2regdeps_filter_t()));
     }
+    if (!keep_func_ids.empty()) {
+        std::vector<uint64_t> keep_func_ids_list = parse_string<uint64_t>(keep_func_ids);
+        filter_funcs.emplace_back(
+            std::unique_ptr<dynamorio::drmemtrace::record_filter_t::record_filter_func_t>(
+                new dynamorio::drmemtrace::func_id_filter_t(keep_func_ids_list)));
+    }
 
     // TODO i#5675: Add other filters.
 
diff --git a/clients/drcachesim/tools/filter/record_filter_create.h b/clients/drcachesim/tools/filter/record_filter_create.h
index 7e3b750..0516d7b 100644
--- a/clients/drcachesim/tools/filter/record_filter_create.h
+++ b/clients/drcachesim/tools/filter/record_filter_create.h
@@ -64,6 +64,9 @@
  *   whose value is greater than this parameter.
  * @param[in] encodings2regdeps  If true, converts instruction encodings from the real ISA
  *   of the input trace to the #DR_ISA_REGDEPS synthetic ISA.
+ * @param[in] keep_func_ids  A comma-separated list of integers representing the
+ *   function IDs related to #TRACE_MARKER_TYPE_FUNC_ID (and _ARG, _RETVAL, _RETADDR)
+ *   markers to preserve in the trace, while removing all other function markers.
  * @param[in] verbose  Verbosity level for notifications.
  */
 record_analysis_tool_t *
@@ -71,7 +74,8 @@
                           int cache_filter_size, const std::string &remove_trace_types,
                           const std::string &remove_marker_types,
                           uint64_t trim_before_timestamp, uint64_t trim_after_timestamp,
-                          bool encodings2regdeps, unsigned int verbose);
+                          bool encodings2regdeps, const std::string &keep_func_ids,
+                          unsigned int verbose);
 
 } // namespace drmemtrace
 } // namespace dynamorio
diff --git a/clients/drcachesim/tools/record_filter_launcher.cpp b/clients/drcachesim/tools/record_filter_launcher.cpp
index 53adf2c..9d6fe3e 100644
--- a/clients/drcachesim/tools/record_filter_launcher.cpp
+++ b/clients/drcachesim/tools/record_filter_launcher.cpp
@@ -128,8 +128,16 @@
 droption_t<bool> op_encodings2regdeps(
     DROPTION_SCOPE_FRONTEND, "filter_encodings2regdeps", false,
     "Enable converting the encoding of instructions to synthetic ISA DR_ISA_REGDEPS.",
-    "This option is for -simulator_type record_filter. When present, it converts "
+    "This option is for -tool record_filter. When present, it converts "
     "the encoding of instructions from a real ISA to the DR_ISA_REGDEPS synthetic ISA.");
+
+droption_t<std::string>
+    op_filter_func_ids(DROPTION_SCOPE_FRONTEND, "filter_keep_func_ids", "",
+                       "Comma-separated integers of function IDs to keep.",
+                       "This option is for -tool record_filter. It preserves "
+                       "TRACE_MARKER_TYPE_FUNC_[ID | ARG | RETVAL | RETADDR] "
+                       "markers for the listed function IDs and removes those "
+                       "belonging to unlisted function IDs.");
 } // namespace
 
 int
@@ -160,7 +168,7 @@
             op_cache_filter_size.get_value(), op_remove_trace_types.get_value(),
             op_remove_marker_types.get_value(), op_trim_before_timestamp.get_value(),
             op_trim_after_timestamp.get_value(), op_encodings2regdeps.get_value(),
-            op_verbose.get_value()));
+            op_filter_func_ids.get_value(), op_verbose.get_value()));
     std::vector<record_analysis_tool_t *> tools;
     tools.push_back(record_filter.get());