tast-tests: Collect system trace in CUJs

Add a perfetto config to collect system trace and allow collecting
system trace in cujrecorder.Recorder.

BUG=b:232908163
TEST=tast run <DUT> ui.MeetCUJ.16p_trace ui.MeetCUJ.lacros_16p_trace

Change-Id: I043e8933ca1a5fd03ac8f32e3c2d8966f3a67b77
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/tast-tests/+/3651662
Reviewed-by: Alexander Alekseev <alemate@chromium.org>
Auto-Submit: Yichen Zhou <yichenz@chromium.org>
Tested-by: Yichen Zhou <yichenz@chromium.org>
Reviewed-by: Eric Caruso <ejcaruso@chromium.org>
Reviewed-by: Avery Musbach <amusbach@chromium.org>
Commit-Queue: Yichen Zhou <yichenz@chromium.org>
diff --git a/src/chromiumos/tast/local/bundles/cros/ui/data/perfetto/system_trace_config.pbtxt b/src/chromiumos/tast/local/bundles/cros/ui/data/perfetto/system_trace_config.pbtxt
new file mode 100644
index 0000000..db2322c
--- /dev/null
+++ b/src/chromiumos/tast/local/bundles/cros/ui/data/perfetto/system_trace_config.pbtxt
@@ -0,0 +1,140 @@
+# This is inspired from the internal config: go/perfetto-aot-merged-config, docs/long-traces.md
+
+# Enable periodic flushing of the trace buffer into the output file.
+write_into_file: true
+
+# Max duration: 30 mins.
+duration_ms: 1800000
+
+# Writes the userspace buffer into the file every 2 seconds.
+# The data rate of a typical trace is ~1-4 MB/s. So a 8MB in-memory buffer can hold for up
+# write periods of ~2 seconds before starting to lose data.
+# See https://perfetto.dev/docs/concepts/config#long-traces.
+file_write_period_ms: 2000
+
+# We need to guarantee that events stay within some defined sorting window (for the importer).
+flush_period_ms: 20000
+
+# Stops the tracing session after 300,000,000 bytes have been written.
+max_file_size_bytes: 300000000
+
+# Buffer 0: for process stats, system stats.
+buffers {
+    size_kb: 7168
+    fill_policy: RING_BUFFER
+}
+
+# Buffer 1: for ftrace.
+buffers {
+    size_kb: 7168
+    fill_policy: RING_BUFFER
+}
+
+# Buffer 2: for chromium trace and metadata.
+buffers {
+    size_kb: 14336
+    fill_policy: RING_BUFFER
+}
+
+# Scheduling details.
+data_sources: {
+    config {
+        name: "linux.process_stats"
+        target_buffer: 0
+        process_stats_config {
+            scan_all_processes_on_start: true
+            proc_stats_poll_ms: 1000
+        }
+    }
+}
+
+# Coarse CPU usage counter, kernel meminfo and virtual memory stats.
+data_sources: {
+    config {
+        name: "linux.sys_stats"
+        target_buffer: 0
+        sys_stats_config {
+            meminfo_period_ms: 1000
+            vmstat_period_ms: 1000
+            stat_period_ms: 1000
+            stat_counters: STAT_CPU_TIMES
+            stat_counters: STAT_FORK_COUNT
+        }
+    }
+}
+
+# Ftrace.
+data_sources: {
+    config {
+        name: "linux.ftrace"
+        target_buffer: 1
+        ftrace_config {
+            # Enables symbol name resolution against /proc/kallsyms.
+            symbolize_ksyms: true
+
+            # Scheduling information & process tracking. Useful for:
+            # - what is happening on each CPU at each moment
+            # - why a thread was descheduled
+            # - parent/child relationships between processes and threads.
+            ftrace_events: "power/suspend_resume"
+            ftrace_events: "sched/sched_blocked_reason"
+            ftrace_events: "sched/sched_process_free"
+            ftrace_events: "sched/sched_switch"
+            ftrace_events: "task/task_newtask"
+            ftrace_events: "task/task_rename"
+
+            # User process page faults.
+            ftrace_events: "exceptions/page_fault_user"
+
+            # Wakeup info. Allows you to compute how long a task was
+            # blocked due to CPU contention.
+            ftrace_events: "sched/sched_waking"
+            ftrace_events: "sched/sched_wakeup_new"
+
+            # Cpu events.
+            ftrace_events: "power/cpu_frequency"
+            ftrace_events: "power/cpu_idle"
+
+            compact_sched {
+                # When enabled, specific high-volume events are encoded in a denser format than normal.
+                enabled: true
+            }
+        }
+    }
+}
+
+# Chrome trace events, written to buffer 2 (the buffer declared for chromium trace and metadata).
+data_sources: {
+    config {
+        name: "org.chromium.trace_event"
+        target_buffer: 2
+        chrome_config {
+            # Categories: toplevel, cc, gpu, viz, ui, views, benchmark, input, memory-infra.
+            trace_config: "{\"record_mode\":\"record-until-full\",\"included_categories\":[\"toplevel\",\"cc\",\"gpu\",\"viz\",\"ui\",\"views\",\"benchmark\",\"input\",\"memory-infra\"],\"memory_dump_config\":{}}"
+            # When enabled, the data source should only fill in fields in the output that are not potentially privacy sensitive.
+            privacy_filtering_enabled: false
+            # Don't convert the trace data to the legacy JSON format.
+            convert_to_legacy_json: false
+            # Priority of the tracing session client.
+            client_priority: UNKNOWN
+        }
+    }
+}
+
+# Chrome trace metadata, written to buffer 2 (the buffer declared for chromium trace and metadata).
+data_sources: {
+    config {
+        name: "org.chromium.trace_metadata"
+        target_buffer: 2
+        chrome_config {
+            # Categories: toplevel, cc, gpu, viz, ui, views, benchmark, input, memory-infra.
+            trace_config: "{\"record_mode\":\"record-until-full\",\"included_categories\":[\"toplevel\",\"cc\",\"gpu\",\"viz\",\"ui\",\"views\",\"benchmark\",\"input\",\"memory-infra\"],\"memory_dump_config\":{}}"
+            # When enabled, the data source should only fill in fields in the output that are not potentially privacy sensitive.
+            privacy_filtering_enabled: false
+            # Don't convert the trace data to the legacy JSON format.
+            convert_to_legacy_json: false
+            # Priority of the tracing session client.
+            client_priority: UNKNOWN
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/chromiumos/tast/local/bundles/cros/ui/desks_cuj.go b/src/chromiumos/tast/local/bundles/cros/ui/desks_cuj.go
index 18d042e..15378c9 100644
--- a/src/chromiumos/tast/local/bundles/cros/ui/desks_cuj.go
+++ b/src/chromiumos/tast/local/bundles/cros/ui/desks_cuj.go
@@ -28,6 +28,7 @@
 		Contacts:     []string{"amusbach@chromium.org", "chromeos-perfmetrics-eng@google.com"},
 		Attr:         []string{"group:crosbolt", "crosbolt_perbuild", "group:cuj"},
 		SoftwareDeps: []string{"chrome"},
+		Data:         []string{cujrecorder.SystemTraceConfigFile},
 		Timeout:      2 * time.Hour,
 		Params: []testing.Param{{
 			Val:     browser.TypeAsh,
@@ -128,7 +129,7 @@
 	}
 	defer recorder.Close(cleanupCtx)
 
-	recorder.EnableTracing(s.OutDir())
+	recorder.EnableTracing(s.OutDir(), s.DataPath(cujrecorder.SystemTraceConfigFile))
 
 	if err := recorder.RunFor(ctx, func(ctx context.Context) error {
 		if err := ash.ActivateDeskAtIndex(ctx, tconn, 0); err != nil {
diff --git a/src/chromiumos/tast/local/bundles/cros/ui/idle_perf.go b/src/chromiumos/tast/local/bundles/cros/ui/idle_perf.go
index 224de28..ce466c3 100644
--- a/src/chromiumos/tast/local/bundles/cros/ui/idle_perf.go
+++ b/src/chromiumos/tast/local/bundles/cros/ui/idle_perf.go
@@ -34,6 +34,7 @@
 		Contacts:     []string{"xiyuan@chromium.org", "yichenz@chromium.org", "chromeos-perfmetrics-eng@google.com"},
 		Attr:         []string{"group:crosbolt", "crosbolt_perbuild", "group:cuj"},
 		SoftwareDeps: []string{"chrome"},
+		Data:         []string{cujrecorder.SystemTraceConfigFile},
 		Timeout:      cuj.CPUStablizationTimeout + idleDuration,
 		Pre:          arc.Booted(),
 		Params: []testing.Param{{
@@ -90,7 +91,7 @@
 		}
 	}()
 	if tracing {
-		recorder.EnableTracing(s.OutDir())
+		recorder.EnableTracing(s.OutDir(), s.DataPath(cujrecorder.SystemTraceConfigFile))
 	}
 
 	if err := recorder.Run(ctx, func(ctx context.Context) error {
diff --git a/src/chromiumos/tast/local/bundles/cros/ui/meet_cuj.go b/src/chromiumos/tast/local/bundles/cros/ui/meet_cuj.go
index e7d66de..588b149 100644
--- a/src/chromiumos/tast/local/bundles/cros/ui/meet_cuj.go
+++ b/src/chromiumos/tast/local/bundles/cros/ui/meet_cuj.go
@@ -83,6 +83,7 @@
 		Desc:         "Measures the performance of critical user journey for Google Meet",
 		Contacts:     []string{"yichenz@chromium.org", "chromeos-perfmetrics-eng@google.com"},
 		SoftwareDeps: []string{"chrome", "arc"},
+		Data:         []string{cujrecorder.SystemTraceConfigFile},
 		Vars: []string{
 			"mute",
 			"record",
@@ -458,7 +459,7 @@
 	}
 
 	if meet.tracing {
-		recorder.EnableTracing(s.OutDir())
+		recorder.EnableTracing(s.OutDir(), s.DataPath(cujrecorder.SystemTraceConfigFile))
 	}
 	defer func() {
 		if err := recorder.Close(closeCtx); err != nil {
diff --git a/src/chromiumos/tast/local/bundles/cros/ui/tab_switch_cuj.go b/src/chromiumos/tast/local/bundles/cros/ui/tab_switch_cuj.go
index 7cd4415..a856e84 100644
--- a/src/chromiumos/tast/local/bundles/cros/ui/tab_switch_cuj.go
+++ b/src/chromiumos/tast/local/bundles/cros/ui/tab_switch_cuj.go
@@ -13,6 +13,7 @@
 	"chromiumos/tast/local/chrome/browser"
 	"chromiumos/tast/local/cpu"
 	"chromiumos/tast/local/power"
+	"chromiumos/tast/local/ui/cujrecorder"
 	"chromiumos/tast/local/wpr"
 	"chromiumos/tast/testing"
 )
@@ -25,6 +26,7 @@
 		Attr:         []string{"group:crosbolt", "crosbolt_perbuild", "group:cuj"},
 		Contacts:     []string{"yichenz@chromium.org", "chromeos-perfmetrics-eng@google.com"},
 		SoftwareDeps: []string{"chrome"},
+		Data:         []string{cujrecorder.SystemTraceConfigFile},
 		Timeout:      22*time.Minute + cuj.CPUStablizationTimeout,
 		Vars:         []string{"mute"},
 		Params: []testing.Param{{
diff --git a/src/chromiumos/tast/local/bundles/cros/ui/tabswitchcuj/tab_switch_cuj.go b/src/chromiumos/tast/local/bundles/cros/ui/tabswitchcuj/tab_switch_cuj.go
index 076f2a2..47db9cb 100644
--- a/src/chromiumos/tast/local/bundles/cros/ui/tabswitchcuj/tab_switch_cuj.go
+++ b/src/chromiumos/tast/local/bundles/cros/ui/tabswitchcuj/tab_switch_cuj.go
@@ -166,7 +166,7 @@
 	}
 
 	if param.Tracing {
-		recorder.EnableTracing(s.OutDir())
+		recorder.EnableTracing(s.OutDir(), s.DataPath(cujrecorder.SystemTraceConfigFile))
 	}
 	defer recorder.Close(closeCtx)
 
diff --git a/src/chromiumos/tast/local/bundles/cros/ui/task_switch_cuj.go b/src/chromiumos/tast/local/bundles/cros/ui/task_switch_cuj.go
index 8051005..4fa486a 100644
--- a/src/chromiumos/tast/local/bundles/cros/ui/task_switch_cuj.go
+++ b/src/chromiumos/tast/local/bundles/cros/ui/task_switch_cuj.go
@@ -46,6 +46,7 @@
 		Contacts:     []string{"yichenz@chromium.org", "chromeos-perfmetrics-eng@google.com"},
 		Attr:         []string{"group:crosbolt", "crosbolt_perbuild", "group:cuj"},
 		SoftwareDeps: []string{"chrome"},
+		Data:         []string{cujrecorder.SystemTraceConfigFile},
 		Timeout:      8 * time.Minute,
 		Vars:         []string{"mute"},
 		Params: []testing.Param{
@@ -405,7 +406,7 @@
 		s.Fatal("Failed to create a recorder: ", err)
 	}
 	if testParam.tracing {
-		recorder.EnableTracing(s.OutDir())
+		recorder.EnableTracing(s.OutDir(), s.DataPath(cujrecorder.SystemTraceConfigFile))
 	}
 	defer recorder.Close(closeCtx)
 
diff --git a/src/chromiumos/tast/local/bundles/cros/ui/video_cuj.go b/src/chromiumos/tast/local/bundles/cros/ui/video_cuj.go
index 47ef8f2..cf66777 100644
--- a/src/chromiumos/tast/local/bundles/cros/ui/video_cuj.go
+++ b/src/chromiumos/tast/local/bundles/cros/ui/video_cuj.go
@@ -47,6 +47,7 @@
 		Contacts:     []string{"xiyuan@chromium.org", "chromeos-perfmetrics-eng@google.com"},
 		Attr:         []string{"group:crosbolt", "crosbolt_perbuild", "group:cuj"},
 		SoftwareDeps: []string{"chrome", "arc"},
+		Data:         []string{cujrecorder.SystemTraceConfigFile},
 		HardwareDeps: hwdep.D(hwdep.InternalDisplay()),
 		Timeout:      45 * time.Minute,
 		Vars: []string{
@@ -217,7 +218,7 @@
 		s.Fatal("Failed to create a recorder: ", err)
 	}
 	if testParam.tracing {
-		recorder.EnableTracing(s.OutDir())
+		recorder.EnableTracing(s.OutDir(), s.DataPath(cujrecorder.SystemTraceConfigFile))
 	}
 	defer recorder.Close(closeCtx)
 
diff --git a/src/chromiumos/tast/local/bundles/cros/ui/window_arrangement_cuj.go b/src/chromiumos/tast/local/bundles/cros/ui/window_arrangement_cuj.go
index 182d1d8..6b4dc1d4 100644
--- a/src/chromiumos/tast/local/bundles/cros/ui/window_arrangement_cuj.go
+++ b/src/chromiumos/tast/local/bundles/cros/ui/window_arrangement_cuj.go
@@ -42,7 +42,7 @@
 		HardwareDeps: hwdep.D(hwdep.InternalDisplay()),
 		Vars:         []string{"record"},
 		Timeout:      10*time.Minute + cuj.CPUStablizationTimeout,
-		Data:         []string{"shaka_720.webm", "pip.html"},
+		Data:         []string{"shaka_720.webm", "pip.html", cujrecorder.SystemTraceConfigFile},
 		Params: []testing.Param{
 			{
 				Name: "clamshell_mode",
@@ -189,7 +189,7 @@
 	}
 
 	if testParam.Tracing {
-		recorder.EnableTracing(s.OutDir())
+		recorder.EnableTracing(s.OutDir(), s.DataPath(cujrecorder.SystemTraceConfigFile))
 	}
 	defer recorder.Close(closeCtx)
 
diff --git a/src/chromiumos/tast/local/ui/cujrecorder/recorder.go b/src/chromiumos/tast/local/ui/cujrecorder/recorder.go
index 003ff4a..d9d5e8e 100644
--- a/src/chromiumos/tast/local/ui/cujrecorder/recorder.go
+++ b/src/chromiumos/tast/local/ui/cujrecorder/recorder.go
@@ -6,10 +6,12 @@
 package cujrecorder
 
 import (
+	"compress/gzip"
 	"context"
 	"encoding/json"
 	"fmt"
 	"io/ioutil"
+	"os"
 	"path"
 	"path/filepath"
 	"time"
@@ -19,11 +21,11 @@
 	"chromiumos/tast/errors"
 	"chromiumos/tast/local/arc"
 	"chromiumos/tast/local/chrome"
-	"chromiumos/tast/local/chrome/browser"
 	"chromiumos/tast/local/chrome/metrics"
 	perfSrc "chromiumos/tast/local/perf"
 	"chromiumos/tast/local/power"
 	"chromiumos/tast/local/power/setup"
+	"chromiumos/tast/local/tracing"
 	"chromiumos/tast/testing"
 )
 
@@ -42,6 +44,8 @@
 
 const checkInterval = 5 * time.Second
 
+const SystemTraceConfigFile = "perfetto/system_trace_config.pbtxt"
+
 // MetricConfig is the configuration for the recorder.
 type MetricConfig struct {
 	// The name of the histogram to be recorded.
@@ -144,7 +148,8 @@
 	// Metric records keyed by metric name.
 	records map[string]*record
 
-	traceDir string
+	traceDir        string
+	perfettoCfgPath string
 
 	// duration is the total running time of the recorder.
 	duration time.Duration
@@ -446,9 +451,10 @@
 	return r, nil
 }
 
-// EnableTracing enables tracing when the recorder running test scenario.
-func (r *Recorder) EnableTracing(traceDir string) {
+// EnableSystemTracing enables system tracing when the recorder is running a test scenario.
+func (r *Recorder) EnableTracing(traceDir, perfettoCfgPath string) {
 	r.traceDir = traceDir
+	r.perfettoCfgPath = perfettoCfgPath
 }
 
 // Close clears states for all trackers.
@@ -501,28 +507,52 @@
 		r.mr = nil
 	}(ctx)
 
-	if r.traceDir != "" {
-		if err := r.cr.StartTracing(ctx,
-			[]string{"benchmark", "cc", "gpu", "input", "toplevel", "ui", "views", "viz", "memory-infra"},
-			browser.DisableSystrace()); err != nil {
-			testing.ContextLog(ctx, "Failed to start tracing: ", err)
+	if r.traceDir != "" && r.perfettoCfgPath != "" {
+		sess, err := tracing.StartSession(ctx, r.perfettoCfgPath)
+		testing.ContextLog(ctx, "Starting system tracing session")
+		if err != nil {
 			return nil, errors.Wrap(err, "failed to start tracing")
 		}
 		stopTracing := func(ctx context.Context) error {
-			tr, err := r.cr.StopTracing(ctx)
-			if err != nil {
-				testing.ContextLog(ctx, "Failed to stop tracing: ", err)
+			if err := sess.Stop(); err != nil {
 				return errors.Wrap(err, "failed to stop tracing")
 			}
-			if tr == nil || len(tr.Packet) == 0 {
-				testing.ContextLog(ctx, "No trace data is collected")
-				return errors.New("no trace data is collected")
+			testing.ContextLog(ctx, "Stopping system tracing session")
+
+			data, err := ioutil.ReadAll(sess.TraceResultFile)
+			if err != nil {
+				return errors.Wrap(err, "failed to read from the temp file of trace result")
 			}
+
 			filename := "trace.data.gz"
-			if err := chrome.SaveTraceToFile(ctx, tr, filepath.Join(r.traceDir, filename)); err != nil {
-				testing.ContextLog(ctx, "Failed to save trace to file: ", err)
-				return errors.Wrap(err, "failed to save trace to file")
+			file, err := os.OpenFile(filepath.Join(r.traceDir, filename), os.O_CREATE|os.O_RDWR, 0644)
+			if err != nil {
+				return errors.Wrap(err, "could not open file")
 			}
+			defer func() {
+				if err := file.Close(); err != nil {
+					testing.ContextLog(ctx, "Failed to close file: ", err)
+				}
+			}()
+
+			writer := gzip.NewWriter(file)
+			defer func() {
+				if err := writer.Close(); err != nil {
+					testing.ContextLog(ctx, "Failed to close gzip writer: ", err)
+				}
+			}()
+
+			if _, err := writer.Write(data); err != nil {
+				return errors.Wrap(err, "could not write the data")
+			}
+
+			if err := writer.Flush(); err != nil {
+				return errors.Wrap(err, "could not flush the gzip writer")
+			}
+
+			// The temporary file of trace data is no longer needed when returned.
+			sess.RemoveTraceResultFile()
+
 			return nil
 		}
 		cancel = func(ctx context.Context) error {