Added top ten most frequent system calls by pid

Keeps a running list of the ten pid/system-call pairs with the highest
per-interval call counts seen since the counters were last cleared.

BUG=chromium-os:21206
TEST=x86

Change-Id: I11169d3887b04a4eaa807298fdceaf0dd6d2008c
Reviewed-on: http://gerrit.chromium.org/gerrit/8733
Tested-by: Paul Taysom <taysom@google.com>
Reviewed-by: Luigi Semenzato <semenzato@chromium.org>
diff --git a/collector.c b/collector.c
index eee97a8..f624f93 100644
--- a/collector.c
+++ b/collector.c
@@ -37,11 +37,11 @@
 u64 Slept;
 int Pid[MAX_PID];
 
-PidCall_s Pid_call[MAX_PIDCALLS];
-PidCall_s *Pidclock = Pid_call;
-u64 Pid_call_iterations;
-u64 Pid_call_record;
-u64 Pid_call_tick;
+Pidcall_s Pidcall[MAX_PIDCALLS];
+Pidcall_s *Pidclock = Pidcall;
+u64 Pidcall_iterations;
+u64 Pidcall_record;
+u64 Pidcall_tick;
 
 u64 No_enter;
 u64 Found;
@@ -49,7 +49,7 @@
 u64 No_start;
 u64 Bad_type;
 
-PidCall_s *Pid_call_bucket[PIDCALL_BUCKETS];
+Pidcall_s *Pidcall_bucket[PIDCALL_BUCKETS];
 
 int Sys_exit;
 int Sys_enter;
@@ -58,7 +58,7 @@
 
 static void init_kernel_release(void)
 {
-	if (uname_kernel_release() < kernel_release("2.6.38")) {
+	if (kernel_release() < release_to_int("2.6.38")) {
 		Event_path = "events/syscalls/%s/enable";
 		Sys_exit = 21;
 		Sys_enter = 22;
@@ -164,18 +164,18 @@
 	return fd;
 }
 
-static PidCall_s *hash_pidcall(u32 pidcall)
+static Pidcall_s *hash_pidcall(u32 pidcall)
 {
-	return (PidCall_s *)&Pid_call_bucket[pidcall % PIDCALL_BUCKETS];
+	return (Pidcall_s *)&Pidcall_bucket[pidcall % PIDCALL_BUCKETS];
 }
 
-static PidCall_s *find_pidcall(u32 pidcall)
+static Pidcall_s *find_pidcall(u32 pidcall)
 {
-	PidCall_s *pc = hash_pidcall(pidcall);
+	Pidcall_s *pc = hash_pidcall(pidcall);
 
-	++Pid_call_record;
+	++Pidcall_record;
 	for (;;) {
-		++Pid_call_iterations;
+		++Pidcall_iterations;
 		pc = pc->next;
 		if (!pc) return NULL;
 		if (pc->pidcall == pidcall) {
@@ -185,9 +185,9 @@
 	}
 }
 
-static void add_pidcall(PidCall_s *pidcall)
+static void add_pidcall(Pidcall_s *pidcall)
 {
-	PidCall_s *pc = hash_pidcall(pidcall->pidcall);
+	Pidcall_s *pc = hash_pidcall(pidcall->pidcall);
 
 	pidcall->next = pc->next;
 	pc->next = pidcall;
@@ -195,8 +195,8 @@
 
 static void remove_pidcall(u32 pidcall)
 {
-	PidCall_s *prev = hash_pidcall(pidcall);
-	PidCall_s *next;
+	Pidcall_s *prev = hash_pidcall(pidcall);
+	Pidcall_s *next;
 
 	for (;;) {
 		next = prev->next;
@@ -209,15 +209,19 @@
 	}
 }
 
-static PidCall_s *victim_pidcall(u32 pidcall)
+/* reallocate_pidcall finds a Pidcall slot using a clock
+ * algorithm, throws away the data and gives it out
+ * to be reused.
+ */
+static Pidcall_s *reallocate_pidcall(u32 pidcall)
 {
-	PidCall_s *pc = Pidclock;
+	Pidcall_s *pc = Pidclock;
 
 	while (pc->clock) {
-		++Pid_call_tick;
+		++Pidcall_tick;
 		pc->clock = 0;
-		if (++Pidclock == &Pid_call[MAX_PIDCALLS]) {
-			Pidclock = Pid_call;
+		if (++Pidclock == &Pidcall[MAX_PIDCALLS]) {
+			Pidclock = Pidcall;
 		}
 		pc = Pidclock;
 	}
@@ -240,7 +244,7 @@
 	int	pid      = sy->ev.pid;
 	snint	call_num = sy->id;
 	u32	pidcall  = mkpidcall(pid, call_num);
-	PidCall_s *pc;
+	Pidcall_s *pc;
 
 	++Pid[pid];
 
@@ -252,7 +256,7 @@
 
 	pc = find_pidcall(pidcall);
 	if (!pc) {
-		pc = victim_pidcall(pidcall);
+		pc = reallocate_pidcall(pidcall);
 	}
 	++pc->count;
 	pc->time.start = time;
@@ -265,7 +269,7 @@
 	int	pid      = sy->ev.pid;
 	snint	call_num = sy->id;
 	u32	pidcall  = mkpidcall(pid, call_num);
-	PidCall_s *pc;
+	Pidcall_s *pc;
 
 	pc = find_pidcall(pidcall);
 	if (!pc) {
@@ -327,13 +331,13 @@
 		if (r->type_len == 0) {
 			/* Larger record where size is at beginning of record */
 			length = r->array[0];
-			size	= 4 + length * 4;
-			time	+= r->time_delta;
+			size = 4 + length * 4;
+			time += r->time_delta;
 		} else if (r->type_len <= 28) {
 			/* Data record */
 			length = r->type_len;
-			size	= 4 + length * 4;
-			time	+= r->time_delta;
+			size = 4 + length * 4;
+			time += r->time_delta;
 			if (Dump) {
 				dump_event(buf);
 			} else {
@@ -342,7 +346,7 @@
 		} else if (r->type_len == 29) {
 			/* Left over page padding or discarded event */
 			if (r->time_delta == 0) {
-				goto done;
+				break;
 			} else {
 				length = r->array[0];
 				size = 4 + length * 4;
@@ -360,9 +364,12 @@
 			warn(" Unknown event %d", r->type_len);
 			/* Unknown - ignore */
 			size = 4;
+			break;
+		}
+		if (size > end - buf) {
+			break;
 		}
 	}
-done:
 	pthread_mutex_unlock(&Count_lock);
 	return commit;
 }
diff --git a/display.c b/display.c
index 9ae4d42..3745a30 100644
--- a/display.c
+++ b/display.c
@@ -31,6 +31,8 @@
 	SELF_COL = 0,
 	MAX_ROW  = SELF_ROW,
 	MAX_COL  = SELF_COL + 40,
+	TOP_PID_CALL_ROW = TOP_ROW,
+	TOP_PID_CALL_COL = 35,
 };
 
 typedef struct Top_ten_s {
@@ -43,10 +45,10 @@
 	int	index;
 } Display_call_s;
 
-bool Plot = FALSE;
-
+/* Top ten highest count for pid/sys_call */
 static Top_ten_s Top_ten[10];
 
+/* Defines graph area for total system calls */
 static graph_s TotalGraph = {{0, 0}, {{0, 10}, {60, 20}}};
 
 Display_call_s Display_call[] = {
@@ -69,8 +71,9 @@
 static void help(void)
 {
 	mvprintw(HELP_ROW, HELP_COL,
-		"q quit  c clear  k kernel ops  p plot op"
+		"q quit  c clear  k kernel ops  g graph op"
 		"  i internal ops  f file ops"
+		"  p pause"
 		"  < shorter  > longer %d.%.3d",
 		Sleep.tv_sec, Sleep.tv_nsec / ONE_MILLION);
 }
@@ -102,10 +105,10 @@
 	mvprintw(SELF_ROW+6, SELF_COL, "No_start:    %12lld", No_start);
 	mvprintw(SELF_ROW+7, SELF_COL, "Bad type:    %12lld", Bad_type);
 	if (1) {
-		mvprintw(SELF_ROW+10, SELF_COL, "Ticks:       %12lld", Pid_call_tick);
-		if (Pid_call_record == 0) return;
-		avg = (double)Pid_call_iterations / (double)Pid_call_record;
-		Pid_call_iterations = Pid_call_record = 0;
+		mvprintw(SELF_ROW+10, SELF_COL, "Ticks:       %12lld", Pidcall_tick);
+		if (Pidcall_record == 0) return;
+		avg = (double)Pidcall_iterations / (double)Pidcall_record;
+		Pidcall_iterations = Pidcall_record = 0;
 		if (avg > max) max =avg;
 		mvprintw(SELF_ROW+11, SELF_COL, "Avg:              %g", avg);
 		mvprintw(SELF_ROW+12, SELF_COL, "Max:              %g", max);
@@ -151,7 +154,7 @@
 
 static void display_pidcall(void)
 {
-	PidCall_s *pc;
+	Pidcall_s *pc;
 	int row = PID_ROW;
 	int col = PID_COL;
 	int pid;
@@ -177,12 +180,30 @@
 	}
 }
 
+static void display_top_pidcall(void)
+{
+	TopPidcall_s *tc;
+	int row = TOP_PID_CALL_ROW;
+	int col = TOP_PID_CALL_COL;
+
+	mvprintw(row++, col, "   count   duration   when    pid");
+	for (tc = Top_pidcall; tc < &Top_pidcall[MAX_TOP]; tc++, row++) {
+		if (tc->count == 0) return;
+		mvprintw(row, col,
+		         "%8d %10lld %6d %6d %-22.22s %-30.30s",
+		         tc->count, tc->time / tc->count,
+		         tc->tick, get_pid(tc->pidcall),
+		         Syscall[get_call(tc->pidcall)],
+		         tc->name);
+	}
+}
+
 static void display_top_ten(void)
 {
 	int row = TOP_ROW;
 	int i;
 
-	mvprintw(row++, TOP_COL, "    hits sys_call");
+	mvprintw(row++, TOP_COL, "   count sys_call");
 	for (i = 0; i < 10; i++, row++) {
 		if (Top_ten[i].value == 0) return;
 		mvprintw(row, TOP_COL, "%8d %-22.22s",
@@ -254,6 +275,7 @@
 	top_ten();
 	display_pidcall();
 	display_top_ten();
+	display_top_pidcall();
 	help();
 	refresh();
 }
diff --git a/ktop.h b/ktop.h
index 1a4c6c4..e2bdb5f 100644
--- a/ktop.h
+++ b/ktop.h
@@ -19,7 +19,9 @@
 	MAX_NAME = 1 << 12,
 	SYSCALL_SHIFT = 9,
 	SYSCALL_MASK  = (1 << SYSCALL_SHIFT) - 1,
-	NUM_ARGS = 6 };
+	NUM_ARGS = 6,
+	MAX_TOP = 10,
+	MAX_THREAD_NAME = 40 };
 
 CHECK_CONST((1 << SYSCALL_SHIFT) >= NUM_SYS_CALLS);
 
@@ -40,9 +42,9 @@
 
 typedef void (*display_fn)(void);
 
-typedef struct PidCall_s PidCall_s;
-struct PidCall_s {
-	PidCall_s *next;
+typedef struct Pidcall_s Pidcall_s;
+struct Pidcall_s {
+	Pidcall_s *next;
 	u32 pidcall;
 	u32 count;
 	unint clock;
@@ -58,22 +60,29 @@
 	char *name;
 };
 
+typedef struct TopPidcall_s {
+	u32 pidcall;
+	u32 count;
+	u32 tick;
+	u64 time;
+	char name[MAX_THREAD_NAME];
+} TopPidcall_s;
+
 extern bool Dump;	/* Dump of ftrace logs - don't start display */
 extern bool Trace_exit;	/* Trace sys_exit events */
 extern bool Trace_self;	/* Trace myself and ignore others */
+extern bool Pause;	/* Pause display */
 
 extern display_fn Display;
-void internal_display(void);
-void kernel_display(void);
-void plot_display(void);
-void file_system_display(void);
 
 extern u64 Syscall_count[NUM_SYS_CALLS];
 extern int Pid[MAX_PID];
-extern PidCall_s Pid_call[MAX_PIDCALLS];
-extern u64 Pid_call_record;
-extern u64 Pid_call_iterations;
-extern u64 Pid_call_tick;
+extern Pidcall_s Pidcall[MAX_PIDCALLS];
+extern u64 Pidcall_record;
+extern u64 Pidcall_iterations;
+extern u64 Pidcall_tick;
+
+extern TopPidcall_s Top_pidcall[MAX_TOP];
 
 extern u64 No_enter;
 extern u64 Found;
@@ -85,6 +94,11 @@
 extern u64 Slept;
 extern bool Halt;
 
+void internal_display(void);
+void kernel_display(void);
+void plot_display(void);
+void file_system_display(void);
+
 void cleanup(int sig);
 
 void cleanup_collector(void);
diff --git a/main.c b/main.c
index dd3b457..ab9d261 100644
--- a/main.c
+++ b/main.c
@@ -22,6 +22,7 @@
 bool Dump = FALSE;
 bool Trace_exit = TRUE;
 bool Trace_self = FALSE;
+bool Pause = FALSE;
 
 display_fn Display = kernel_display;
 
@@ -92,9 +93,10 @@
 		"\tq - quit\n"
 		"\tc - reset internal counters\n"
 		"\tk - display top kernel operations (default)\n"
-		"\tp - display plot of selected operation\n"
+		"\tg - display graph of selected operation\n"
 		"\tf - display just file system operations\n"
 		"\ti - display counters internal to ktop for debugging\n"
+		"\tp - toggle pause\n"
 		"\t< - reduce redisplay interval\n"
 		"\t> - increase redisplay interval\n",
 		getprogname());
@@ -108,7 +110,7 @@
 	setprogname(argv[0]);
 	set_signals();
 
-	while ((c = getopt(argc, argv, "dhs?")) != -1) {
+	while ((c = getopt(argc, argv, "dhs")) != -1) {
 		switch (c) {
 		case 'd':
 			Dump = TRUE;
@@ -117,11 +119,10 @@
 			Trace_self = TRUE;
 			break;
 		case 'h':
-		case '?':
 			usage();
 			break;
 		default:
-			fprintf(stderr, "something out of date '%c'\n", c);
+			fprintf(stderr, "unknown flag '%c'\n", c);
 			usage();
 			break;
 		}
@@ -162,12 +163,15 @@
 		case 'k':
 			Display = kernel_display;
 			break;
-		case 'p':
+		case 'g':
 			Display = plot_display;
 			break;
 		case 'f':
 			Display = file_system_display;
 			break;
+		case 'p':
+			Pause = !Pause;
+			break;
 		default:
 			break;  // ignore
 		}
diff --git a/reduce.c b/reduce.c
index 46160b8..c9dae52 100644
--- a/reduce.c
+++ b/reduce.c
@@ -20,39 +20,114 @@
 
 extern pthread_mutex_t Count_lock;
 
-
 struct timespec Sleep = { 1, 0 };
 
+/*
+ * A and B are two arrays of counters for sys_call events
+ * that are swapped between old and new.
+ */
 static u64 A[NUM_SYS_CALLS];
 static u64 B[NUM_SYS_CALLS];
-
 u64 *Old = A;
 u64 *New = B;
+
+/* Difference between New and Old */
 int Delta[NUM_SYS_CALLS];
+
+/* Descending sorted array of counts for pid/system_calls */
 void *Rank_pidcall[MAX_PIDCALLS];
+
+/* Current number of pid/sys_calls in Rank_pidcall array */
 int Num_rank;
 
+/* Top count for pid/sys_calls since last clear */
+TopPidcall_s Top_pidcall[MAX_TOP];
+
+/* Change in total count of sys_calls events */
 TickCounter_s Total_delta;
 
+/* Number of times reduce/display has been called. */
+static int Num_ticks = 0;
+
 static int compare_pidcall(const void *a, const void *b)
 {
-	const PidCall_s *p = *(const PidCall_s **)a;
-	const PidCall_s *q = *(const PidCall_s **)b;
+	const Pidcall_s *p = *(const Pidcall_s **)a;
+	const Pidcall_s *q = *(const Pidcall_s **)b;
 
 	if (p->save.count > q->save.count) return -1;
 	if (p->save.count == q->save.count) return 0;
 	return 1;
 }
 
+static void fill_top_pidcall(TopPidcall_s *tc, Pidcall_s *pc)
+{
+	tc->pidcall = pc->pidcall;
+	tc->count   = pc->save.count;
+	tc->tick    = Num_ticks;
+	tc->time    = pc->save.time;
+	if (pc->name) {
+		strncpy(tc->name, pc->name, MAX_THREAD_NAME);
+		tc->name[MAX_THREAD_NAME - 1] = '\0';
+	} else {
+		tc->name[0] = '\0';
+	}
+}
+
+static void replace_top_pidcall(TopPidcall_s *insert_here, Pidcall_s *pc)
+{
+	TopPidcall_s *tc;
+
+	/* see if this pidcall is already in list */
+	for (tc = Top_pidcall; tc < &Top_pidcall[MAX_TOP]; tc++) {
+		if (pc->pidcall == tc->pidcall) {
+			if (pc->save.count <= tc->count) return;
+			assert(tc >= insert_here);
+			memmove(&insert_here[1], insert_here,
+				(tc - insert_here) * sizeof(*tc));
+			fill_top_pidcall(insert_here, pc);
+			return;
+		}
+	}
+	memmove(&insert_here[1], insert_here,
+		(tc - insert_here - 1) * sizeof(*tc));
+	fill_top_pidcall(insert_here, pc);
+}
+
+static void top_pidcall(void)
+{
+	Pidcall_s *pc;
+	int i;
+	int j;
+	int num_top;
+
+	++Num_ticks;
+	if (Num_rank < MAX_TOP) {
+		num_top = Num_rank;
+	} else {
+		num_top = MAX_TOP;
+	}
+	for (i = 0; i < num_top; i++) {
+		pc = Rank_pidcall[i];
+		if (pc->save.count < Top_pidcall[MAX_TOP - 1].count) break;
+		for (j = 0; j < MAX_TOP; j++) {
+			TopPidcall_s *tc = &Top_pidcall[j];
+			if (pc->save.count >= tc->count) {
+				replace_top_pidcall(tc, pc);
+				break;
+			}
+		}
+	}
+}
+
 static void reduce_pidcall(void)
 {
-	PidCall_s *pc;
-	int j;
+	Pidcall_s *pc;
+	int k;
 
 	pthread_mutex_lock(&Count_lock);
-	for (pc = Pid_call, j = 0; pc < &Pid_call[MAX_PIDCALLS]; pc++) {
+	for (pc = Pidcall, k = 0; pc < &Pidcall[MAX_PIDCALLS]; pc++) {
 		if (pc->count) {
-			Rank_pidcall[j++] = pc;
+			Rank_pidcall[k++] = pc;
 			pc->save.count = pc->count;
 			pc->count = 0;
 			pc->save.time = pc->time.total;
@@ -60,10 +135,11 @@
 		}
 	}
 	pthread_mutex_unlock(&Count_lock);
-	Num_rank = j;
-	if (1) {
-		qsort(Rank_pidcall, j, sizeof(void *), compare_pidcall);
-	}
+	Num_rank = k;
+
+	qsort(Rank_pidcall, k, sizeof(void *), compare_pidcall);
+
+	top_pidcall();
 }
 
 static void delta(void)
@@ -129,6 +205,7 @@
 	zero(Syscall_count);
 	Ignored_pid_count = 0;
 	Slept = 0;
+	zero(Top_pidcall);
 }
 
 void *reduce(void *arg)
@@ -140,7 +217,7 @@
 		if (Halt) return NULL;
 		delta();
 		reduce_pidcall();
-		Display();
+		if (!Pause) Display();
 		nanosleep(&Sleep, NULL);
 	}
 }
diff --git a/util.c b/util.c
index 6cf36dc..8c09827 100644
--- a/util.c
+++ b/util.c
@@ -10,7 +10,7 @@
 
 #include "util.h"
 
-int kernel_release (char *r)
+int release_to_int (char *r)
 {
 	int a;
 	int b;
@@ -26,10 +26,10 @@
 	return a;
 }
 
-int uname_kernel_release (void)
+int kernel_release (void)
 {
 	struct utsname buf;
 	int rc = uname(&buf);
 	if (rc) fatal("rc=%d:", rc);
-	return kernel_release(buf.release);
+	return release_to_int(buf.release);
 }
diff --git a/util.h b/util.h
index cabb6b0..e34f74d 100644
--- a/util.h
+++ b/util.h
@@ -6,7 +6,7 @@
 #ifndef _UTIL_H_
 #define _UTIL_H_ 1
 
-int kernel_release(char *r);
-int uname_kernel_release(void);
+int release_to_int(char *r);
+int kernel_release(void);
 
 #endif /* _UTIL_H_ */