[Codesearch] Fix package_index OOM by interning dep paths with the unique package.

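The siso dependency map now stores file paths as unique.Handle[string]
values instead of plain strings, so each repeated path is held only once.
This reduces peak memory usage from 29.6G to 13~14G.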

Bug: 460271270
Change-Id: Ic99b03df5fb17ad1e9d854ae87f062c629184395
Reviewed-on: https://chromium-review.googlesource.com/c/infra/infra/+/7457920
Reviewed-by: Junji Watanabe <jwata@google.com>
Commit-Queue: Marc Jin <jmarc@google.com>
Reviewed-by: Fumitoshi Ukai <ukai@google.com>
Cr-Commit-Position: refs/heads/main@{#78390}
diff --git a/go/src/infra/cmd/package_index/compdb.go b/go/src/infra/cmd/package_index/compdb.go
index 4c4766a..5a1820d 100644
--- a/go/src/infra/cmd/package_index/compdb.go
+++ b/go/src/infra/cmd/package_index/compdb.go
@@ -13,6 +13,7 @@
 	"path/filepath"
 	"strings"
 	"sync"
+	"unique"
 
 	"github.com/golang/protobuf/ptypes"
 
@@ -171,7 +172,7 @@
 	}
 
 	var files []string
-	value, ok := targetDepsMap[target.File]
+	value, ok := targetDepsMap[unique.Make(target.File)]
 	if !ok {
 		return files, fmt.Errorf("fail to find dependencies in siso deps query for %s", target.File)
 	}
@@ -181,7 +182,7 @@
 	// filepaths files.
 	clangTargetChan <- &clangUnitInfo{target, ""}
 	for _, dep := range value {
-		files = appendToFiles(ctx, files, target, dep, target.File)
+		files = appendToFiles(ctx, files, target, dep.Value(), target.File)
 	}
 	return files, nil
 }
@@ -306,12 +307,12 @@
 	commandList = append(commandList, "-DKYTHE_IS_RUNNING=1")
 
 	if useSiso {
-		deps, ok := targetDepsMap[clangInfo.unit.File]
+		deps, ok := targetDepsMap[unique.Make(clangInfo.unit.File)]
 		if !ok {
 			return nil, fmt.Errorf("getClangUnit: not able to find dependencies for %s", clangInfo.unit.File)
 		}
 		for _, dep := range deps {
-			if err := processAndAddClangUnitInput(ctx, dep, clangInfo, outDir, corpus, hashMaps, unitProto); err != nil {
+			if err := processAndAddClangUnitInput(ctx, dep.Value(), clangInfo, outDir, corpus, hashMaps, unitProto); err != nil {
 				return nil, err
 			}
 		}
diff --git a/go/src/infra/cmd/package_index/main.go b/go/src/infra/cmd/package_index/main.go
index 0dc950d..7a04a13 100644
--- a/go/src/infra/cmd/package_index/main.go
+++ b/go/src/infra/cmd/package_index/main.go
@@ -5,6 +5,7 @@
 package main
 
 import (
+	"cmp"
 	"context"
 	"encoding/json"
 	"errors"
@@ -18,6 +19,7 @@
 	"sync"
 	"text/tabwriter"
 	"time"
+	"unique"
 
 	"go.chromium.org/luci/common/data/stringset"
 	"go.chromium.org/luci/common/logging"
@@ -64,7 +66,7 @@
 
 // Only built when use_siso = true.
 // Key: source file, value: list of dependencies.
-type targetDepsMap = map[string][]string
+type targetDepsMap = map[unique.Handle[string]][]unique.Handle[string]
 
 // validateFlags checks that the required flags are present.
 func validateFlags(ctx context.Context) {
@@ -119,9 +121,13 @@
 func loadSisoJsonDeps(depsReader io.Reader) (targetDepsMap, error) {
 	m := make(targetDepsMap)
 
+	var target depInfo
 	decoder := json.NewDecoder(depsReader)
 	for {
-		var target depInfo
+		// Reset the reused target so values from the previous entry do not carry over; Deps keeps its capacity.
+		target.Target = ""
+		target.Deps = target.Deps[:0]
+
 		err := decoder.Decode(&target)
 		if errors.Is(err, io.EOF) {
 			break
@@ -133,38 +139,43 @@
 		if len(target.Deps) == 0 || !strings.HasPrefix(target.Target, "obj/") {
 			continue
 		}
-		var sourceFile string
+		var sourceFileHandle unique.Handle[string]
+		targetHandles := make([]unique.Handle[string], 0, len(target.Deps))
 		for _, dep := range target.Deps {
+			h := unique.Make(dep)
 			// Find the first source file.
 			// It is usually the first/second element of the dependency list.
-			if isSourceFile(dep) {
-				sourceFile = dep
-				break
+			if sourceFileHandle == (unique.Handle[string]{}) && isSourceFile(dep) {
+				sourceFileHandle = h
 			}
+			targetHandles = append(targetHandles, h)
 		}
-		if sourceFile == "" {
+
+		if sourceFileHandle == (unique.Handle[string]{}) {
 			// If no source file, just ignore the current group.
 			continue
 		}
-		value, ok := m[sourceFile]
+		value, ok := m[sourceFileHandle]
 		if !ok {
-			m[sourceFile] = target.Deps
+			m[sourceFileHandle] = targetHandles
 			continue
 		}
 		// Merge the dependencies if source file is previously encountered.
-		set := make(map[string]struct{})
+		set := make(map[unique.Handle[string]]struct{})
 		for _, val := range value {
 			set[val] = struct{}{}
 		}
-		for _, val := range target.Deps {
+		for _, val := range targetHandles {
 			set[val] = struct{}{}
 		}
-		result := make([]string, 0, len(set))
+		result := make([]unique.Handle[string], 0, len(set))
 		for key := range set {
 			result = append(result, key)
 		}
-		slices.Sort(result)
-		m[sourceFile] = result
+		slices.SortFunc(result, func(a, b unique.Handle[string]) int {
+			return cmp.Compare(a.Value(), b.Value())
+		})
+		m[sourceFileHandle] = result
 	}
 
 	return m, nil
diff --git a/go/src/infra/cmd/package_index/package_index_unix_test.go b/go/src/infra/cmd/package_index/package_index_unix_test.go
index e79764b..44608a3 100644
--- a/go/src/infra/cmd/package_index/package_index_unix_test.go
+++ b/go/src/infra/cmd/package_index/package_index_unix_test.go
@@ -17,6 +17,7 @@
 	"sync"
 	"testing"
 	"time"
+	"unique"
 
 	"google.golang.org/protobuf/encoding/prototext"
 	"google.golang.org/protobuf/proto"
@@ -132,7 +133,7 @@
 				go func() {
 					// Process clang files.
 					err := clangTargets.ProcessClangTargets(ip.ctx, ip.rootPath, ip.outDir, ip.corpus,
-						ip.buildConfig, "", ip.hashMaps, dataFileChannel, unitProtoChannel, false, make(map[string][]string))
+						ip.buildConfig, "", ip.hashMaps, dataFileChannel, unitProtoChannel, false, make(map[unique.Handle[string]][]unique.Handle[string]))
 					if err != nil {
 						t.Error(err)
 					}
diff --git a/go/src/infra/cmd/package_index/package_index_windows_test.go b/go/src/infra/cmd/package_index/package_index_windows_test.go
index 60038bf..fd9171b 100644
--- a/go/src/infra/cmd/package_index/package_index_windows_test.go
+++ b/go/src/infra/cmd/package_index/package_index_windows_test.go
@@ -18,6 +18,7 @@
 	"sync"
 	"testing"
 	"time"
+	"unique"
 
 	"google.golang.org/protobuf/encoding/prototext"
 	"google.golang.org/protobuf/proto"
@@ -153,7 +154,7 @@
 				go func() {
 					// Process clang files.
 					err := clangTargets.ProcessClangTargets(ip.ctx, ip.rootPath, ip.outDir, ip.corpus,
-						ip.buildConfig, ip.clangTargetArch, ip.hashMaps, dataFileChannel, unitProtoChannel, false, make(map[string][]string))
+						ip.buildConfig, ip.clangTargetArch, ip.hashMaps, dataFileChannel, unitProtoChannel, false, make(map[unique.Handle[string]][]unique.Handle[string]))
 					if err != nil {
 						// See b:227367175 for context.
 						panic(err.Error())