[Codesearch] Fix package_index OOM issue by interning the source file and dependency strings in targetDepsMap with Go's unique package (unique.Handle[string]). The fix can reduce peak memory usage from 29.6G to about 13-14G. Bug: 460271270 Change-Id: Ic99b03df5fb17ad1e9d854ae87f062c629184395 Reviewed-on: https://chromium-review.googlesource.com/c/infra/infra/+/7457920 Reviewed-by: Junji Watanabe <jwata@google.com> Commit-Queue: Marc Jin <jmarc@google.com> Reviewed-by: Fumitoshi Ukai <ukai@google.com> Cr-Commit-Position: refs/heads/main@{#78390}
diff --git a/go/src/infra/cmd/package_index/compdb.go b/go/src/infra/cmd/package_index/compdb.go index 4c4766a..5a1820d 100644 --- a/go/src/infra/cmd/package_index/compdb.go +++ b/go/src/infra/cmd/package_index/compdb.go
@@ -13,6 +13,7 @@ "path/filepath" "strings" "sync" + "unique" "github.com/golang/protobuf/ptypes" @@ -171,7 +172,7 @@ } var files []string - value, ok := targetDepsMap[target.File] + value, ok := targetDepsMap[unique.Make(target.File)] if !ok { return files, fmt.Errorf("fail to find dependencies in siso deps query for %s", target.File) } @@ -181,7 +182,7 @@ // filepaths files. clangTargetChan <- &clangUnitInfo{target, ""} for _, dep := range value { - files = appendToFiles(ctx, files, target, dep, target.File) + files = appendToFiles(ctx, files, target, dep.Value(), target.File) } return files, nil } @@ -306,12 +307,12 @@ commandList = append(commandList, "-DKYTHE_IS_RUNNING=1") if useSiso { - deps, ok := targetDepsMap[clangInfo.unit.File] + deps, ok := targetDepsMap[unique.Make(clangInfo.unit.File)] if !ok { return nil, fmt.Errorf("getClangUnit: not able to find dependencies for %s", clangInfo.unit.File) } for _, dep := range deps { - if err := processAndAddClangUnitInput(ctx, dep, clangInfo, outDir, corpus, hashMaps, unitProto); err != nil { + if err := processAndAddClangUnitInput(ctx, dep.Value(), clangInfo, outDir, corpus, hashMaps, unitProto); err != nil { return nil, err } }
diff --git a/go/src/infra/cmd/package_index/main.go b/go/src/infra/cmd/package_index/main.go index 0dc950d..7a04a13 100644 --- a/go/src/infra/cmd/package_index/main.go +++ b/go/src/infra/cmd/package_index/main.go
@@ -5,6 +5,7 @@ package main import ( + "cmp" "context" "encoding/json" "errors" @@ -18,6 +19,7 @@ "sync" "text/tabwriter" "time" + "unique" "go.chromium.org/luci/common/data/stringset" "go.chromium.org/luci/common/logging" @@ -64,7 +66,7 @@ // Only built when use_siso = true. // Key: source file, value: list of dependencies. -type targetDepsMap = map[string][]string +type targetDepsMap = map[unique.Handle[string]][]unique.Handle[string] // validateFlags checks that the required flags are present. func validateFlags(ctx context.Context) { @@ -119,9 +121,13 @@ func loadSisoJsonDeps(depsReader io.Reader) (targetDepsMap, error) { m := make(targetDepsMap) + var target depInfo decoder := json.NewDecoder(depsReader) for { - var target depInfo + // Clean up the target variable to reuse. + target.Target = "" + target.Deps = target.Deps[:0] + err := decoder.Decode(&target) if errors.Is(err, io.EOF) { break @@ -133,38 +139,43 @@ if len(target.Deps) == 0 || !strings.HasPrefix(target.Target, "obj/") { continue } - var sourceFile string + var sourceFileHandle unique.Handle[string] + targetHandles := make([]unique.Handle[string], 0, len(target.Deps)) for _, dep := range target.Deps { + h := unique.Make(dep) // Find the first source file. // It is usually the first/second element of the dependency list. - if isSourceFile(dep) { - sourceFile = dep - break + if sourceFileHandle == (unique.Handle[string]{}) && isSourceFile(dep) { + sourceFileHandle = h } + targetHandles = append(targetHandles, h) } - if sourceFile == "" { + + if sourceFileHandle == (unique.Handle[string]{}) { // If no source file, just ignore the current group. continue } - value, ok := m[sourceFile] + value, ok := m[sourceFileHandle] if !ok { - m[sourceFile] = target.Deps + m[sourceFileHandle] = targetHandles continue } // Merge the dependencies if source file is previously encountered. 
- set := make(map[string]struct{}) + set := make(map[unique.Handle[string]]struct{}) for _, val := range value { set[val] = struct{}{} } - for _, val := range target.Deps { + for _, val := range targetHandles { set[val] = struct{}{} } - result := make([]string, 0, len(set)) + result := make([]unique.Handle[string], 0, len(set)) for key := range set { result = append(result, key) } - slices.Sort(result) - m[sourceFile] = result + slices.SortFunc(result, func(a, b unique.Handle[string]) int { + return cmp.Compare(a.Value(), b.Value()) + }) + m[sourceFileHandle] = result } return m, nil
diff --git a/go/src/infra/cmd/package_index/package_index_unix_test.go b/go/src/infra/cmd/package_index/package_index_unix_test.go index e79764b..44608a3 100644 --- a/go/src/infra/cmd/package_index/package_index_unix_test.go +++ b/go/src/infra/cmd/package_index/package_index_unix_test.go
@@ -17,6 +17,7 @@ "sync" "testing" "time" + "unique" "google.golang.org/protobuf/encoding/prototext" "google.golang.org/protobuf/proto" @@ -132,7 +133,7 @@ go func() { // Process clang files. err := clangTargets.ProcessClangTargets(ip.ctx, ip.rootPath, ip.outDir, ip.corpus, - ip.buildConfig, "", ip.hashMaps, dataFileChannel, unitProtoChannel, false, make(map[string][]string)) + ip.buildConfig, "", ip.hashMaps, dataFileChannel, unitProtoChannel, false, make(map[unique.Handle[string]][]unique.Handle[string])) if err != nil { t.Error(err) }
diff --git a/go/src/infra/cmd/package_index/package_index_windows_test.go b/go/src/infra/cmd/package_index/package_index_windows_test.go index 60038bf..fd9171b 100644 --- a/go/src/infra/cmd/package_index/package_index_windows_test.go +++ b/go/src/infra/cmd/package_index/package_index_windows_test.go
@@ -18,6 +18,7 @@ "sync" "testing" "time" + "unique" "google.golang.org/protobuf/encoding/prototext" "google.golang.org/protobuf/proto" @@ -153,7 +154,7 @@ go func() { // Process clang files. err := clangTargets.ProcessClangTargets(ip.ctx, ip.rootPath, ip.outDir, ip.corpus, - ip.buildConfig, ip.clangTargetArch, ip.hashMaps, dataFileChannel, unitProtoChannel, false, make(map[string][]string)) + ip.buildConfig, ip.clangTargetArch, ip.hashMaps, dataFileChannel, unitProtoChannel, false, make(map[unique.Handle[string]][]unique.Handle[string])) if err != nil { // See b:227367175 for context. panic(err.Error())