Reland "lib: Move gmerge binhost from src/platform/dev"

This is a reland of commit 8d0f67b603baecf2da3e1ad830062e356430f5ab

Original change's description:
> lib: Move gmerge binhost from src/platform/dev
>
> Move the gmerge binhost logic that was previously in
> src/platform/dev/builder.py into Chromite so that
> scripts/strip_package.py isn't importing a module from platform.
>
> BUG=b:338512093
> TEST=./run_tests
>
> Change-Id: I3d96de6a54609c429bb07abaffd88acec56b5e26
> Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/chromite/+/5531182
> Reviewed-by: Jack Rosenthal <jrosenth@chromium.org>
> Tested-by: Tim Bain <tbain@google.com>
> Commit-Queue: Tim Bain <tbain@google.com>

BUG=b:338512093
TEST=./run_tests

Change-Id: Ic3388889f1037428761f76ab9ca4242d8c2d50ff
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/chromite/+/5545259
Commit-Queue: Tim Bain <tbain@google.com>
Tested-by: Tim Bain <tbain@google.com>
Reviewed-by: Jack Rosenthal <jrosenth@chromium.org>
diff --git a/lib/cros_build_lib.py b/lib/cros_build_lib.py
index d1e071d..29e5656 100644
--- a/lib/cros_build_lib.py
+++ b/lib/cros_build_lib.py
@@ -942,7 +942,7 @@
     raise DieSystemExit(1)
 
 
-def GetSysrootToolPath(sysroot, tool_name):
+def GetSysrootToolPath(sysroot: str, tool_name: str) -> str:
     """Returns the path to the sysroot specific version of a tool.
 
     Does not check that the tool actually exists.
@@ -955,7 +955,13 @@
         string path to tool inside the sysroot.
     """
     if sysroot == "/":
-        return os.path.join(sysroot, "usr", "bin", tool_name)
+        # Search both /usr/bin and /usr/sbin within the sysroot, since some
+        # tools are in each for the SDK.
+        return osutils.Which(
+            tool_name,
+            path=os.pathsep.join(["/usr/bin", "/usr/sbin"]),
+            root=sysroot,
+        )
 
     return os.path.join(sysroot, "build", "bin", tool_name)
 
diff --git a/lib/gmerge_binhost.py b/lib/gmerge_binhost.py
new file mode 100644
index 0000000..b95ff25
--- /dev/null
+++ b/lib/gmerge_binhost.py
@@ -0,0 +1,193 @@
+# Copyright 2024 The ChromiumOS Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Installs packages from a sysroot into a gmerge-specific binhost."""
+
+import logging
+import os
+from pathlib import Path
+import subprocess
+from typing import List
+
+import portage  # pylint: disable=import-error
+
+from chromite.lib import cros_build_lib
+from chromite.lib import osutils
+
+
+# Relative path to the wrapper directory inside the sysroot.
+_SYSROOT_BUILD_BIN = "build/bin"
+
+
+def _filter_install_mask_from_package(in_path: str, out_path: str) -> None:
+    """Filter files matching DEFAULT_INSTALL_MASK out of a tarball.
+
+    Args:
+        in_path: Unfiltered tarball.
+        out_path: Location to write filtered tarball.
+    """
+
+    # Grab metadata about package in xpak format.
+    my_xpak = portage.xpak.xpak_mem(portage.xpak.tbz2(in_path).get_data())
+
+    # Build list of files to exclude. The tar command uses a slightly
+    # different exclude format than gmerge, so it needs to be adjusted
+    # appropriately.
+    masks = os.environ.get("DEFAULT_INSTALL_MASK", "").split()
+    # Look for complete paths matching the specified pattern.  Leading slashes
+    # are removed so that the paths are relative. Trailing slashes are removed
+    # so that we delete the directory itself when the '/usr/include/' path is
+    # given.
+    masks = [mask.strip("/") for mask in masks]
+    masks = ['--exclude="./%s"' % mask for mask in masks]
+    excludes = ["--anchored"] + masks
+
+    gmerge_dir = os.path.dirname(out_path)
+    os.makedirs(gmerge_dir, mode=0o755, exist_ok=True)
+
+    with osutils.TempDir(sudo_rm=True) as tmpd:
+        tmpd_sysroot = Path(tmpd) / "sysroot"
+        osutils.SafeMakedirs(tmpd_sysroot)
+        # Extract package to temporary directory (excluding masked files).
+        # Because the binpkg isn't a well-formed zstd file due to the xpak
+        # content at the end, we have to use zstd or zstdmt (pzstd doesn't work)
+        # and we have to pipe via stdout rather than extracting to the
+        # filesystem (that throws an error that's skipped with -c).
+        res = cros_build_lib.sudo_run(
+            ["zstdmt", "-dcf", in_path], stdout=subprocess.PIPE
+        )
+        cros_build_lib.run(
+            ["tar", "-x", "-C", tmpd_sysroot, "--wildcards"] + excludes,
+            input=res.stdout,
+        )
+
+        tmp_out_path = Path(tmpd) / Path(out_path).name
+        # Build filtered version of package.
+        cros_build_lib.CreateTarball(
+            tmp_out_path,
+            tmpd_sysroot,
+            compression=cros_build_lib.CompressionType.ZSTD,
+        )
+
+        # Copy package metadata over to new package file.
+        portage.xpak.tbz2(tmp_out_path).recompose_mem(my_xpak)
+
+        cros_build_lib.sudo_run(["mv", tmp_out_path, out_path])
+        osutils.Chown(out_path, user="root", group="root")
+
+
+def update_gmerge_binhost(sysroot: str, pkgs: List[str], deep: bool) -> bool:
+    """Add packages to our gmerge-specific binhost.
+
+    Files matching DEFAULT_INSTALL_MASK are not included in the tarball.
+
+    Args:
+        sysroot: Path to the sysroot.
+        pkgs: List of packages to update.
+        deep: If True, update all packages in the binhost, else only the ones
+            specified in pkgs.
+
+    Returns:
+        True if any matching packages are installed and in the local binhost.
+    """
+    # Portage's internal API expects the sysroot to end with a '/'.
+    sysroot = os.path.join(sysroot, "")
+    # To handle the edge case where we invoke this against the SDK sysroot on a
+    # builder where / is not writable, we'll put our output dirs under /tmp.
+    # Since we believe this is only done in unit tests, we accept the
+    # inconsistency with where the output is written for board sysroots.
+    output_dir = os.path.join(sysroot, "tmp") if sysroot == "/" else sysroot
+
+    gmerge_pkgdir = os.path.join(output_dir, "gmerge-packages")
+    stripped_link = os.path.join(output_dir, "stripped-packages")
+
+    # Create gmerge pkgdir and give us permission to write to it.
+    osutils.SafeMakedirs(gmerge_pkgdir, sudo=True)
+    osutils.SafeSymlink(
+        os.path.basename(gmerge_pkgdir), stripped_link, sudo=True
+    )
+    osutils.Chown(gmerge_pkgdir, user=True)
+
+    # Load databases.
+    trees = portage.create_trees(config_root=sysroot, target_root=sysroot)
+    vardb = trees[sysroot]["vartree"].dbapi
+    bintree = trees[sysroot]["bintree"]
+    bintree.populate()
+    gmerge_tree = portage.dbapi.bintree.binarytree(
+        pkgdir=gmerge_pkgdir, settings=bintree.settings
+    )
+    gmerge_tree.populate()
+
+    # The portage API here is subtle.  Results from these lookups are pkg_str
+    # objects, which derive from Python strings but attach some extra metadata
+    # (like package file sizes and build times).  Helpers like __cmp__ aren't
+    # changed, so the set logic works.  But if you use a pkg_str from one
+    # bintree in another, it can fail to resolve, while stripping off the extra
+    # metadata allows the bintree to do the resolution internally.  Hence we
+    # normalize all results here to strings.
+    if deep:
+        # If we're in deep mode, fill in the binhost completely.
+        gmerge_matches = {str(x) for x in gmerge_tree.dbapi.cpv_all()}
+        bindb_matches = {str(x) for x in bintree.dbapi.cpv_all()}
+        installed_matches = {str(x) for x in vardb.cpv_all()} & bindb_matches
+    else:
+        # Otherwise, just fill in the requested package.
+        gmerge_matches = set()
+        bindb_matches = set()
+        installed_matches = set()
+        for pkg in pkgs:
+            gmerge_matches.update(
+                {str(x) for x in gmerge_tree.dbapi.match(pkg)}
+            )
+            bindb_matches.update({str(x) for x in bintree.dbapi.match(pkg)})
+            installed_matches.update(
+                {str(x) for x in vardb.match(pkg)} & bindb_matches
+            )
+
+    # Remove any stale packages that exist in the local binhost but are not
+    # installed anymore.
+    if bindb_matches - installed_matches:
+        subprocess.check_call(
+            [
+                cros_build_lib.GetSysrootToolPath(sysroot, "eclean"),
+                "-d",
+                "packages",
+            ]
+        )
+
+    # Remove any stale packages that exist in the gmerge binhost but are not
+    # installed anymore.
+    changed = False
+    for pkg in gmerge_matches - installed_matches:
+        gmerge_path = gmerge_tree.getname(pkg)
+        if osutils.SafeUnlink(gmerge_path, sudo=True):
+            changed = True
+
+    # Copy any installed packages that have been rebuilt to the gmerge binhost.
+    for pkg in installed_matches:
+        (build_time,) = bintree.dbapi.aux_get(pkg, ["BUILD_TIME"])
+        build_path = bintree.getname(pkg)
+        gmerge_path = gmerge_tree.getname(pkg)
+
+        # If a package exists in the gmerge binhost with the same build time,
+        # don't rebuild it.
+        if pkg in gmerge_matches and os.path.exists(gmerge_path):
+            (old_build_time,) = gmerge_tree.dbapi.aux_get(pkg, ["BUILD_TIME"])
+            if old_build_time == build_time:
+                continue
+
+        logging.info("Filtering install mask from %s", pkg)
+        _filter_install_mask_from_package(build_path, gmerge_path)
+        changed = True
+
+    # If the gmerge binhost was changed, update the Packages file to match.
+    if changed:
+        cmd = [
+            cros_build_lib.GetSysrootToolPath(sysroot, "emaint"),
+            "-f",
+            "binhost",
+        ]
+        cros_build_lib.run(cmd, extra_env={"PKGDIR": gmerge_pkgdir})
+
+    return bool(installed_matches)
diff --git a/lib/gmerge_binhost_unittest.py b/lib/gmerge_binhost_unittest.py
new file mode 100644
index 0000000..42b29eb
--- /dev/null
+++ b/lib/gmerge_binhost_unittest.py
@@ -0,0 +1,32 @@
+# Copyright 2024 The ChromiumOS Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Unittests for gmerge_binhost.py."""
+
+import unittest
+
+from chromite.lib import build_target_lib
+from chromite.lib import gmerge_binhost
+
+
+# pylint: disable=protected-access
+
+
+class GmergeBinhostTest(unittest.TestCase):
+    """Tests for gmerge_binhost."""
+
+    def testUpdateGmergeBinhost(self):
+        # Use the SDK sysroot, since we can't rely on having already built
+        # packages for a particular board.
+        sysroot = build_target_lib.get_default_sysroot_path()
+        self.assertEqual(
+            True,
+            gmerge_binhost.update_gmerge_binhost(
+                sysroot, ["sys-libs/glibc"], False
+            ),
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/scripts/strip_package.py b/scripts/strip_package.py
index b9344d0..ac0dd99 100644
--- a/scripts/strip_package.py
+++ b/scripts/strip_package.py
@@ -5,22 +5,15 @@
 """Strip packages and place them in <sysroot>/stripped-packages."""
 
 import os
-import site
 from typing import List
 
 from chromite.lib import build_target_lib
 from chromite.lib import commandline
-from chromite.lib import constants
 from chromite.lib import cros_build_lib
+from chromite.lib import gmerge_binhost
 from chromite.lib import install_mask
 
 
-# The builder module lives in the devserver path.
-# pylint: disable=import-error,wrong-import-position
-site.addsitedir(constants.SOURCE_ROOT / "src" / "platform" / "dev")
-import builder
-
-
 def create_parser() -> commandline.ArgumentParser:
     """Creates the cmdline argparser, populates the options and description."""
     parser = commandline.ArgumentParser(description=__doc__)
@@ -64,6 +57,8 @@
 
     os.environ["DEFAULT_INSTALL_MASK"] = "\n".join(install_mask.DEFAULT)
 
-    if not builder.UpdateGmergeBinhost(sysroot, options.packages, options.deep):
+    if not gmerge_binhost.update_gmerge_binhost(
+        sysroot, options.packages, options.deep
+    ):
         return 1
     return 0
diff --git a/scripts/strip_package_unittest.py b/scripts/strip_package_unittest.py
index eb9463b..610ae5f 100644
--- a/scripts/strip_package_unittest.py
+++ b/scripts/strip_package_unittest.py
@@ -17,8 +17,8 @@
 
     def setUp(self) -> None:
         self.sysroot_path = "/build/testboard"
-        self.builder_mock = self.PatchObject(
-            strip_package.builder, "UpdateGmergeBinhost"
+        self.gmerge_binhost_mock = self.PatchObject(
+            strip_package.gmerge_binhost, "update_gmerge_binhost"
         )
         self.PatchObject(
             build_target_lib,
@@ -29,19 +29,23 @@
     def testDefaultSysroot(self) -> None:
         """Test the base case."""
         strip_package.main(["--board=testboard", "foo"])
-        self.builder_mock.assert_called_with(self.sysroot_path, ["foo"], False)
+        self.gmerge_binhost_mock.assert_called_with(
+            self.sysroot_path, ["foo"], False
+        )
 
     def testMultiplePkg(self) -> None:
         """Test multiple package input."""
         strip_package.main(["--board=testboard", "foo", "foo1"])
-        self.builder_mock.assert_called_with(
+        self.gmerge_binhost_mock.assert_called_with(
             self.sysroot_path, ["foo", "foo1"], False
         )
 
     def testCustomSysroot(self) -> None:
         """Test user given custom sysroot path."""
         strip_package.main(["--sysroot=/build/sysroot", "foo"])
-        self.builder_mock.assert_called_with("/build/sysroot", ["foo"], False)
+        self.gmerge_binhost_mock.assert_called_with(
+            "/build/sysroot", ["foo"], False
+        )
 
     def testInstallMask(self) -> None:
         """Test install mask environment variable."""
@@ -54,4 +58,6 @@
     def testDeepOption(self) -> None:
         """Test Deep option."""
         strip_package.main(["--board=testboard", "--deep", "foo"])
-        self.builder_mock.assert_called_with(self.sysroot_path, ["foo"], True)
+        self.gmerge_binhost_mock.assert_called_with(
+            self.sysroot_path, ["foo"], True
+        )