Fix the node_modules download hook.
Currently we use download_from_google_storage to download
the node_modules.tar.gz archive from GCS; however, the archive
contains a .bin/ directory full of symlinks, and
download_from_google_storage refuses to extract an archive
that contains symlinks.
In theory we could (should?) add a flag to download_from_google_storage
to support this, now that symlinks should work everywhere, but in
the meantime this CL replaces download_from_google_storage with a
custom download script that does the same thing but allows such
archive members.
Change-Id: I1467f95197aae7dee2c1914916e4caa0fd0a381d
Reviewed-on: https://chromium-review.googlesource.com/c/experimental/website/+/3246590
Reviewed-by: Struan Shrimpton <sshrimp@google.com>
Commit-Queue: Dirk Pranke <dpranke@google.com>
diff --git a/DEPS b/DEPS
index 2a6e0dd..abdd172 100644
--- a/DEPS
+++ b/DEPS
@@ -98,15 +98,10 @@
],
},
{
- 'name': 'node_modules',
+ 'name': 'fetch_node_modules',
'pattern': '.',
- 'action': [ 'vpython3',
- 'third_party/depot_tools/download_from_google_storage.py',
- '--no_resume',
- '--extract',
- '--no_auth',
- '--bucket', 'dpranke-chromium-website-exp-storage',
- '-s', 'node_modules.tar.gz.sha1',
+ 'action': [ 'python3',
+ 'scripts/fetch_node_modules.py'
],
},
{
diff --git a/scripts/fetch_node_modules.py b/scripts/fetch_node_modules.py
new file mode 100755
index 0000000..a4a89de
--- /dev/null
+++ b/scripts/fetch_node_modules.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Simple script to download the pinned Node modules from GCS.
+
+This script exists because the node_modules archive currently contains
+a node_modules/.bin directory with a bunch of symlinked files in it,
+and download_from_google_storage.py won't let you have archives with
+symlinks.
+
+In theory we should probably rebuild the node_modules distro without
+the .bin directory (using `npm install --no-bin-links`) but that would
+cause the build scripts to fail and we'd have to replace `npmw` with
+something else.
+"""
+
+import argparse
+import hashlib
+import os
+import subprocess
+import sys
+import tarfile
+
+# Absolute path of the parent of the directory that contains this script.
+# All files read and written by this script are located relative to it.
+SRC_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
+
+def _sha1_of_file(path):
+    """Return the hex SHA-1 digest of the file at `path`, or None if absent.
+
+    The file is hashed in fixed-size chunks so that a large archive does
+    not have to be held in memory all at once.
+    """
+    if not os.path.exists(path):
+        return None
+    digest = hashlib.sha1()
+    with open(path, 'rb') as fp:
+        for chunk in iter(lambda: fp.read(1024 * 1024), b''):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+def main():
+    """Fetch and unpack the pinned node_modules archive.
+
+    Reads the expected SHA-1 from node_modules.tar.gz.sha1 under SRC_ROOT.
+    If node_modules.tar.gz is already present with that digest, nothing
+    is done. Otherwise the object named by the digest is copied from GCS
+    with depot_tools' gsutil.py, checked against the expected digest, and
+    extracted into SRC_ROOT.
+
+    Returns:
+        0 on success; gsutil's non-zero exit code if the download fails;
+        1 if the downloaded archive has the wrong digest or cannot be
+        extracted.
+    """
+    # No options are defined; parsing still provides --help and rejects
+    # unexpected arguments.
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.parse_args()
+
+    with open(os.path.join(SRC_ROOT, 'node_modules.tar.gz.sha1')) as fp:
+        expected_sha1 = fp.read().strip()
+
+    tgz = os.path.join(SRC_ROOT, 'node_modules.tar.gz')
+
+    # TODO(dpranke): Consider whether we should validate that node_modules/
+    # and all of the expected files exist as well.
+    if _sha1_of_file(tgz) == expected_sha1:
+        return 0
+
+    retcode = subprocess.call([
+        sys.executable,
+        os.path.join(SRC_ROOT, 'third_party', 'depot_tools', 'gsutil.py'),
+        'cp',
+        'gs://dpranke-chromium-website-exp-storage/%s' % expected_sha1,
+        tgz,
+    ])
+    if retcode:
+        return retcode
+
+    # Never unpack an archive that does not match the pinned digest: a
+    # truncated or wrong download must fail loudly instead of being
+    # extracted over the checkout.
+    downloaded_sha1 = _sha1_of_file(tgz)
+    if downloaded_sha1 != expected_sha1:
+        print('%s has SHA-1 %s, expected %s' %
+              (tgz, downloaded_sha1, expected_sha1), file=sys.stderr)
+        return 1
+
+    try:
+        # TODO(dpranke): download_from_google_storage puts in a fair amount
+        # of effort to not clobber an existing directory until it is sure it
+        # can extract the archive completely. Consider whether we should do
+        # the same.
+        with tarfile.open(tgz, 'r:gz') as tar:
+            tar.extractall(path=SRC_ROOT)
+    except Exception as e:
+        # Top-level boundary: report any extraction failure on stderr and
+        # turn it into a non-zero exit code rather than a traceback.
+        print(e, file=sys.stderr)
+        return 1
+    return 0
+
+# When run as a script, call main() and use its return value as the
+# process exit status.
+if __name__ == '__main__':
+ sys.exit(main())