#!/usr/bin/env python3
# Copyright 2025 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import argparse
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path

# Data source embedded in the script.
# Only includes documents EXTERNAL to the chromium/src repository.
DOCUMENT_MANIFEST = {
    "description": "Manifest of externally sourced documents cached for AI assistant context.",
    "remote_documents": [
        {
            "source_url": "https://gn.googlesource.com/gn/+/main/docs/style_guide.md",
            "cached_path": "style_guide.md",
            "subdir": "gn",
            "description": "Style guide for writing clean and maintainable BUILD.gn files."
        },
    ]
}


def parse_gob_url(url):
    """Parses a Git-on-Borg URL into repo URL, branch, and file path."""
    try:
        parts = url.split('/+/')
        if len(parts) < 2:
            raise ValueError(f"Invalid GoB URL format: {url} - missing '/+/'")
        repo_url = parts[0]
        path_parts = parts[1].split('/', 1)
        branch = path_parts[0]
        file_path = path_parts[1]
        return repo_url, branch, file_path
    except Exception as e:
        raise ValueError(f"Error parsing URL {url}: {e}")


def replace_non_inclusive_language(file_path):
    """Replaces non-inclusive terms to align with Chromium's guidelines.

    This function is called after fetching external documentation to ensure that
    the cached files pass the Chromium presubmit check for inclusive language.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        # Replace both lowercase and capitalized variants via plain substring
        # substitution.
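        # The trailing "nocheck" markers are meant to exempt these literal terms
        # from the same inclusive-language presubmit check that this function
        # exists to satisfy.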
content = content.replace("whitelist", "allowlist") # nocheck
content = content.replace("blacklist", "denylist") # nocheck
content = content.replace("Whitelist", "Allowlist") # nocheck
content = content.replace("Blacklist", "Denylist") # nocheck
content = content.replace("master", "main") # nocheck
content = content.replace("Master", "Main") # nocheck
with open(file_path, 'w', encoding='utf-8') as f:
f.write(content)
print(f" Applied non-inclusive language replacements to {file_path.name}")
except Exception as e:
print(f" Could not process file {file_path}: {e}")


def fetch_doc_with_git(repo_url, branch, file_path, output_file):
    """Fetches a single file from a git repo using a shallow clone into a
    temporary directory."""
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_path = Path(tmpdir)
        print(f" Cloning {repo_url} (branch: {branch}) into temporary directory {tmp_path}")
        try:
            subprocess.run(
                ['git', 'clone', '--depth', '1', '--branch', branch,
                 '--no-checkout', '--filter=blob:none', repo_url, "."],
                check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                text=True, cwd=tmp_path, timeout=300
            )
            subprocess.run(
                ['git', 'sparse-checkout', 'init', '--cone'],
                check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                text=True, cwd=tmp_path, timeout=60
            )
            subprocess.run(
                ['git', 'sparse-checkout', 'set', file_path],
                check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                text=True, cwd=tmp_path, timeout=60
            )
            print(f" Checking out {file_path}...")
            subprocess.run(
                ['git', 'checkout'],
                check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                text=True, cwd=tmp_path, timeout=120
            )
        except (subprocess.CalledProcessError, subprocess.TimeoutExpired,
                FileNotFoundError) as e:
            print(f" Git operation failed for {repo_url}: {e}")
            return False
        source_file = tmp_path / file_path
        if source_file.exists():
            print(f" Copying {file_path} to {output_file}")
            try:
                shutil.copyfile(source_file, output_file)
                replace_non_inclusive_language(output_file)
                return True
            except Exception as e:
                print(f" Error copying file: {e}")
                return False
        else:
            print(f" Error: File {file_path} not found in repository after sparse checkout.")
            return False
    return False


def fetch_and_cache_docs(manifest, base_output_dir, force=False):
    """Fetches documents from URLs specified in the manifest."""
    print(f"Starting doc refresh. Base output dir: {base_output_dir}")
    base_output_dir.mkdir(parents=True, exist_ok=True)
    successes = []
    failures = []
    remote_docs = manifest.get("remote_documents", [])
    if not remote_docs:
        print("No remote documents to fetch.")
        return
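    # Each manifest entry is handled independently; failures are collected and
    # reported in the summary below instead of aborting the run.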
    for doc in remote_docs:
        source_url = doc.get("source_url")
        cached_path = doc.get("cached_path")
        subdir = doc.get("subdir")
        description = doc.get("description", "No description")
        if not source_url or not cached_path or not subdir:
            print(f" Skipping invalid entry (missing source_url, cached_path, or subdir): {doc}")
            failures.append(f"{description} (Invalid Manifest Entry)")
            continue

        output_dir = base_output_dir / subdir
        output_dir.mkdir(parents=True, exist_ok=True)
        output_file = output_dir / cached_path
        print(f" Processing: {description} -> {subdir}/{cached_path}")
        print(f" Source URL: {source_url}")
        try:
            repo_url, branch, file_path = parse_gob_url(source_url)
            if fetch_doc_with_git(repo_url, branch, file_path, output_file):
                successes.append(f"{subdir}/{cached_path}")
            else:
                failures.append(f"{subdir}/{cached_path} (Fetch Failed)")
        except ValueError as e:
            print(f" Skipping {source_url}: {e}")
            failures.append(f"{subdir}/{cached_path} (URL Parse Error)")
        except Exception as e:
            print(f" An unexpected error occurred for {source_url}: {e}")
            failures.append(f"{subdir}/{cached_path} (Unexpected Error)")
print("")
print("--- Refresh Summary ---")
print(f"Successfully updated: {len(successes)}")
print(f"Failed: {len(failures)}")
if failures:
print("\nFailed documents:")
for f in failures:
print(f" - {f}")
if not force:
sys.exit(1)


def main():
    parser = argparse.ArgumentParser(
        description="Refresh the cached external documentation.")
    parser.add_argument(
        '--force',
        action='store_true',
        help='Continue and exit successfully even if some documents fail to update.'
    )
    args = parser.parse_args()

    script_dir = Path(__file__).resolve().parent
    base_output_dir = script_dir
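    # The script is assumed to live two directory levels below the repository
    # root; the .git check below guards against running it from anywhere else.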
    git_root = script_dir.parent.parent
    print(f"Base output directory: {base_output_dir}")
    print(f"Assumed git root: {git_root}")
    if not (git_root / ".git").exists():
        print(f"Error: Git root not found at {git_root}. "
              "Please run this script from within the Chromium source tree.")
        sys.exit(1)

    fetch_and_cache_docs(DOCUMENT_MANIFEST, base_output_dir, args.force)
    print("Document refresh complete.")

    try:
        print(f"Adding changes in {script_dir.relative_to(git_root)} to git index...")
        subprocess.run(['git', 'add', str(script_dir.relative_to(git_root))],
                       check=True, cwd=git_root)
        print("Changes added to git index.")
        print("Please review and commit the changes.")
    except Exception as e:
        print(f"An error occurred while running git add: {e}")


if __name__ == "__main__":
    main()