blob: de1fe9ca429a96acce72b494d6485b44aebc4cec [file] [log] [blame]
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//! Utilities to handle vendored third-party crates.
use crate::log_err;
use crate::manifest;
use crate::util::AsDebug;
use std::collections::HashMap;
use std::fmt::{self, Display};
use std::fs;
use std::hash::Hash;
use std::io;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use log::{error, warn};
use semver::Version;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Visibility {
/// The crate can be used by any build targets.
Public,
/// The crate can be used by only third-party crates.
ThirdParty,
/// The crate can be used by any test target, and in production by
/// third-party crates.
TestOnlyAndThirdParty,
}
/// Returns a default of `ThirdParty`, which is the most conservative option and
/// generally what we want if one isn't explicitly computed.
impl std::default::Default for Visibility {
fn default() -> Self {
Visibility::ThirdParty
}
}
/// A normalized version as used in third_party/rust crate paths.
///
/// A crate version is identified by the major version, if it's >= 1, or the
/// minor version, if the major version is 0. There is a many-to-one
/// relationship between crate versions and epochs.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Epoch {
/// Epoch with major version == 0. The field is the minor version. It is an
/// error to use 0: methods may panic in this case.
Minor(u64),
/// Epoch with major version >= 1. It is an error to use 0: methods may
/// panic in this case.
Major(u64),
}
impl Epoch {
/// Get the semver version string for this Epoch. This will only have a
/// non-zero major component, or a zero major component and a non-zero minor
/// component. Note this differs from Epoch's `fmt::Display` impl.
pub fn to_version_string(&self) -> String {
match *self {
// These should never return Err since formatting an integer is
// infallible.
Epoch::Minor(minor) => {
assert_ne!(minor, 0);
format!("0.{minor}")
}
Epoch::Major(major) => {
assert_ne!(major, 0);
format!("{major}")
}
}
}
/// Compute the Epoch from a `semver::Version`. This is useful since we can
/// parse versions from `cargo_metadata` and in Cargo.toml files using the
/// `semver` library.
pub fn from_version(version: &Version) -> Self {
match version.major {
0 => Self::Minor(version.minor.try_into().unwrap()),
x => Self::Major(x.try_into().unwrap()),
}
}
/// Get the requested epoch from a supported dependency version string.
/// `req` should be a version request as used in Cargo.toml's [dependencies]
/// section.
///
/// `req` must use the default strategy as defined in
/// https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html#specifying-dependencies-from-cratesio
pub fn from_version_req_str(req: &str) -> Self {
// For convenience, leverage semver::VersionReq for parsing even
// though we don't need the full expressiveness.
let req = semver::VersionReq::from_str(req).unwrap();
// We require the spec to have exactly one comparator, which must use
// the default strategy.
assert_eq!(req.comparators.len(), 1);
let comp: &semver::Comparator = &req.comparators[0];
// Caret is semver's name for the default strategy.
assert_eq!(comp.op, semver::Op::Caret);
match (comp.major, comp.minor) {
(0, Some(0) | None) => panic!("invalid version req {req}"),
(0, Some(x)) => Epoch::Minor(x),
(x, _) => Epoch::Major(x),
}
}
}
// This gives us a ToString implementation for free.
impl Display for Epoch {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
// These should never return Err since formatting an integer is
// infallible.
Epoch::Minor(minor) => {
assert_ne!(minor, 0);
f.write_fmt(format_args!("v0_{minor}")).unwrap()
}
Epoch::Major(major) => {
assert_ne!(major, 0);
f.write_fmt(format_args!("v{major}")).unwrap()
}
}
Ok(())
}
}
impl FromStr for Epoch {
type Err = EpochParseError;
/// A valid input string is of the form:
/// * "v{i}", where i >= 1, or
/// * "v0_{i}", where i >= 1
///
/// Any other string is invalid. If the "v" is missing, there are extra
/// underscore-separated components, or there are two numbers but both
/// are 0 or greater than zero are all invalid strings.
fn from_str(s: &str) -> Result<Self, Self::Err> {
// Split off the "v" prefix.
let mut iter = s.split_inclusive('v');
if iter.next() != Some("v") {
return Err(EpochParseError::BadFormat);
}
let s = iter.next().ok_or(EpochParseError::BadFormat)?;
if iter.next() != None {
return Err(EpochParseError::BadFormat);
}
// Split the major and minor version numbers.
let mut parts = s.split('_');
let major: Option<u64> =
parts.next().map(|s| s.parse().map_err(EpochParseError::InvalidInt)).transpose()?;
let minor: Option<u64> =
parts.next().map(|s| s.parse().map_err(EpochParseError::InvalidInt)).transpose()?;
// Get the final epoch, checking that the (major, minor) pair is valid.
let result = match (major, minor) {
(Some(0), Some(0)) => Err(EpochParseError::BadVersion),
(Some(0), Some(minor)) => Ok(Epoch::Minor(minor)),
(Some(major), None) => Ok(Epoch::Major(major)),
(Some(_), Some(_)) => Err(EpochParseError::BadVersion),
(None, None) => Err(EpochParseError::BadFormat),
_ => unreachable!(),
}?;
// Ensure there's no remaining parts.
if parts.next() == None { Ok(result) } else { Err(EpochParseError::BadFormat) }
}
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum EpochParseError {
/// An integer could not be parsed where expected.
InvalidInt(std::num::ParseIntError),
/// The string was not formatted correctly. It was missing the 'v' prefix,
/// was missing the '_' separator, or had a tail after the last integer.
BadFormat,
/// The epoch had an invalid combination of versions: e.g. "v0_0", "v1_0",
/// "v1_1".
BadVersion,
}
impl Display for EpochParseError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use EpochParseError::*;
match self {
InvalidInt(parse_int_error) => parse_int_error.fmt(f),
BadFormat => f.write_str("epoch string had incorrect format"),
BadVersion => f.write_str("epoch string had invalid version"),
}
}
}
impl std::error::Error for EpochParseError {}
/// A crate name normalized to the format we use in //third_party.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct NormalizedName(String);
impl NormalizedName {
/// Wrap a normalized name, checking that it is valid.
pub fn new(normalized_name: &str) -> Option<NormalizedName> {
let converted_name = Self::from_crate_name(normalized_name);
if converted_name.0 == normalized_name { Some(converted_name) } else { None }
}
/// Normalize a crate name. `crate_name` is the name Cargo uses to refer to
/// the crate.
pub fn from_crate_name(crate_name: &str) -> NormalizedName {
NormalizedName(
crate_name
.chars()
.map(|c| match c {
'-' | '.' => '_',
c => c,
})
.collect(),
)
}
/// Get the wrapped string.
pub fn as_str(&self) -> &str {
&self.0
}
}
impl fmt::Display for NormalizedName {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.0)
}
}
/// Identifies a crate available in some vendored source. Each crate is uniquely
/// identified by its Cargo.toml package name and version.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct VendoredCrate {
pub name: String,
pub version: Version,
}
impl fmt::Display for VendoredCrate {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{} {}", self.name, self.version)
}
}
impl VendoredCrate {
pub fn normalized_name(&self) -> NormalizedName {
NormalizedName::from_crate_name(&self.name)
}
}
/// Set of vendored packages in `//third_party/rust` format. Namely, foo 1.2.3
/// would be in `<root>/foo/v1/crate` and bar 0.1.2 would be in
/// `<root>/bar/v0_1/crate`. The names also must be normalized according to
/// `NormalizedName` rules. Multiple versions of a name can exist, as long as
/// their "epoch" (vX for 1.0.0+ or vO_Y for <1.0.0) does not collide. This is
/// enforced naturally by the directory layout.
pub struct ThirdPartySource {
/// The available set of versions for each crate.
crate_versions: HashMap<String, Vec<Version>>,
/// As an optimization, cache the parsed manifest for each crate: it's
/// needed later, and we have to parse it here anyway.
manifests: HashMap<VendoredCrate, manifest::CargoPackage>,
}
impl ThirdPartySource {
/// Collects set of vendored crates on disk.
pub fn new(crates_path: &Path) -> io::Result<Self> {
let mut crate_versions = HashMap::<String, Vec<Version>>::new();
let mut manifests = HashMap::new();
for crate_dir in log_err!(
fs::read_dir(crates_path),
"reading dir {crates_path}",
crates_path = AsDebug(crates_path)
)? {
// Look at each crate directory.
let crate_dir: fs::DirEntry = log_err!(crate_dir)?;
if !crate_dir.file_type()?.is_dir() {
continue;
}
let crate_path = crate_dir.path();
// Ensure the path has a valid name: is UTF8, has our normalized format.
let normalized_name = path_as_str(crate_path.file_name().unwrap())?;
into_io_result(NormalizedName::new(normalized_name).ok_or_else(|| {
format!("unnormalized crate name in path {}", crate_path.to_string_lossy())
}))?;
for epoch_dir in fs::read_dir(crate_dir.path())? {
// Look at each epoch of the crate we have checked in.
let epoch_dir: fs::DirEntry = epoch_dir?;
if !epoch_dir.file_type()?.is_dir() {
continue;
}
// Skip it if it's not a valid epoch.
if epoch_dir.file_name().to_str().and_then(|s| Epoch::from_str(s).ok()).is_none() {
continue;
}
let crate_path = epoch_dir.path().join("crate");
let Some((crate_id, manifest)) = get_vendored_crate_info(&crate_path)? else {
warn!("directory name parsed as valid epoch but contained no Cargo.toml: {}",
crate_path.to_string_lossy());
continue;
};
manifests.insert(crate_id.clone(), manifest.package);
crate_versions.entry(crate_id.name).or_default().push(crate_id.version);
}
}
Ok(ThirdPartySource { crate_versions, manifests })
}
/// Find crate with `name` that meets version requirement. Returns `None` if
/// there are none.
pub fn find_match(&self, name: &str, req: &semver::VersionReq) -> Option<VendoredCrate> {
let (key, versions) = self.crate_versions.get_key_value(name)?;
let version = versions.iter().find(|v| req.matches(v))?.clone();
Some(VendoredCrate { name: key.clone(), version })
}
pub fn present_crates(&self) -> &HashMap<VendoredCrate, manifest::CargoPackage> {
&self.manifests
}
/// Get Cargo.toml `[patch]` sections for each third-party crate.
pub fn cargo_patches(&self) -> Vec<manifest::PatchSpecification> {
let mut patches: Vec<_> = self
.manifests
.iter()
.map(|(c, _)| manifest::PatchSpecification {
package_name: c.name.clone(),
patch_name: format!(
"{name}_{epoch}",
name = c.name,
epoch = Epoch::from_version(&c.version)
),
path: Self::crate_path(c),
})
.collect();
// Give patches a stable ordering, instead of the arbitrary HashMap
// order.
patches.sort_unstable_by(|p1, p2| p1.patch_name.cmp(&p2.patch_name));
patches
}
/// Get the root of `id`'s sources relative to the vendor dir.
pub fn crate_path(id: &VendoredCrate) -> PathBuf {
let mut path: PathBuf = Self::build_path(id);
path.push("crate");
path
}
/// Get the BUILD.gn file directory of `id` relative to the vendor dir.
pub fn build_path(id: &VendoredCrate) -> PathBuf {
let mut path: PathBuf = id.normalized_name().0.into();
path.push(Epoch::from_version(&id.version).to_string());
path
}
}
/// Get the subdir name containing `id` in a `cargo vendor` directory.
pub fn std_crate_path(id: &VendoredCrate) -> PathBuf {
format!("{}-{}", id.name, id.version).into()
}
/// Traverse vendored third-party crates in the Rust source package. Each
/// `VendoredCrate` is paired with the package metadata from its manifest. The
/// returned list is in unspecified order.
pub fn collect_std_vendored_crates(
vendor_path: &Path,
) -> io::Result<Vec<(VendoredCrate, manifest::CargoPackage)>> {
let mut crates = Vec::new();
for vendored_crate in fs::read_dir(vendor_path)? {
let vendored_crate: fs::DirEntry = vendored_crate?;
if !vendored_crate.file_type()?.is_dir() {
continue;
}
let Some((crate_id, manifest)) = get_vendored_crate_info(&vendored_crate.path())? else {
error!("Cargo.toml not found at {}. cargo vendor would not do that to us.",
vendored_crate.path().to_string_lossy());
panic!()
};
// Vendored crate directories can be named "{package_name}" or
// "{package_name}-{version}", but for now we only use the latter for
// std vendored deps. For simplicity, accept only that.
let dir_name = vendored_crate.file_name().to_string_lossy().into_owned();
if std_crate_path(&crate_id) != Path::new(&dir_name) {
return Err(io::Error::new(
io::ErrorKind::Other,
format!(
"directory name {dir_name} does not match package information for {crate_id:?}"
),
));
}
crates.push((crate_id, manifest.package));
}
Ok(crates)
}
/// Get a crate's ID and parsed manifest from its path. Returns `Ok(None)` if
/// there was no Cargo.toml, or `Err(_)` for other IO errors.
fn get_vendored_crate_info(
package_path: &Path,
) -> io::Result<Option<(VendoredCrate, manifest::CargoManifest)>> {
let manifest_file = match fs::read_to_string(package_path.join("Cargo.toml")) {
Ok(f) => f,
Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(None),
Err(e) => return Err(e),
};
let manifest: manifest::CargoManifest = toml::de::from_str(&manifest_file).unwrap();
let crate_id = VendoredCrate {
name: manifest.package.name.as_str().into(),
version: manifest.package.version.clone(),
};
Ok(Some((crate_id, manifest)))
}
/// Utility to read a path as a `&str` with an informative error message if it
/// had invalid UTF8.
fn path_as_str<T: AsRef<Path> + ?Sized>(path: &T) -> io::Result<&str> {
let path = path.as_ref();
into_io_result(
path.to_str().ok_or_else(|| format!("invalid utf8 in path {}", path.to_string_lossy())),
)
}
/// Utility to convert a `Result<T, E>` into `io::Result<T>` for any compatible
/// error type `E`.
fn into_io_result<T, E: Into<Box<dyn std::error::Error + Send + Sync>>>(
result: Result<T, E>,
) -> io::Result<T> {
result.map_err(|e| io::Error::new(io::ErrorKind::Other, e))
}