diff --git a/src/cargo/core/dependency.rs b/src/cargo/core/dependency.rs index 2d586b5dfa1..e4358a24178 100644 --- a/src/cargo/core/dependency.rs +++ b/src/cargo/core/dependency.rs @@ -386,8 +386,16 @@ impl Dependency { self.source_id(), id ); - self.set_version_req(VersionReq::exact(id.version())) - .set_source_id(id.source_id()) + let me = Rc::make_mut(&mut self.inner); + me.req = VersionReq::exact(id.version()); + + // Only update the `precise` of this source to preserve other + // information about dependency's source which may not otherwise be + // tested during equality/hashing. + me.source_id = me + .source_id + .with_precise(id.source_id().precise().map(|s| s.to_string())); + self } /// Returns `true` if this is a "locked" dependency, basically whether it has diff --git a/src/cargo/core/registry.rs b/src/cargo/core/registry.rs index d8b784d1db0..0380c447d39 100644 --- a/src/cargo/core/registry.rs +++ b/src/cargo/core/registry.rs @@ -1,16 +1,15 @@ use std::collections::{HashMap, HashSet}; -use anyhow::bail; -use log::{debug, trace}; -use semver::VersionReq; -use url::Url; - use crate::core::PackageSet; use crate::core::{Dependency, PackageId, Source, SourceId, SourceMap, Summary}; use crate::sources::config::SourceConfigMap; use crate::util::errors::{CargoResult, CargoResultExt}; use crate::util::interning::InternedString; use crate::util::{profile, CanonicalUrl, Config}; +use anyhow::bail; +use log::{debug, trace}; +use semver::VersionReq; +use url::Url; /// Source of information about a group of packages. /// @@ -135,14 +134,14 @@ impl<'cfg> PackageRegistry<'cfg> { // We've previously loaded this source, and we've already locked it, // so we're not allowed to change it even if `namespace` has a // slightly different precise version listed. - Some(&(_, Kind::Locked)) => { + Some((_, Kind::Locked)) => { debug!("load/locked {}", namespace); return Ok(()); } // If the previous source was not a precise source, then we can be // sure that it's already been updated if we've already loaded it. - Some(&(ref previous, _)) if previous.precise().is_none() => { + Some((previous, _)) if previous.precise().is_none() => { debug!("load/precise {}", namespace); return Ok(()); } @@ -150,7 +149,7 @@ impl<'cfg> PackageRegistry<'cfg> { // If the previous source has the same precise version as we do, // then we're done, otherwise we need to need to move forward // updating this source. - Some(&(ref previous, _)) => { + Some((previous, _)) => { if previous.precise() == namespace.precise() { debug!("load/match {}", namespace); return Ok(()); diff --git a/src/cargo/core/resolver/context.rs b/src/cargo/core/resolver/context.rs index 95f4a9fe663..bc3fa91c77c 100644 --- a/src/cargo/core/resolver/context.rs +++ b/src/cargo/core/resolver/context.rs @@ -1,16 +1,13 @@ -use std::collections::HashMap; -use std::num::NonZeroU64; - -use anyhow::format_err; -use log::debug; - -use crate::core::{Dependency, PackageId, SourceId, Summary}; -use crate::util::interning::InternedString; -use crate::util::Graph; - use super::dep_cache::RegistryQueryer; use super::errors::ActivateResult; use super::types::{ConflictMap, ConflictReason, FeaturesSet, ResolveOpts}; +use crate::core::{Dependency, PackageId, SourceId, Summary}; +use crate::util::interning::InternedString; +use crate::util::Graph; +use anyhow::format_err; +use log::debug; +use std::collections::HashMap; +use std::num::NonZeroU64; pub use super::encode::Metadata; pub use super::encode::{EncodableDependency, EncodablePackageId, EncodableResolve}; diff --git a/src/cargo/core/resolver/dep_cache.rs b/src/cargo/core/resolver/dep_cache.rs index b9d681fd401..4dd9e7c94a3 100644 --- a/src/cargo/core/resolver/dep_cache.rs +++ b/src/cargo/core/resolver/dep_cache.rs @@ -9,20 +9,19 @@ //! //! This module impl that cache in all the gory details -use std::cmp::Ordering; -use std::collections::{BTreeSet, HashMap, HashSet}; -use std::rc::Rc; - -use log::debug; - use crate::core::resolver::context::Context; use crate::core::resolver::errors::describe_path; +use crate::core::resolver::types::{ConflictReason, DepInfo, FeaturesSet}; +use crate::core::resolver::{ActivateResult, ResolveOpts}; use crate::core::{Dependency, FeatureValue, PackageId, PackageIdSpec, Registry, Summary}; +use crate::core::{GitReference, SourceId}; use crate::util::errors::{CargoResult, CargoResultExt}; use crate::util::interning::InternedString; - -use crate::core::resolver::types::{ConflictReason, DepInfo, FeaturesSet}; -use crate::core::resolver::{ActivateResult, ResolveOpts}; +use crate::util::Config; +use log::debug; +use std::cmp::Ordering; +use std::collections::{BTreeSet, HashMap, HashSet}; +use std::rc::Rc; pub struct RegistryQueryer<'a> { pub registry: &'a mut (dyn Registry + 'a), @@ -41,6 +40,10 @@ pub struct RegistryQueryer<'a> { >, /// all the cases we ended up using a supplied replacement used_replacements: HashMap, + /// Where to print warnings, if configured. + config: Option<&'a Config>, + /// Sources that we've already wared about possibly colliding in the future. + warned_git_collisions: HashSet, } impl<'a> RegistryQueryer<'a> { @@ -49,6 +52,7 @@ impl<'a> RegistryQueryer<'a> { replacements: &'a [(PackageIdSpec, Dependency)], try_to_use: &'a HashSet, minimal_versions: bool, + config: Option<&'a Config>, ) -> Self { RegistryQueryer { registry, @@ -58,6 +62,8 @@ impl<'a> RegistryQueryer<'a> { registry_cache: HashMap::new(), summary_cache: HashMap::new(), used_replacements: HashMap::new(), + config, + warned_git_collisions: HashSet::new(), } } @@ -69,6 +75,44 @@ impl<'a> RegistryQueryer<'a> { self.used_replacements.get(&p) } + /// Issues a future-compatible warning targeted at removing reliance on + /// unifying behavior between these two dependency directives: + /// + /// ```toml + /// [dependencies] + /// a = { git = 'https://example.org/foo' } + /// a = { git = 'https://example.org/foo', branch = 'master } + /// ``` + /// + /// Historical versions of Cargo considered these equivalent but going + /// forward we'd like to fix this. For more details see the comments in + /// src/cargo/sources/git/utils.rs + fn warn_colliding_git_sources(&mut self, id: SourceId) -> CargoResult<()> { + let config = match self.config { + Some(config) => config, + None => return Ok(()), + }; + let prev = match self.warned_git_collisions.replace(id) { + Some(prev) => prev, + None => return Ok(()), + }; + match (id.git_reference(), prev.git_reference()) { + (Some(GitReference::DefaultBranch), Some(GitReference::Branch(b))) + | (Some(GitReference::Branch(b)), Some(GitReference::DefaultBranch)) + if b == "master" => {} + _ => return Ok(()), + } + + config.shell().warn(&format!( + "two git dependencies found for `{}` \ + where one uses `branch = \"master\"` and the other doesn't; \ + this will break in a future version of Cargo, so please \ + ensure the dependency forms are consistent", + id.url(), + ))?; + Ok(()) + } + /// Queries the `registry` to return a list of candidates for `dep`. /// /// This method is the location where overrides are taken into account. If @@ -76,6 +120,7 @@ impl<'a> RegistryQueryer<'a> { /// applied by performing a second query for what the override should /// return. pub fn query(&mut self, dep: &Dependency) -> CargoResult>> { + self.warn_colliding_git_sources(dep.source_id())?; if let Some(out) = self.registry_cache.get(dep).cloned() { return Ok(out); } diff --git a/src/cargo/core/resolver/encode.rs b/src/cargo/core/resolver/encode.rs index 7b30c470a6b..ce3ade40b19 100644 --- a/src/cargo/core/resolver/encode.rs +++ b/src/cargo/core/resolver/encode.rs @@ -42,6 +42,13 @@ //! Listed from most recent to oldest, these are some of the changes we've made //! to `Cargo.lock`'s serialization format: //! +//! * A `version` marker is now at the top of the lock file which is a way for +//! super-old Cargos (at least since this was implemented) to give a formal +//! error if they see a lock file from a super-future Cargo. Additionally as +//! part of this change the encoding of `git` dependencies in lock files +//! changed where `branch = "master"` is now encoded with `branch=master` +//! instead of with nothing at all. +//! //! * The entries in `dependencies` arrays have been shortened and the //! `checksum` field now shows up directly in `[[package]]` instead of always //! at the end of the file. The goal of this change was to ideally reduce @@ -89,25 +96,24 @@ //! special fashion to make sure we have strict control over the on-disk //! format. -use std::collections::{BTreeMap, HashMap, HashSet}; -use std::fmt; -use std::str::FromStr; - +use super::{Resolve, ResolveVersion}; +use crate::core::{Dependency, GitReference, Package, PackageId, SourceId, Workspace}; +use crate::util::errors::{CargoResult, CargoResultExt}; +use crate::util::interning::InternedString; +use crate::util::{internal, Graph}; +use anyhow::bail; use log::debug; use serde::de; use serde::ser; use serde::{Deserialize, Serialize}; - -use crate::core::{Dependency, Package, PackageId, SourceId, Workspace}; -use crate::util::errors::{CargoResult, CargoResultExt}; -use crate::util::interning::InternedString; -use crate::util::{internal, Graph}; - -use super::{Resolve, ResolveVersion}; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::fmt; +use std::str::FromStr; /// The `Cargo.lock` structure. #[derive(Serialize, Deserialize, Debug)] pub struct EncodableResolve { + version: Option, package: Option>, /// `root` is optional to allow backward compatibility. root: Option, @@ -136,8 +142,19 @@ impl EncodableResolve { let path_deps = build_path_deps(ws); let mut checksums = HashMap::new(); - // We assume an older format is being parsed until we see so otherwise. - let mut version = ResolveVersion::V1; + let mut version = match self.version { + Some(3) => ResolveVersion::V3, + Some(n) => bail!( + "lock file version `{}` was found, but this version of Cargo \ + does not understand this lock file, perhaps Cargo needs \ + to be updated?", + n, + ), + // Historically Cargo did not have a version indicator in lock + // files, so this could either be the V1 or V2 encoding. We assume + // an older format is being parsed until we see so otherwise. + None => ResolveVersion::V1, + }; let packages = { let mut packages = self.package.unwrap_or_default(); @@ -176,7 +193,7 @@ impl EncodableResolve { // that here, and we also bump our version up to 2 since V1 // didn't ever encode this field. if let Some(cksum) = &pkg.checksum { - version = ResolveVersion::V2; + version = version.max(ResolveVersion::V2); checksums.insert(id, Some(cksum.clone())); } @@ -213,7 +230,7 @@ impl EncodableResolve { let by_source = match &enc_id.version { Some(version) => by_version.get(version)?, None => { - version = ResolveVersion::V2; + version = version.max(ResolveVersion::V2); if by_version.len() == 1 { by_version.values().next().unwrap() } else { @@ -245,7 +262,7 @@ impl EncodableResolve { // the lock file } else if by_source.len() == 1 { let id = by_source.values().next().unwrap(); - version = ResolveVersion::V2; + version = version.max(ResolveVersion::V2); Some(*id) // ... and failing that we probably had a bad git merge of @@ -317,7 +334,7 @@ impl EncodableResolve { // If `checksum` was listed in `[metadata]` but we were previously // listed as `V2` then assume some sort of bad git merge happened, so // discard all checksums and let's regenerate them later. - if !to_remove.is_empty() && version == ResolveVersion::V2 { + if !to_remove.is_empty() && version >= ResolveVersion::V2 { checksums.drain(); } for k in to_remove { @@ -539,13 +556,13 @@ impl<'a> ser::Serialize for Resolve { let mut metadata = self.metadata().clone(); - if *self.version() == ResolveVersion::V1 { + if self.version() == ResolveVersion::V1 { for &id in ids.iter().filter(|id| !id.source_id().is_path()) { let checksum = match self.checksums()[&id] { Some(ref s) => &s[..], None => "", }; - let id = encodable_package_id(id, &state); + let id = encodable_package_id(id, &state, self.version()); metadata.insert(format!("checksum {}", id.to_string()), checksum.to_string()); } } @@ -566,9 +583,10 @@ impl<'a> ser::Serialize for Resolve { source: encode_source(id.source_id()), dependencies: None, replace: None, - checksum: match self.version() { - ResolveVersion::V2 => self.checksums().get(id).and_then(|x| x.clone()), - ResolveVersion::V1 => None, + checksum: if self.version() >= ResolveVersion::V2 { + self.checksums().get(id).and_then(|x| x.clone()) + } else { + None }, }) .collect(), @@ -578,6 +596,10 @@ impl<'a> ser::Serialize for Resolve { root: None, metadata, patch, + version: match self.version() { + ResolveVersion::V3 => Some(3), + ResolveVersion::V2 | ResolveVersion::V1 => None, + }, } .serialize(s) } @@ -589,7 +611,7 @@ pub struct EncodeState<'a> { impl<'a> EncodeState<'a> { pub fn new(resolve: &'a Resolve) -> EncodeState<'a> { - let counts = if *resolve.version() == ResolveVersion::V2 { + let counts = if resolve.version() >= ResolveVersion::V2 { let mut map = HashMap::new(); for id in resolve.iter() { let slot = map @@ -613,11 +635,14 @@ fn encodable_resolve_node( state: &EncodeState<'_>, ) -> EncodableDependency { let (replace, deps) = match resolve.replacement(id) { - Some(id) => (Some(encodable_package_id(id, state)), None), + Some(id) => ( + Some(encodable_package_id(id, state, resolve.version())), + None, + ), None => { let mut deps = resolve .deps_not_replaced(id) - .map(|(id, _)| encodable_package_id(id, state)) + .map(|(id, _)| encodable_package_id(id, state, resolve.version())) .collect::>(); deps.sort(); (None, Some(deps)) @@ -630,16 +655,30 @@ fn encodable_resolve_node( source: encode_source(id.source_id()), dependencies: deps, replace, - checksum: match resolve.version() { - ResolveVersion::V2 => resolve.checksums().get(&id).and_then(|s| s.clone()), - ResolveVersion::V1 => None, + checksum: if resolve.version() >= ResolveVersion::V2 { + resolve.checksums().get(&id).and_then(|s| s.clone()) + } else { + None }, } } -pub fn encodable_package_id(id: PackageId, state: &EncodeState<'_>) -> EncodablePackageId { +pub fn encodable_package_id( + id: PackageId, + state: &EncodeState<'_>, + resolve_version: ResolveVersion, +) -> EncodablePackageId { let mut version = Some(id.version().to_string()); - let mut source = encode_source(id.source_id()).map(|s| s.with_precise(None)); + let mut id_to_encode = id.source_id(); + if resolve_version <= ResolveVersion::V2 { + if let Some(GitReference::Branch(b)) = id_to_encode.git_reference() { + if b == "master" { + id_to_encode = + SourceId::for_git(id_to_encode.url(), GitReference::DefaultBranch).unwrap(); + } + } + } + let mut source = encode_source(id_to_encode).map(|s| s.with_precise(None)); if let Some(counts) = &state.counts { let version_counts = &counts[&id.name()]; if version_counts[&id.version()] == 1 { diff --git a/src/cargo/core/resolver/mod.rs b/src/cargo/core/resolver/mod.rs index b7197e840b4..dbc4fe8be8d 100644 --- a/src/cargo/core/resolver/mod.rs +++ b/src/cargo/core/resolver/mod.rs @@ -133,7 +133,8 @@ pub fn resolve( Some(config) => config.cli_unstable().minimal_versions, None => false, }; - let mut registry = RegistryQueryer::new(registry, replacements, try_to_use, minimal_versions); + let mut registry = + RegistryQueryer::new(registry, replacements, try_to_use, minimal_versions, config); let cx = activate_deps_loop(cx, &mut registry, summaries, config)?; let mut cksums = HashMap::new(); @@ -1071,7 +1072,7 @@ fn check_duplicate_pkgs_in_lockfile(resolve: &Resolve) -> CargoResult<()> { let mut unique_pkg_ids = HashMap::new(); let state = encode::EncodeState::new(resolve); for pkg_id in resolve.iter() { - let encodable_pkd_id = encode::encodable_package_id(pkg_id, &state); + let encodable_pkd_id = encode::encodable_package_id(pkg_id, &state, resolve.version()); if let Some(prev_pkg_id) = unique_pkg_ids.insert(encodable_pkd_id, pkg_id) { anyhow::bail!( "package collision in the lockfile: packages {} and {} are different, \ diff --git a/src/cargo/core/resolver/resolve.rs b/src/cargo/core/resolver/resolve.rs index c0bb53c7d9f..c9903d0503e 100644 --- a/src/cargo/core/resolver/resolve.rs +++ b/src/cargo/core/resolver/resolve.rs @@ -66,6 +66,11 @@ pub enum ResolveVersion { /// listed inline. Introduced in 2019 in version 1.38. New lockfiles use /// V2 by default starting in 1.41. V2, + /// A format that explicitly lists a `version` at the top of the file as + /// well as changing how git dependencies are encoded. Dependencies with + /// `branch = "master"` are no longer encoded the same way as those without + /// branch specifiers. + V3, } impl Resolve { @@ -374,8 +379,8 @@ unable to verify that `{0}` is the same as when the lockfile was generated /// Returns the version of the encoding that's being used for this lock /// file. - pub fn version(&self) -> &ResolveVersion { - &self.version + pub fn version(&self) -> ResolveVersion { + self.version } pub fn summary(&self, pkg_id: PackageId) -> &Summary { diff --git a/src/cargo/core/source/source_id.rs b/src/cargo/core/source/source_id.rs index a493a18072c..b452dd9e902 100644 --- a/src/cargo/core/source/source_id.rs +++ b/src/cargo/core/source/source_id.rs @@ -1,3 +1,10 @@ +use crate::core::PackageId; +use crate::sources::DirectorySource; +use crate::sources::{GitSource, PathSource, RegistrySource, CRATES_IO_INDEX}; +use crate::util::{CanonicalUrl, CargoResult, Config, IntoUrl}; +use log::trace; +use serde::de; +use serde::ser; use std::cmp::{self, Ordering}; use std::collections::HashSet; use std::fmt::{self, Formatter}; @@ -5,19 +12,10 @@ use std::hash::{self, Hash}; use std::path::Path; use std::ptr; use std::sync::Mutex; - -use log::trace; -use serde::de; -use serde::ser; use url::Url; -use crate::core::PackageId; -use crate::sources::DirectorySource; -use crate::sources::{GitSource, PathSource, RegistrySource, CRATES_IO_INDEX}; -use crate::util::{CanonicalUrl, CargoResult, Config, IntoUrl}; - lazy_static::lazy_static! { - static ref SOURCE_ID_CACHE: Mutex> = Mutex::new(HashSet::new()); + static ref SOURCE_ID_CACHE: Mutex> = Default::default(); } /// Unique identifier for a source of packages. @@ -369,15 +367,79 @@ impl SourceId { } } +impl PartialEq for SourceId { + fn eq(&self, other: &SourceId) -> bool { + self.cmp(other) == Ordering::Equal + } +} + impl PartialOrd for SourceId { fn partial_cmp(&self, other: &SourceId) -> Option { Some(self.cmp(other)) } } +// Custom comparison defined as canonical URL equality for git sources and URL +// equality for other sources, ignoring the `precise` and `name` fields. impl Ord for SourceId { fn cmp(&self, other: &SourceId) -> Ordering { - self.inner.cmp(other.inner) + // If our interior pointers are to the exact same `SourceIdInner` then + // we're guaranteed to be equal. + if ptr::eq(self.inner, other.inner) { + return Ordering::Equal; + } + + // Sort first based on `kind`, deferring to the URL comparison below if + // the kinds are equal. + match (&self.inner.kind, &other.inner.kind) { + (SourceKind::Path, SourceKind::Path) => {} + (SourceKind::Path, _) => return Ordering::Less, + (_, SourceKind::Path) => return Ordering::Greater, + + (SourceKind::Registry, SourceKind::Registry) => {} + (SourceKind::Registry, _) => return Ordering::Less, + (_, SourceKind::Registry) => return Ordering::Greater, + + (SourceKind::LocalRegistry, SourceKind::LocalRegistry) => {} + (SourceKind::LocalRegistry, _) => return Ordering::Less, + (_, SourceKind::LocalRegistry) => return Ordering::Greater, + + (SourceKind::Directory, SourceKind::Directory) => {} + (SourceKind::Directory, _) => return Ordering::Less, + (_, SourceKind::Directory) => return Ordering::Greater, + + (SourceKind::Git(a), SourceKind::Git(b)) => { + use GitReference::*; + let ord = match (a, b) { + (Tag(a), Tag(b)) => a.cmp(b), + (Tag(_), _) => Ordering::Less, + (_, Tag(_)) => Ordering::Greater, + + (Rev(a), Rev(b)) => a.cmp(b), + (Rev(_), _) => Ordering::Less, + (_, Rev(_)) => Ordering::Greater, + + // See module comments in src/cargo/sources/git/utils.rs + // for why `DefaultBranch` is treated specially here. + (Branch(a), DefaultBranch) => a.as_str().cmp("master"), + (DefaultBranch, Branch(b)) => "master".cmp(b), + (Branch(a), Branch(b)) => a.cmp(b), + (DefaultBranch, DefaultBranch) => Ordering::Equal, + }; + if ord != Ordering::Equal { + return ord; + } + } + } + + // If the `kind` and the `url` are equal, then for git sources we also + // ensure that the canonical urls are equal. + match (&self.inner.kind, &other.inner.kind) { + (SourceKind::Git(_), SourceKind::Git(_)) => { + self.inner.canonical_url.cmp(&other.inner.canonical_url) + } + _ => self.inner.url.cmp(&other.inner.url), + } } } @@ -441,60 +503,37 @@ impl fmt::Display for SourceId { } } -// Custom equality defined as canonical URL equality for git sources and -// URL equality for other sources, ignoring the `precise` and `name` fields. -impl PartialEq for SourceId { - fn eq(&self, other: &SourceId) -> bool { - if ptr::eq(self.inner, other.inner) { - return true; - } - if self.inner.kind != other.inner.kind { - return false; - } - if self.inner.url == other.inner.url { - return true; - } - - match (&self.inner.kind, &other.inner.kind) { - (SourceKind::Git(ref1), SourceKind::Git(ref2)) => { - ref1 == ref2 && self.inner.canonical_url == other.inner.canonical_url - } - _ => false, - } - } -} - -impl PartialOrd for SourceIdInner { - fn partial_cmp(&self, other: &SourceIdInner) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for SourceIdInner { - fn cmp(&self, other: &SourceIdInner) -> Ordering { - match self.kind.cmp(&other.kind) { - Ordering::Equal => {} - ord => return ord, - } - match self.url.cmp(&other.url) { - Ordering::Equal => {} - ord => return ord, - } - match (&self.kind, &other.kind) { - (SourceKind::Git(ref1), SourceKind::Git(ref2)) => { - (ref1, &self.canonical_url).cmp(&(ref2, &other.canonical_url)) - } - _ => self.kind.cmp(&other.kind), - } - } -} - // The hash of SourceId is used in the name of some Cargo folders, so shouldn't // vary. `as_str` gives the serialisation of a url (which has a spec) and so // insulates against possible changes in how the url crate does hashing. impl Hash for SourceId { fn hash(&self, into: &mut S) { - self.inner.kind.hash(into); + match &self.inner.kind { + SourceKind::Git(GitReference::Tag(a)) => { + 0u8.hash(into); + a.hash(into); + } + SourceKind::Git(GitReference::Rev(a)) => { + 1u8.hash(into); + a.hash(into); + } + SourceKind::Git(GitReference::Branch(a)) => { + 2u8.hash(into); + a.hash(into); + } + // For now hash `DefaultBranch` the same way as `Branch("master")`, + // and for more details see module comments in + // src/cargo/sources/git/utils.rs for why `DefaultBranch` + SourceKind::Git(GitReference::DefaultBranch) => { + 2u8.hash(into); + "master".hash(into); + } + + SourceKind::Path => 4u8.hash(into), + SourceKind::Registry => 5u8.hash(into), + SourceKind::LocalRegistry => 6u8.hash(into), + SourceKind::Directory => 7u8.hash(into), + } match self.inner.kind { SourceKind::Git(_) => self.inner.canonical_url.hash(into), _ => self.inner.url.as_str().hash(into), @@ -553,7 +592,7 @@ impl GitReference { /// Returns a `Display`able view of this git reference, or None if using /// the head of the default branch pub fn pretty_ref(&self) -> Option> { - match *self { + match self { GitReference::DefaultBranch => None, _ => Some(PrettyRef { inner: self }), } diff --git a/src/cargo/ops/lockfile.rs b/src/cargo/ops/lockfile.rs index 35417ab07ad..05ffc0bdaa7 100644 --- a/src/cargo/ops/lockfile.rs +++ b/src/cargo/ops/lockfile.rs @@ -120,6 +120,10 @@ fn resolve_to_string_orig( } } + if let Some(version) = toml.get("version") { + out.push_str(&format!("version = {}\n\n", version)); + } + let deps = toml["package"].as_array().unwrap(); for dep in deps { let dep = dep.as_table().unwrap(); @@ -147,12 +151,9 @@ fn resolve_to_string_orig( // encodings going forward, though, we want to be sure that our encoded lock // file doesn't contain any trailing newlines so trim out the extra if // necessary. - match resolve.version() { - ResolveVersion::V1 => {} - _ => { - while out.ends_with("\n\n") { - out.pop(); - } + if resolve.version() >= ResolveVersion::V2 { + while out.ends_with("\n\n") { + out.pop(); } } diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index 18790abc476..3e66dd3cda8 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -126,10 +126,12 @@ impl<'cfg> Source for GitSource<'cfg> { // database, then try to resolve our reference with the preexisting // repository. (None, Some(db)) if self.config.offline() => { - let rev = db.resolve(&self.manifest_reference).with_context(|| { - "failed to lookup reference in preexisting repository, and \ - can't check for updates in offline mode (--offline)" - })?; + let rev = db + .resolve(&self.manifest_reference, None) + .with_context(|| { + "failed to lookup reference in preexisting repository, and \ + can't check for updates in offline mode (--offline)" + })?; (db, rev) } diff --git a/src/cargo/sources/git/utils.rs b/src/cargo/sources/git/utils.rs index fec8628b440..a7e90f10444 100644 --- a/src/cargo/sources/git/utils.rs +++ b/src/cargo/sources/git/utils.rs @@ -1,3 +1,111 @@ +//! Utilities for handling git repositories, mainly around +//! authentication/cloning. +//! +//! # `DefaultBranch` vs `Branch("master")` +//! +//! Long ago in a repository not so far away, an author (*cough* me *cough*) +//! didn't understand how branches work in Git. This led the author to +//! interpret these two dependency declarations the exact same way with the +//! former literally internally desugaring to the latter: +//! +//! ```toml +//! [dependencies] +//! foo = { git = "https://example.org/foo" } +//! foo = { git = "https://example.org/foo", branch = "master" } +//! ``` +//! +//! It turns out there's this things called `HEAD` in git remotes which points +//! to the "main branch" of a repository, and the main branch is not always +//! literally called master. What Cargo would like to do is to differentiate +//! these two dependency directives, with the first meaning "depend on `HEAD`". +//! +//! Unfortunately implementing this is a breaking change. This was first +//! attempted in #8364 but resulted in #8468 which has two independent bugs +//! listed on that issue. Despite this breakage we would still like to roll out +//! this change in Cargo, but we're now going to take it very slow and try to +//! break as few people as possible along the way. These comments are intended +//! to log the current progress and what wonkiness you might see within Cargo +//! when handling `DefaultBranch` vs `Branch("master")` +//! +//! ### Repositories with `master` and a default branch +//! +//! This is one of the most obvious sources of breakage. If our `foo` example +//! in above had two branches, one called `master` and another which was +//! actually the main branch, then Cargo's change will always be a breaking +//! change. This is because what's downloaded is an entirely different branch +//! if we change the meaning of the dependency directive. +//! +//! It's expected this is quite rare, but to handle this case nonetheless when +//! Cargo fetches from a git remote and the dependency specification is +//! `DefaultBranch` then it will issue a warning if the `HEAD` reference +//! doesn't match `master`. It's expected in this situation that authors will +//! fix builds locally by specifying `branch = 'master'`. +//! +//! ### Differences in `cargo vendor` configuration +//! +//! When executing `cargo vendor` it will print out configuration which can +//! then be used to configure Cargo to use the `vendor` directory. Historically +//! this configuration looked like: +//! +//! ```toml +//! [source."https://example.org/foo"] +//! git = "https://example.org/foo" +//! branch = "master" +//! replace-with = "vendored-sources" +//! ``` +//! +//! We would like to, however, transition this to not include the `branch = +//! "master"` unless the dependency directive actually mentions a branch. +//! Conveniently older Cargo implementations all interpret a missing `branch` +//! as `branch = "master"` so it's a backwards-compatible change to remove the +//! `branch = "master"` directive. As a result, `cargo vendor` will no longer +//! emit a `branch` if the git reference is `DefaultBranch` +//! +//! ### Differences in lock file formats +//! +//! Another issue pointed out in #8364 was that `Cargo.lock` files were no +//! longer compatible on stable and nightly with each other. The underlying +//! issue is that Cargo was serializing `branch = "master"` *differently* on +//! nightly than it was on stable. Historical implementations of Cargo would +//! encode `DefaultBranch` and `Branch("master")` the same way in `Cargo.lock`, +//! so when reading a lock file we have no way of differentiating between the +//! two. +//! +//! To handle this difference in encoding of `Cargo.lock` we'll be employing +//! the standard scheme to change `Cargo.lock`: +//! +//! * Add support in Cargo for a future format, don't turn it on. +//! * Wait a long time +//! * Turn on the future format +//! +//! Here the "future format" is `branch=master` shows up if you have a `branch` +//! in `Cargo.toml`, and otherwise nothing shows up in URLs. Due to the effect +//! on crate graph resolution, however, this flows into the next point.. +//! +//! ### Unification in the Cargo dependency graph +//! +//! Today dependencies with `branch = "master"` will unify with dependencies +//! that say nothing. (that's because the latter simply desugars). This means +//! the two `foo` directives above will resolve to the same dependency. +//! +//! The best idea I've got to fix this is to basically get everyone (if anyone) +//! to stop doing this today. The crate graph resolver will start to warn if it +//! detects that multiple `Cargo.toml` directives are detected and mixed. The +//! thinking is that when we turn on the new lock file format it'll also be +//! hard breaking change for any project which still has dependencies to +//! both the `master` branch and not. +//! +//! ### What we're doing today +//! +//! The general goal of Cargo today is to internally distinguish +//! `DefaultBranch` and `Branch("master")`, but for the time being they should +//! be functionally equivalent in terms of builds. The hope is that we'll let +//! all these warnings and such bake for a good long time, and eventually we'll +//! flip some switches if your build has no warnings it'll work before and +//! after. +//! +//! That's the dream at least, we'll see how this plays out. + use crate::core::GitReference; use crate::util::errors::{CargoResult, CargoResultExt}; use crate::util::paths; @@ -74,7 +182,7 @@ impl GitRemote { } pub fn rev_for(&self, path: &Path, reference: &GitReference) -> CargoResult { - reference.resolve(&self.db_at(path)?.repo) + reference.resolve(&self.db_at(path)?.repo, None) } pub fn checkout( @@ -99,7 +207,7 @@ impl GitRemote { } } None => { - if let Ok(rev) = reference.resolve(&db.repo) { + if let Ok(rev) = reference.resolve(&db.repo, Some((&self.url, cargo_config))) { return Ok((db, rev)); } } @@ -118,7 +226,7 @@ impl GitRemote { .context(format!("failed to clone into: {}", into.display()))?; let rev = match locked_rev { Some(rev) => rev, - None => reference.resolve(&repo)?, + None => reference.resolve(&repo, Some((&self.url, cargo_config)))?, }; Ok(( @@ -187,13 +295,21 @@ impl GitDatabase { self.repo.revparse_single(&oid.to_string()).is_ok() } - pub fn resolve(&self, r: &GitReference) -> CargoResult { - r.resolve(&self.repo) + pub fn resolve( + &self, + r: &GitReference, + remote_and_config: Option<(&Url, &Config)>, + ) -> CargoResult { + r.resolve(&self.repo, remote_and_config) } } impl GitReference { - pub fn resolve(&self, repo: &git2::Repository) -> CargoResult { + pub fn resolve( + &self, + repo: &git2::Repository, + remote_and_config: Option<(&Url, &Config)>, + ) -> CargoResult { let id = match self { // Note that we resolve the named tag here in sync with where it's // fetched into via `fetch` below. @@ -217,12 +333,39 @@ impl GitReference { .target() .ok_or_else(|| anyhow::format_err!("branch `{}` did not have a target", s))? } + + // See the module docs for why we're using `master` here. GitReference::DefaultBranch => { - let refname = "refs/remotes/origin/HEAD"; - let id = repo.refname_to_id(refname)?; - let obj = repo.find_object(id, None)?; - let obj = obj.peel(ObjectType::Commit)?; - obj.id() + let master = repo + .find_branch("origin/master", git2::BranchType::Remote) + .chain_err(|| "failed to find branch `master`")?; + let master = master + .get() + .target() + .ok_or_else(|| anyhow::format_err!("branch `master` did not have a target"))?; + + if let Some((remote, config)) = remote_and_config { + let head_id = repo.refname_to_id("refs/remotes/origin/HEAD")?; + let head = repo.find_object(head_id, None)?; + let head = head.peel(ObjectType::Commit)?.id(); + + if head != master { + config.shell().warn(&format!( + "\ + fetching `master` branch from `{}` but the `HEAD` \ + reference for this repository is not the \ + `master` branch. This behavior will change \ + in Cargo in the future and your build may \ + break, so it's recommended to place \ + `branch = \"master\"` in Cargo.toml when \ + depending on this git repository to ensure \ + that your build will continue to work.\ + ", + remote, + ))?; + } + } + master } GitReference::Rev(s) => { @@ -757,6 +900,8 @@ pub fn fetch( } GitReference::DefaultBranch => { + // See the module docs for why we're fetching `master` here. + refspecs.push(format!("refs/heads/master:refs/remotes/origin/master")); refspecs.push(String::from("HEAD:refs/remotes/origin/HEAD")); } @@ -1025,7 +1170,10 @@ fn github_up_to_date( handle.useragent("cargo")?; let mut headers = List::new(); headers.append("Accept: application/vnd.github.3.sha")?; - headers.append(&format!("If-None-Match: \"{}\"", reference.resolve(repo)?))?; + headers.append(&format!( + "If-None-Match: \"{}\"", + reference.resolve(repo, None)? + ))?; handle.http_headers(headers)?; handle.perform()?; Ok(handle.response_code()? == 304) diff --git a/src/cargo/sources/registry/remote.rs b/src/cargo/sources/registry/remote.rs index f79c397b9ec..2e9ffeabc0b 100644 --- a/src/cargo/sources/registry/remote.rs +++ b/src/cargo/sources/registry/remote.rs @@ -102,7 +102,7 @@ impl<'cfg> RemoteRegistry<'cfg> { fn head(&self) -> CargoResult { if self.head.get().is_none() { let repo = self.repo()?; - let oid = self.index_git_ref.resolve(repo)?; + let oid = self.index_git_ref.resolve(repo, None)?; self.head.set(Some(oid)); } Ok(self.head.get().unwrap()) diff --git a/tests/testsuite/git.rs b/tests/testsuite/git.rs index 212869af8bc..92f37e3adb4 100644 --- a/tests/testsuite/git.rs +++ b/tests/testsuite/git.rs @@ -5,6 +5,7 @@ use std::fs; use std::io::prelude::*; use std::net::{TcpListener, TcpStream}; use std::path::Path; +use std::str; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use std::thread; @@ -2789,19 +2790,23 @@ to proceed despite [..] fn default_not_master() { let project = project(); - // Create a repository with a `master` branch ... + // Create a repository with a `master` branch, but switch the head to a + // branch called `main` at the same time. let (git_project, repo) = git::new_repo("dep1", |project| { - project.file("Cargo.toml", &basic_lib_manifest("dep1")) + project + .file("Cargo.toml", &basic_lib_manifest("dep1")) + .file("src/lib.rs", "pub fn foo() {}") }); + let head_id = repo.head().unwrap().target().unwrap(); + let head = repo.find_commit(head_id).unwrap(); + repo.branch("main", &head, false).unwrap(); + repo.set_head("refs/heads/main").unwrap(); - // Then create a `main` branch with actual code, and set the head of the - // repository (default branch) to `main`. - git_project.change_file("src/lib.rs", "pub fn foo() {}"); + // Then create a commit on the new `main` branch so `master` and `main` + // differ. + git_project.change_file("src/lib.rs", ""); git::add(&repo); - let rev = git::commit(&repo); - let commit = repo.find_commit(rev).unwrap(); - repo.branch("main", &commit, false).unwrap(); - repo.set_head("refs/heads/main").unwrap(); + git::commit(&repo); let project = project .file( @@ -2826,9 +2831,191 @@ fn default_not_master() { .with_stderr( "\ [UPDATING] git repository `[..]` +warning: fetching `master` branch from `[..]` but the `HEAD` \ + reference for this repository is not the \ + `master` branch. This behavior will change \ + in Cargo in the future and your build may \ + break, so it's recommended to place \ + `branch = \"master\"` in Cargo.toml when \ + depending on this git repository to ensure \ + that your build will continue to work. [COMPILING] dep1 v0.5.0 ([..]) [COMPILING] foo v0.5.0 ([..]) [FINISHED] dev [unoptimized + debuginfo] target(s) in [..]", ) .run(); } + +#[cargo_test] +fn historical_lockfile_works() { + let project = project(); + + let (git_project, repo) = git::new_repo("dep1", |project| { + project + .file("Cargo.toml", &basic_lib_manifest("dep1")) + .file("src/lib.rs", "") + }); + let head_id = repo.head().unwrap().target().unwrap(); + + let project = project + .file( + "Cargo.toml", + &format!( + r#" + [project] + name = "foo" + version = "0.5.0" + + [dependencies] + dep1 = {{ git = '{}', branch = 'master' }} + "#, + git_project.url() + ), + ) + .file("src/lib.rs", "") + .build(); + + project.cargo("build").run(); + project.change_file( + "Cargo.lock", + &format!( + r#"# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "dep1" +version = "0.5.0" +source = "git+{}#{}" + +[[package]] +name = "foo" +version = "0.5.0" +dependencies = [ + "dep1", +] +"#, + git_project.url(), + head_id + ), + ); + project + .cargo("build") + .with_stderr("[FINISHED] [..]\n") + .run(); +} + +#[cargo_test] +fn historical_lockfile_works_with_vendor() { + let project = project(); + + let (git_project, repo) = git::new_repo("dep1", |project| { + project + .file("Cargo.toml", &basic_lib_manifest("dep1")) + .file("src/lib.rs", "") + }); + let head_id = repo.head().unwrap().target().unwrap(); + + let project = project + .file( + "Cargo.toml", + &format!( + r#" + [project] + name = "foo" + version = "0.5.0" + + [dependencies] + dep1 = {{ git = '{}', branch = 'master' }} + "#, + git_project.url() + ), + ) + .file("src/lib.rs", "") + .build(); + + let output = project.cargo("vendor").exec_with_output().unwrap(); + project.change_file(".cargo/config", str::from_utf8(&output.stdout).unwrap()); + project.change_file( + "Cargo.lock", + &format!( + r#"# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "dep1" +version = "0.5.0" +source = "git+{}#{}" + +[[package]] +name = "foo" +version = "0.5.0" +dependencies = [ + "dep1", +] +"#, + git_project.url(), + head_id + ), + ); + project.cargo("build").run(); +} + +#[cargo_test] +fn two_dep_forms() { + let project = project(); + + let (git_project, _repo) = git::new_repo("dep1", |project| { + project + .file("Cargo.toml", &basic_lib_manifest("dep1")) + .file("src/lib.rs", "") + }); + + let project = project + .file( + "Cargo.toml", + &format!( + r#" + [project] + name = "foo" + version = "0.5.0" + + [dependencies] + dep1 = {{ git = '{}', branch = 'master' }} + a = {{ path = 'a' }} + "#, + git_project.url() + ), + ) + .file("src/lib.rs", "") + .file( + "a/Cargo.toml", + &format!( + r#" + [project] + name = "a" + version = "0.5.0" + + [dependencies] + dep1 = {{ git = '{}' }} + "#, + git_project.url() + ), + ) + .file("a/src/lib.rs", "") + .build(); + + project + .cargo("build") + .with_stderr( + "\ +[UPDATING] [..] +warning: two git dependencies found for `[..]` where one uses `branch = \"master\"` \ +and the other doesn't; this will break in a future version of Cargo, so please \ +ensure the dependency forms are consistent +warning: [..] +[COMPILING] [..] +[COMPILING] [..] +[COMPILING] [..] +[FINISHED] [..] +", + ) + .run(); +} diff --git a/tests/testsuite/lockfile_compat.rs b/tests/testsuite/lockfile_compat.rs index 9f4b28b8b4d..7dec021058f 100644 --- a/tests/testsuite/lockfile_compat.rs +++ b/tests/testsuite/lockfile_compat.rs @@ -2,7 +2,7 @@ use cargo_test_support::git; use cargo_test_support::registry::Package; -use cargo_test_support::{basic_manifest, lines_match, project}; +use cargo_test_support::{basic_lib_manifest, basic_manifest, lines_match, project}; #[cargo_test] fn oldest_lockfile_still_works() { @@ -636,3 +636,89 @@ dependencies = [ let lock = p.read_lockfile(); assert_lockfiles_eq(&lockfile, &lock); } + +#[cargo_test] +fn v3_and_git() { + let (git_project, repo) = git::new_repo("dep1", |project| { + project + .file("Cargo.toml", &basic_lib_manifest("dep1")) + .file("src/lib.rs", "") + }); + let head_id = repo.head().unwrap().target().unwrap(); + + let lockfile = format!( + r#"# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "dep1" +version = "0.5.0" +source = "git+{}?branch=master#{}" + +[[package]] +name = "foo" +version = "0.0.1" +dependencies = [ + "dep1", +] +"#, + git_project.url(), + head_id, + ); + + let p = project() + .file( + "Cargo.toml", + &format!( + r#" + [project] + name = "foo" + version = "0.0.1" + authors = [] + + [dependencies] + dep1 = {{ git = '{}', branch = 'master' }} + "#, + git_project.url(), + ), + ) + .file("src/lib.rs", "") + .file("Cargo.lock", "version = 3") + .build(); + + p.cargo("fetch").run(); + + let lock = p.read_lockfile(); + assert_lockfiles_eq(&lockfile, &lock); +} + +#[cargo_test] +fn lock_from_the_future() { + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.0.1" + authors = [] + "#, + ) + .file("src/lib.rs", "") + .file("Cargo.lock", "version = 10000000") + .build(); + + p.cargo("fetch") + .with_stderr( + "\ +error: failed to parse lock file at: [..] + +Caused by: + lock file version `10000000` was found, but this version of Cargo does not \ + understand this lock file, perhaps Cargo needs to be updated? +", + ) + .with_status(101) + .run(); +}