diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-19 09:25:53 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-19 09:25:53 +0000 |
commit | 73e0a5b7696ea019ba35b89f38fc8e7b285d99cb (patch) | |
tree | 0d2e175af6f114cb50a675bec0bc76e12e1bceb4 /vendor/gix-diff/src | |
parent | Adding upstream version 1.75.0+dfsg1. (diff) | |
download | rustc-upstream.tar.xz rustc-upstream.zip |
Adding upstream version 1.76.0+dfsg1. (refs: upstream/1.76.0+dfsg1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix-diff/src')
-rw-r--r-- | vendor/gix-diff/src/blob.rs | 3 | ||||
-rw-r--r-- | vendor/gix-diff/src/blob/mod.rs | 133 | ||||
-rw-r--r-- | vendor/gix-diff/src/blob/pipeline.rs | 538 | ||||
-rw-r--r-- | vendor/gix-diff/src/blob/platform.rs | 619 | ||||
-rw-r--r-- | vendor/gix-diff/src/lib.rs | 41 | ||||
-rw-r--r-- | vendor/gix-diff/src/rewrites/mod.rs | 71 | ||||
-rw-r--r-- | vendor/gix-diff/src/rewrites/tracker.rs | 620 | ||||
-rw-r--r-- | vendor/gix-diff/src/tree/changes.rs | 63 | ||||
-rw-r--r-- | vendor/gix-diff/src/tree/visit.rs | 40 |
9 files changed, 2081 insertions, 47 deletions
diff --git a/vendor/gix-diff/src/blob.rs b/vendor/gix-diff/src/blob.rs deleted file mode 100644 index 27c1a1317..000000000 --- a/vendor/gix-diff/src/blob.rs +++ /dev/null @@ -1,3 +0,0 @@ -//! For using text diffs, please have a look at the [`imara-diff` documentation](https://docs.rs/imara-diff), -//! maintained by [Pascal Kuthe](https://github.com/pascalkuthe). -pub use imara_diff::*; diff --git a/vendor/gix-diff/src/blob/mod.rs b/vendor/gix-diff/src/blob/mod.rs new file mode 100644 index 000000000..0c76c2d91 --- /dev/null +++ b/vendor/gix-diff/src/blob/mod.rs @@ -0,0 +1,133 @@ +//! For using text diffs, please have a look at the [`imara-diff` documentation](https://docs.rs/imara-diff), +//! maintained by [Pascal Kuthe](https://github.com/pascalkuthe). +use std::{collections::HashMap, path::PathBuf}; + +use bstr::BString; +pub use imara_diff::*; + +/// +pub mod pipeline; + +/// +pub mod platform; + +/// Information about the diff performed to detect similarity. +#[derive(Debug, Default, Clone, Copy, PartialEq, PartialOrd)] +pub struct DiffLineStats { + /// The amount of lines to remove from the source to get to the destination. + pub removals: u32, + /// The amount of lines to add to the source to get to the destination. + pub insertions: u32, + /// The amount of lines of the previous state, in the source. + pub before: u32, + /// The amount of lines of the new state, in the destination. + pub after: u32, + /// A range from 0 to 1.0, where 1.0 is a perfect match and 0.5 is a similarity of 50%. + /// Similarity is the ratio between all lines in the previous blob and the current blob, + /// calculated as `(old_lines_count - new_lines_count) as f32 / old_lines_count.max(new_lines_count) as f32`. + pub similarity: f32, +} + +/// A way to classify a resource suitable for diffing. 
+#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)] +pub enum ResourceKind { + /// The source of a rewrite, rename or copy operation, or generally the old version of a resource. + OldOrSource, + /// The destination of a rewrite, rename or copy operation, or generally the new version of a resource. + NewOrDestination, +} + +/// A set of values to define how to diff something that is associated with it using `git-attributes`, relevant for regular files. +/// +/// Some values are related to diffing, some are related to conversions. +#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub struct Driver { + /// The name of the driver, as referred to by `[diff "name"]` in the git configuration. + pub name: BString, + /// The command to execute to perform the diff entirely like `<command> old-file old-hex old-mode new-file new-hex new-mode`. + /// + /// Please note that we don't make this call ourselves, but use it to determine that we should not run the our standard + /// built-in algorithm but bail instead as the output of such a program isn't standardized. + pub command: Option<BString>, + /// The per-driver algorithm to use. + pub algorithm: Option<Algorithm>, + /// The external filter program to call like `<binary_to_text_command> /path/to/blob` which outputs a textual version of the provided + /// binary file. + /// Note that it's invoked with a shell if arguments are given. + /// Further, if present, it will always be executed, whether `is_binary` is set or not. + pub binary_to_text_command: Option<BString>, + /// `Some(true)` if this driver deals with binary files, which means that a `binary_to_text_command` should be used to convert binary + /// into a textual representation. + /// Without such a command, anything that is considered binary is not diffed, but only the size of its data is made available. + /// If `Some(false)`, it won't be considered binary, and the its data will not be sampled for the null-byte either. 
+ /// Leaving it to `None` means binary detection is automatic, and is based on the presence of the `0` byte in the first 8kB of the buffer. + pub is_binary: Option<bool>, +} + +/// A conversion pipeline to take an object or path from what's stored in `git` to what can be diffed, while +/// following the guidance of git-attributes at the respective path to learn if diffing should happen or if +/// the content is considered binary. +/// +/// There are two different conversion flows, where the target of the flow is a buffer with diffable content: +// TODO: update this with information about possible directions. +/// +/// * `worktree on disk` -> `text conversion` +/// * `object` -> `worktree-filters` -> `text conversion` +#[derive(Clone)] +pub struct Pipeline { + /// A way to read data directly from the worktree. + pub roots: pipeline::WorktreeRoots, + /// A pipeline to convert objects from what's stored in `git` to its worktree version. + pub worktree_filter: gix_filter::Pipeline, + /// Options affecting the way we read files. + pub options: pipeline::Options, + /// Drivers to help customize the conversion behaviour depending on the location of items. + drivers: Vec<Driver>, + /// Pre-configured attributes to obtain additional diff-related information. + attrs: gix_filter::attributes::search::Outcome, + /// A buffer to manipulate paths + path: PathBuf, +} + +/// A utility for performing a diff of two blobs, including flexible conversions, conversion-caching +/// acquisition of diff information. +/// Note that this instance will not call external filters as their output can't be known programmatically, +/// but it allows to prepare their input if the caller wishes to perform this task. +/// +/// Optimized for NxM lookups with built-in caching. +#[derive(Clone)] +pub struct Platform { + /// The old version of a diff-able blob, if set. + old: Option<platform::CacheKey>, + /// The new version of a diff-able blob, if set. 
+ new: Option<platform::CacheKey>, + + /// Options to alter how diffs should be performed. + pub options: platform::Options, + /// A way to convert objects into a diff-able format. + pub filter: Pipeline, + /// A way to access .gitattributes + pub attr_stack: gix_worktree::Stack, + + /// The way we convert resources into diffable states. + filter_mode: pipeline::Mode, + /// A continuously growing cache keeping ready-for-diff blobs by their path in the worktree, + /// as that is what affects their final diff-able state. + /// + /// That way, expensive rewrite-checks with NxM matrix checks would be as fast as possible, + /// avoiding duplicate work. + diff_cache: HashMap<platform::CacheKey, platform::CacheValue>, +} + +mod impls { + use crate::blob::ResourceKind; + + impl std::fmt::Display for ResourceKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(match self { + ResourceKind::OldOrSource => "old", + ResourceKind::NewOrDestination => "new", + }) + } + } +} diff --git a/vendor/gix-diff/src/blob/pipeline.rs b/vendor/gix-diff/src/blob/pipeline.rs new file mode 100644 index 000000000..58dddd90b --- /dev/null +++ b/vendor/gix-diff/src/blob/pipeline.rs @@ -0,0 +1,538 @@ +use std::{ + io::{Read, Write}, + path::{Path, PathBuf}, + process::{Command, Stdio}, +}; + +use bstr::{BStr, ByteSlice}; +use gix_filter::{ + driver::apply::{Delay, MaybeDelayed}, + pipeline::convert::{ToGitOutcome, ToWorktreeOutcome}, +}; +use gix_object::tree::EntryKind; + +use crate::blob::{Driver, Pipeline, ResourceKind}; + +/// A way to access roots for different kinds of resources that are possibly located and accessible in a worktree. +#[derive(Clone, Debug, Default)] +pub struct WorktreeRoots { + /// A place where the source of a rewrite, rename or copy, or generally the previous version of resources, are located. 
+ pub old_root: Option<PathBuf>, + /// A place where the destination of a rewrite, rename or copy, or generally the new version of resources, are located. + pub new_root: Option<PathBuf>, +} + +impl WorktreeRoots { + /// Return the root path for the given `kind` + pub fn by_kind(&self, kind: ResourceKind) -> Option<&Path> { + match kind { + ResourceKind::OldOrSource => self.old_root.as_deref(), + ResourceKind::NewOrDestination => self.new_root.as_deref(), + } + } +} + +/// Data as part of an [Outcome]. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] +pub enum Data { + /// The data to use for diffing was written into the buffer that was passed during the call to [`Pipeline::convert_to_diffable()`]. + Buffer, + /// The size that the binary blob had at the given revision, without having applied filters, as it's either + /// considered binary or above the big-file threshold. + /// + /// In this state, the binary file cannot be diffed. + Binary { + /// The size of the object prior to performing any filtering or as it was found on disk. + /// + /// Note that technically, the size isn't always representative of the same 'state' of the + /// content, as once it can be the size of the blob in git, and once it's the size of file + /// in the worktree. + size: u64, + }, +} + +/// The outcome returned by [Pipeline::convert_to_diffable()](super::Pipeline::convert_to_diffable()). +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] +pub struct Outcome { + /// If available, an index into the `drivers` field to access more diff-related information of the driver for items + /// at the given path, as previously determined by git-attributes. + /// + /// Note that drivers are queried even if there is no object available. + pub driver_index: Option<usize>, + /// The data itself, suitable for diffing, and if the object or worktree item is present at all. + pub data: Option<Data>, +} + +/// Options for use in a [`Pipeline`]. 
+#[derive(Default, Clone, Copy, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +pub struct Options { + /// The amount of bytes that an object has to reach before being treated as binary. + /// These objects will not be queried, nor will their data be processed in any way. + /// If `0`, no file is ever considered binary due to their size. + /// + /// Note that for files stored in `git`, what counts is their stored, decompressed size, + /// thus `git-lfs` files would typically not be considered binary unless one explicitly sets + /// them + pub large_file_threshold_bytes: u64, + /// Capabilities of the file system which affect how we read worktree files. + pub fs: gix_fs::Capabilities, +} + +/// The specific way to convert a resource. +#[derive(Default, Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum Mode { + /// Always prepare the version of the resource as it would be in the work-tree, and + /// apply binary-to-text filters if present. + /// + /// This is typically free for resources in the worktree, and will apply filters to resources in the + /// object database. + #[default] + ToWorktreeAndBinaryToText, + /// Prepare the version of the resource as it would be in the work-tree if + /// binary-to-text filters are present (and apply them), or use the version in `git` otherwise. + ToGitUnlessBinaryToTextIsPresent, + /// Always prepare resources as they are stored in `git`. + /// + /// This is usually fastest, even though resources in the worktree needed to be converted files. + ToGit, +} + +impl Mode { + fn to_worktree(self) -> bool { + matches!( + self, + Mode::ToGitUnlessBinaryToTextIsPresent | Mode::ToWorktreeAndBinaryToText + ) + } + + fn to_git(self) -> bool { + matches!(self, Mode::ToGitUnlessBinaryToTextIsPresent | Mode::ToGit) + } +} + +/// +pub mod convert_to_diffable { + use bstr::BString; + use gix_object::tree::EntryKind; + + /// The error returned by [Pipeline::convert_to_diffable()](super::Pipeline::convert_to_diffable()). 
+ #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("Entry at '{rela_path}' must be regular file or symlink, but was {actual:?}")] + InvalidEntryKind { rela_path: BString, actual: EntryKind }, + #[error("Entry at '{rela_path}' could not be read as symbolic link")] + ReadLink { rela_path: BString, source: std::io::Error }, + #[error("Entry at '{rela_path}' could not be opened for reading or read from")] + OpenOrRead { rela_path: BString, source: std::io::Error }, + #[error("Entry at '{rela_path}' could not be copied from a filter process to a memory buffer")] + StreamCopy { rela_path: BString, source: std::io::Error }, + #[error("Failed to run '{cmd}' for binary-to-text conversion of entry at {rela_path}")] + RunTextConvFilter { + rela_path: BString, + cmd: String, + source: std::io::Error, + }, + #[error("Tempfile for binary-to-text conversion for entry at {rela_path} could not be created")] + CreateTempfile { rela_path: BString, source: std::io::Error }, + #[error("Binary-to-text conversion '{cmd}' for entry at {rela_path} failed with: {stderr}")] + TextConvFilterFailed { + rela_path: BString, + cmd: String, + stderr: BString, + }, + #[error(transparent)] + FindObject(#[from] gix_object::find::existing_object::Error), + #[error(transparent)] + ConvertToWorktree(#[from] gix_filter::pipeline::convert::to_worktree::Error), + #[error(transparent)] + ConvertToGit(#[from] gix_filter::pipeline::convert::to_git::Error), + } +} + +/// Lifecycle +impl Pipeline { + /// Create a new instance of a pipeline which produces blobs suitable for diffing. `roots` allow to read worktree files directly, otherwise + /// `worktree_filter` is used to transform object database data directly. `drivers` further configure individual paths. + /// `options` are used to further configure the way we act.. 
+ pub fn new( + roots: WorktreeRoots, + worktree_filter: gix_filter::Pipeline, + mut drivers: Vec<super::Driver>, + options: Options, + ) -> Self { + drivers.sort_by(|a, b| a.name.cmp(&b.name)); + Pipeline { + roots, + worktree_filter, + drivers, + options, + attrs: { + let mut out = gix_filter::attributes::search::Outcome::default(); + out.initialize_with_selection(&Default::default(), Some("diff")); + out + }, + path: Default::default(), + } + } +} + +/// Access +impl Pipeline { + /// Return all drivers that this instance was initialized with. + pub fn drivers(&self) -> &[super::Driver] { + &self.drivers + } +} + +/// Conversion +impl Pipeline { + /// Convert the object at `id`, `mode`, `rela_path` and `kind`, providing access to `attributes` and `objects`. + /// The resulting diff-able data is written into `out`, assuming it's not too large. The returned [`Outcome`] + /// contains information on how to use `out`, or if it's filled at all. + /// + /// `attributes` must be returning the attributes at `rela_path`, and `objects` must be usable if `kind` is + /// a resource in the object database, i.e. has no worktree root available. + /// + /// If `id` [is null](gix_hash::ObjectId::is_null()) or the file in question doesn't exist in the worktree in case + /// [a root](WorktreeRoots) is present, then `out` will be left cleared and [Outcome::data] will be `None`. + /// + /// Note that `mode` is trusted, and we will not re-validate that the entry in the worktree actually is of that mode. + /// + /// Use `convert` to control what kind of the resource will be produced. + /// + /// ### About Tempfiles + /// + /// When querying from the object database and a binary and a [binary-to-text](Driver::binary_to_text_command) is set, + /// a temporary file will be created to serve as input for the converter program, containing the worktree-data that + /// exactly as it would be present in the worktree if checked out. 
+ /// + /// As these files are ultimately named tempfiles, they will be leaked unless the [gix_tempfile] is configured with + /// a signal handler. If they leak, they would remain in the system's `$TMP` directory. + #[allow(clippy::too_many_arguments)] + pub fn convert_to_diffable( + &mut self, + id: &gix_hash::oid, + mode: EntryKind, + rela_path: &BStr, + kind: ResourceKind, + attributes: &mut dyn FnMut(&BStr, &mut gix_filter::attributes::search::Outcome), + objects: &dyn gix_object::FindObjectOrHeader, + convert: Mode, + out: &mut Vec<u8>, + ) -> Result<Outcome, convert_to_diffable::Error> { + let is_symlink = match mode { + EntryKind::Link if self.options.fs.symlink => true, + EntryKind::Blob | EntryKind::BlobExecutable => false, + _ => { + return Err(convert_to_diffable::Error::InvalidEntryKind { + rela_path: rela_path.to_owned(), + actual: mode, + }) + } + }; + + out.clear(); + attributes(rela_path, &mut self.attrs); + let attr = self.attrs.iter_selected().next().expect("pre-initialized with 'diff'"); + let driver_index = attr + .assignment + .state + .as_bstr() + .and_then(|name| self.drivers.binary_search_by(|d| d.name.as_bstr().cmp(name)).ok()); + let driver = driver_index.map(|idx| &self.drivers[idx]); + let mut is_binary = if let Some(driver) = driver { + driver + .is_binary + .map(|is_binary| is_binary && driver.binary_to_text_command.is_none()) + } else { + attr.assignment.state.is_unset().then_some(true) + }; + match self.roots.by_kind(kind) { + Some(root) => { + self.path.clear(); + self.path.push(root); + self.path.push(gix_path::from_bstr(rela_path)); + let data = if is_symlink { + let target = none_if_missing(std::fs::read_link(&self.path)).map_err(|err| { + convert_to_diffable::Error::ReadLink { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + target.map(|target| { + out.extend_from_slice(gix_path::into_bstr(target).as_ref()); + Data::Buffer + }) + } else { + let need_size_only = is_binary == Some(true); + let size_in_bytes = 
(need_size_only + || (is_binary != Some(false) && self.options.large_file_threshold_bytes > 0)) + .then(|| { + none_if_missing(self.path.metadata().map(|md| md.len())).map_err(|err| { + convert_to_diffable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + }) + }) + .transpose()?; + match size_in_bytes { + Some(None) => None, // missing as identified by the size check + Some(Some(size)) if size > self.options.large_file_threshold_bytes || need_size_only => { + Some(Data::Binary { size }) + } + _ => { + match driver + .filter(|_| convert.to_worktree()) + .and_then(|d| d.prepare_binary_to_text_cmd(&self.path)) + { + Some(cmd) => { + // Avoid letting the driver program fail if it doesn't exist. + if self.options.large_file_threshold_bytes == 0 + && none_if_missing(std::fs::symlink_metadata(&self.path)) + .map_err(|err| convert_to_diffable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + })? + .is_none() + { + None + } else { + run_cmd(rela_path, cmd, out)?; + Some(Data::Buffer) + } + } + None => { + let file = none_if_missing(std::fs::File::open(&self.path)).map_err(|err| { + convert_to_diffable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + + match file { + Some(mut file) => { + if convert.to_git() { + let res = self.worktree_filter.convert_to_git( + file, + gix_path::from_bstr(rela_path).as_ref(), + attributes, + &mut |buf| objects.try_find(id, buf).map(|obj| obj.map(|_| ())), + )?; + + match res { + ToGitOutcome::Unchanged(mut file) => { + file.read_to_end(out).map_err(|err| { + convert_to_diffable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + } + ToGitOutcome::Process(mut stream) => { + stream.read_to_end(out).map_err(|err| { + convert_to_diffable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + } + ToGitOutcome::Buffer(buf) => { + out.resize(buf.len(), 0); + out.copy_from_slice(buf); + } + } + } else { + 
file.read_to_end(out).map_err(|err| { + convert_to_diffable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + } + + Some(if is_binary.unwrap_or_else(|| is_binary_buf(out)) { + let size = out.len() as u64; + out.clear(); + Data::Binary { size } + } else { + Data::Buffer + }) + } + None => None, + } + } + } + } + } + }; + Ok(Outcome { driver_index, data }) + } + None => { + let data = if id.is_null() { + None + } else { + let header = objects + .try_header(id) + .map_err(gix_object::find::existing_object::Error::Find)? + .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?; + if is_binary.is_none() + && self.options.large_file_threshold_bytes > 0 + && header.size > self.options.large_file_threshold_bytes + { + is_binary = Some(true); + }; + let data = if is_binary == Some(true) { + Data::Binary { size: header.size } + } else { + objects + .try_find(id, out) + .map_err(gix_object::find::existing_object::Error::Find)? + .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?; + if matches!(mode, EntryKind::Blob | EntryKind::BlobExecutable) + && convert == Mode::ToWorktreeAndBinaryToText + || (convert == Mode::ToGitUnlessBinaryToTextIsPresent + && driver.map_or(false, |d| d.binary_to_text_command.is_some())) + { + let res = + self.worktree_filter + .convert_to_worktree(out, rela_path, attributes, Delay::Forbid)?; + + let cmd_and_file = driver + .and_then(|d| { + d.binary_to_text_command.is_some().then(|| { + gix_tempfile::new( + std::env::temp_dir(), + gix_tempfile::ContainingDirectory::Exists, + gix_tempfile::AutoRemove::Tempfile, + ) + .and_then(|mut tmp_file| { + self.path.clear(); + tmp_file.with_mut(|tmp| self.path.push(tmp.path()))?; + Ok(tmp_file) + }) + .map(|tmp_file| { + ( + d.prepare_binary_to_text_cmd(&self.path) + .expect("always get cmd if command is set"), + tmp_file, + ) + }) + }) + }) + .transpose() + .map_err(|err| 
convert_to_diffable::Error::CreateTempfile { + source: err, + rela_path: rela_path.to_owned(), + })?; + match cmd_and_file { + Some((cmd, mut tmp_file)) => { + match res { + ToWorktreeOutcome::Unchanged(buf) | ToWorktreeOutcome::Buffer(buf) => { + tmp_file.write_all(buf) + } + ToWorktreeOutcome::Process(MaybeDelayed::Immediate(mut stream)) => { + std::io::copy(&mut stream, &mut tmp_file).map(|_| ()) + } + ToWorktreeOutcome::Process(MaybeDelayed::Delayed(_)) => { + unreachable!("we prohibit this") + } + } + .map_err(|err| { + convert_to_diffable::Error::CreateTempfile { + source: err, + rela_path: rela_path.to_owned(), + } + })?; + out.clear(); + run_cmd(rela_path, cmd, out)?; + } + None => { + match res { + ToWorktreeOutcome::Unchanged(_) => {} + ToWorktreeOutcome::Buffer(src) => { + out.resize(src.len(), 0); + out.copy_from_slice(src); + } + ToWorktreeOutcome::Process(MaybeDelayed::Immediate(mut stream)) => { + std::io::copy(&mut stream, out).map_err(|err| { + convert_to_diffable::Error::StreamCopy { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + } + ToWorktreeOutcome::Process(MaybeDelayed::Delayed(_)) => { + unreachable!("we prohibit this") + } + }; + } + } + } + + if driver.map_or(true, |d| d.binary_to_text_command.is_none()) + && is_binary.unwrap_or_else(|| is_binary_buf(out)) + { + let size = out.len() as u64; + out.clear(); + Data::Binary { size } + } else { + Data::Buffer + } + }; + Some(data) + }; + Ok(Outcome { driver_index, data }) + } + } + } +} + +fn is_binary_buf(buf: &[u8]) -> bool { + let buf = &buf[..buf.len().min(8000)]; + buf.contains(&0) +} + +fn none_if_missing<T>(res: std::io::Result<T>) -> std::io::Result<Option<T>> { + match res { + Ok(data) => Ok(Some(data)), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None), + Err(err) => Err(err), + } +} + +fn run_cmd(rela_path: &BStr, mut cmd: Command, out: &mut Vec<u8>) -> Result<(), convert_to_diffable::Error> { + gix_trace::debug!(cmd = ?cmd, "Running binary-to-text 
command"); + let mut res = cmd + .output() + .map_err(|err| convert_to_diffable::Error::RunTextConvFilter { + rela_path: rela_path.to_owned(), + cmd: format!("{cmd:?}"), + source: err, + })?; + if !res.status.success() { + return Err(convert_to_diffable::Error::TextConvFilterFailed { + rela_path: rela_path.to_owned(), + cmd: format!("{cmd:?}"), + stderr: res.stderr.into(), + }); + } + out.append(&mut res.stdout); + Ok(()) +} + +impl Driver { + /// Produce an invocable command pre-configured to produce the filtered output on stdout after reading `path`. + pub fn prepare_binary_to_text_cmd(&self, path: &Path) -> Option<std::process::Command> { + let command: &BStr = self.binary_to_text_command.as_ref()?.as_ref(); + let cmd = gix_command::prepare(gix_path::from_bstr(command).into_owned()) + .with_shell() + .stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .arg(path) + .into(); + Some(cmd) + } +} diff --git a/vendor/gix-diff/src/blob/platform.rs b/vendor/gix-diff/src/blob/platform.rs new file mode 100644 index 000000000..fb37b735c --- /dev/null +++ b/vendor/gix-diff/src/blob/platform.rs @@ -0,0 +1,619 @@ +use std::{io::Write, process::Stdio}; + +use bstr::{BStr, BString, ByteSlice}; + +use super::Algorithm; +use crate::blob::{pipeline, Pipeline, Platform, ResourceKind}; + +/// A key to uniquely identify either a location in the worktree, or in the object database. +#[derive(Clone)] +pub(crate) struct CacheKey { + id: gix_hash::ObjectId, + location: BString, + /// If `true`, this is an `id` based key, otherwise it's location based. + use_id: bool, + /// Only relevant when `id` is not null, to further differentiate content and allow us to + /// keep track of both links and blobs with the same content (rare, but possible). + is_link: bool, +} + +/// A stored value representing a diffable resource. 
+#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] +pub(crate) struct CacheValue { + /// The outcome of converting a resource into a diffable format using [Pipeline::convert_to_diffable()]. + conversion: pipeline::Outcome, + /// The kind of the resource we are looking at. Only possible values are `Blob`, `BlobExecutable` and `Link`. + mode: gix_object::tree::EntryKind, + /// A possibly empty buffer, depending on `conversion.data` which may indicate the data is considered binary. + buffer: Vec<u8>, +} + +impl std::hash::Hash for CacheKey { + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + if self.use_id { + self.id.hash(state); + self.is_link.hash(state) + } else { + self.location.hash(state) + } + } +} + +impl PartialEq for CacheKey { + fn eq(&self, other: &Self) -> bool { + match (self.use_id, other.use_id) { + (false, false) => self.location.eq(&other.location), + (true, true) => self.id.eq(&other.id) && self.is_link.eq(&other.is_link), + _ => false, + } + } +} + +impl Eq for CacheKey {} + +impl Default for CacheKey { + fn default() -> Self { + CacheKey { + id: gix_hash::Kind::Sha1.null(), + use_id: false, + is_link: false, + location: BString::default(), + } + } +} + +impl CacheKey { + fn set_location(&mut self, rela_path: &BStr) { + self.location.clear(); + self.location.extend_from_slice(rela_path); + } +} + +/// A resource ready to be diffed in one way or another. +#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] +pub struct Resource<'a> { + /// If available, an index into the `drivers` field to access more diff-related information of the driver for items + /// at the given path, as previously determined by git-attributes. + /// + /// Note that drivers are queried even if there is no object available. + pub driver_index: Option<usize>, + /// The data itself, suitable for diffing, and if the object or worktree item is present at all. + pub data: resource::Data<'a>, + /// The kind of the resource we are looking at. 
Only possible values are `Blob`, `BlobExecutable` and `Link`. + pub mode: gix_object::tree::EntryKind, + /// The location of the resource, relative to the working tree. + pub rela_path: &'a BStr, + /// The id of the content as it would be stored in `git`, or `null` if the content doesn't exist anymore at + /// `rela_path` or if it was never computed. This can happen with content read from the worktree, which has to + /// go through a filter to be converted back to what `git` would store. + pub id: &'a gix_hash::oid, +} + +/// +pub mod resource { + use crate::blob::{ + pipeline, + platform::{CacheKey, CacheValue, Resource}, + }; + + impl<'a> Resource<'a> { + pub(crate) fn new(key: &'a CacheKey, value: &'a CacheValue) -> Self { + Resource { + driver_index: value.conversion.driver_index, + data: value.conversion.data.map_or(Data::Missing, |data| match data { + pipeline::Data::Buffer => Data::Buffer(&value.buffer), + pipeline::Data::Binary { size } => Data::Binary { size }, + }), + mode: value.mode, + rela_path: key.location.as_ref(), + id: &key.id, + } + } + + /// Produce an iterator over lines, separated by LF or CRLF, suitable to create tokens using + /// [`imara_diff::intern::InternedInput`]. + pub fn intern_source(&self) -> imara_diff::sources::ByteLines<'a, true> { + crate::blob::sources::byte_lines_with_terminator(self.data.as_slice().unwrap_or_default()) + } + } + + /// The data of a diffable resource, as it could be determined and computed previously. + #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] + pub enum Data<'a> { + /// The object is missing, either because it didn't exist in the working tree or because its `id` was null. + Missing, + /// The textual data as processed to be in a diffable state. + Buffer(&'a [u8]), + /// The size that the binary blob had at the given revision, without having applied filters, as it's either + /// considered binary or above the big-file threshold. 
+ /// + /// In this state, the binary file cannot be diffed. + Binary { + /// The size of the object prior to performing any filtering or as it was found on disk. + /// + /// Note that technically, the size isn't always representative of the same 'state' of the + /// content, as once it can be the size of the blob in git, and once it's the size of file + /// in the worktree. + size: u64, + }, + } + + impl<'a> Data<'a> { + /// Return ourselves as slice of bytes if this instance stores data. + pub fn as_slice(&self) -> Option<&'a [u8]> { + match self { + Data::Buffer(d) => Some(d), + Data::Binary { .. } | Data::Missing => None, + } + } + } +} + +/// +pub mod set_resource { + use bstr::BString; + + use crate::blob::{pipeline, ResourceKind}; + + /// The error returned by [Platform::set_resource](super::Platform::set_resource). + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("Can only diff blobs and links, not {mode:?}")] + InvalidMode { mode: gix_object::tree::EntryKind }, + #[error("Failed to read {kind} worktree data from '{rela_path}'")] + Io { + rela_path: BString, + kind: ResourceKind, + source: std::io::Error, + }, + #[error("Failed to obtain attributes for {kind} resource at '{rela_path}'")] + Attributes { + rela_path: BString, + kind: ResourceKind, + source: std::io::Error, + }, + #[error(transparent)] + ConvertToDiffable(#[from] pipeline::convert_to_diffable::Error), + } +} + +/// +pub mod prepare_diff { + use bstr::BStr; + + use crate::blob::platform::Resource; + + /// The kind of operation that was performed during the [`diff`](super::Platform::prepare_diff()) operation. + #[derive(Debug, Copy, Clone, Eq, PartialEq)] + pub enum Operation<'a> { + /// The [internal diff algorithm](imara_diff::diff) should be called with the provided arguments. 
+ /// This only happens if none of the resources are binary, and if there is no external diff program configured via git-attributes + /// *or* [Options::skip_internal_diff_if_external_is_configured](super::Options::skip_internal_diff_if_external_is_configured) + /// is `false`. + /// + /// Use [`Outcome::interned_input()`] to easily obtain an interner for use with [`imara_diff::diff()`], or maintain one yourself + /// for greater re-use. + InternalDiff { + /// The algorithm we determined should be used, which is one of (in order, first set one wins): + /// + /// * the driver's override + /// * the platform's own configuration (typically from git-config) + /// * the default algorithm + algorithm: imara_diff::Algorithm, + }, + /// Run the external diff program as configured in the `source`-resource's driver. + /// This only happens if [Options::skip_internal_diff_if_external_is_configured](super::Options::skip_internal_diff_if_external_is_configured) + /// was `true`, preventing the usage of the internal diff implementation. + ExternalCommand { + /// The command as extracted from [Driver::command](super::super::Driver::command). + /// Use it in [`Platform::prepare_diff_command`](super::Platform::prepare_diff_command()) to easily prepare a compatible invocation. + command: &'a BStr, + }, + /// One of the involved resources, [`old`](Outcome::old) or [`new`](Outcome::new), was binary and thus no diff + /// can be performed. + SourceOrDestinationIsBinary, + } + + /// The outcome of a [`prepare_diff`](super::Platform::prepare_diff()) operation. + #[derive(Debug, Copy, Clone, Eq, PartialEq)] + pub struct Outcome<'a> { + /// The kind of diff that was actually performed. This may include skipping the internal diff as well. + pub operation: Operation<'a>, + /// The old or source of the diff operation. + pub old: Resource<'a>, + /// The new or destination of the diff operation. 
+ pub new: Resource<'a>, + } + + impl<'a> Outcome<'a> { + /// Produce an instance of an interner which `git` would use to perform diffs. + pub fn interned_input(&self) -> imara_diff::intern::InternedInput<&'a [u8]> { + crate::blob::intern::InternedInput::new(self.old.intern_source(), self.new.intern_source()) + } + } + + /// The error returned by [Platform::prepare_diff()](super::Platform::prepare_diff()). + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("Either the source or the destination of the diff operation were not set")] + SourceOrDestinationUnset, + #[error("Tried to diff resources that are both considered removed")] + SourceAndDestinationRemoved, + } +} + +/// +pub mod prepare_diff_command { + use std::ops::{Deref, DerefMut}; + + use bstr::BString; + + /// The error returned by [Platform::prepare_diff_command()](super::Platform::prepare_diff_command()). + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("Either the source or the destination of the diff operation were not set")] + SourceOrDestinationUnset, + #[error("Binary resources can't be diffed with an external command (as we don't have the data anymore)")] + SourceOrDestinationBinary, + #[error( + "Tempfile to store content of '{rela_path}' for passing to external diff command could not be created" + )] + CreateTempfile { rela_path: BString, source: std::io::Error }, + #[error("Could not write content of '{rela_path}' to tempfile for passing to external diff command")] + WriteTempfile { rela_path: BString, source: std::io::Error }, + } + + /// The outcome of a [`prepare_diff_command`](super::Platform::prepare_diff_command()) operation. + /// + /// This type acts like [`std::process::Command`], ready to run, with `stdin`, `stdout` and `stderr` set to *inherit* + /// all handles as this is expected to be for visual inspection. 
+ pub struct Command { + pub(crate) cmd: std::process::Command, + /// Possibly a tempfile to be removed after the run, or `None` if there is no old version. + pub(crate) old: Option<gix_tempfile::Handle<gix_tempfile::handle::Closed>>, + /// Possibly a tempfile to be removed after the run, or `None` if there is no new version. + pub(crate) new: Option<gix_tempfile::Handle<gix_tempfile::handle::Closed>>, + } + + impl Deref for Command { + type Target = std::process::Command; + + fn deref(&self) -> &Self::Target { + &self.cmd + } + } + + impl DerefMut for Command { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.cmd + } + } +} + +/// Options for use in [Platform::new()]. +#[derive(Default, Copy, Clone)] +pub struct Options { + /// The algorithm to use when diffing. + /// If unset, it uses the [default algorithm](Algorithm::default()). + pub algorithm: Option<Algorithm>, + /// If `true`, default `false`, then an external `diff` configured using gitattributes and drivers, + /// will cause the built-in diff [to be skipped](prepare_diff::Operation::ExternalCommand). + /// Otherwise, the internal diff is called despite the configured external diff, which is + /// typically what callers expect by default. + pub skip_internal_diff_if_external_is_configured: bool, +} + +/// Lifecycle +impl Platform { + /// Create a new instance with `options`, and a way to `filter` data from the object database to data that is diff-able. + /// `filter_mode` decides how to do that specifically. + /// Use `attr_stack` to access attributes pertaining worktree filters and diff settings. 
+ pub fn new( + options: Options, + filter: Pipeline, + filter_mode: pipeline::Mode, + attr_stack: gix_worktree::Stack, + ) -> Self { + Platform { + old: None, + new: None, + diff_cache: Default::default(), + options, + filter, + filter_mode, + attr_stack, + } + } +} + +/// Conversions +impl Platform { + /// Store enough information about a resource to eventually diff it, where… + /// + /// * `id` is the hash of the resource. If it [is null](gix_hash::ObjectId::is_null()), it should either + /// be a resource in the worktree, or it's considered a non-existing, deleted object. + /// If an `id` is known, as the hash of the object as (would) be stored in `git`, then it should be provided + /// for completeness. + /// * `mode` is the kind of object (only blobs and links are allowed) + /// * `rela_path` is the relative path as seen from the (work)tree root. + /// * `kind` identifies the side of the diff this resource will be used for. + /// A diff needs both `OldOrSource` *and* `NewOrDestination`. + /// * `objects` provides access to the object database in case the resource can't be read from a worktree. + /// + /// Note that it's assumed that either `id + mode` or `rela_path` can serve as unique identifier for the resource, + /// depending on whether or not a [worktree root](pipeline::WorktreeRoots) is set for the resource of `kind`, + /// with resources with worktree roots using the `rela_path` as unique identifier. + /// + /// ### Important + /// + /// If an error occurs, the previous resource of `kind` will be cleared, preventing further diffs + /// unless another attempt succeeds. 
+ pub fn set_resource( + &mut self, + id: gix_hash::ObjectId, + mode: gix_object::tree::EntryKind, + rela_path: &BStr, + kind: ResourceKind, + objects: &impl gix_object::FindObjectOrHeader, // TODO: make this `dyn` once https://github.com/rust-lang/rust/issues/65991 is stable, then also make tracker.rs `objects` dyn + ) -> Result<(), set_resource::Error> { + let res = self.set_resource_inner(id, mode, rela_path, kind, objects); + if res.is_err() { + *match kind { + ResourceKind::OldOrSource => &mut self.old, + ResourceKind::NewOrDestination => &mut self.new, + } = None; + } + res + } + + /// Given `diff_command` and `context`, typically obtained from git-configuration, and the currently set diff-resources, + /// prepare the invocation and temporary files needed to launch it according to protocol. + /// `count` / `total` are used for progress indication passed as environment variables `GIT_DIFF_PATH_(COUNTER|TOTAL)` + /// respectively (0-based), so the first path has `count=0` and `total=1` (assuming there is only one path). + /// Returns an error if at least one resource is unset, see [`set_resource()`](Self::set_resource()). + /// + /// Please note that this is an expensive operation as it will always create up to two temporary files to hold the data + /// for the old and new resources. + /// + /// ### Deviation + /// + /// If one of the resources is binary, the operation reports an error as such resources don't make their data available + /// which is required for the external diff to run. 
+ pub fn prepare_diff_command( + &self, + diff_command: BString, + context: gix_command::Context, + count: usize, + total: usize, + ) -> Result<prepare_diff_command::Command, prepare_diff_command::Error> { + fn add_resource( + cmd: &mut std::process::Command, + res: Resource<'_>, + ) -> Result<Option<gix_tempfile::Handle<gix_tempfile::handle::Closed>>, prepare_diff_command::Error> { + let tmpfile = match res.data { + resource::Data::Missing => { + cmd.args(["/dev/null", ".", "."]); + None + } + resource::Data::Buffer(buf) => { + let mut tmp = gix_tempfile::new( + std::env::temp_dir(), + gix_tempfile::ContainingDirectory::Exists, + gix_tempfile::AutoRemove::Tempfile, + ) + .map_err(|err| prepare_diff_command::Error::CreateTempfile { + rela_path: res.rela_path.to_owned(), + source: err, + })?; + tmp.write_all(buf) + .map_err(|err| prepare_diff_command::Error::WriteTempfile { + rela_path: res.rela_path.to_owned(), + source: err, + })?; + tmp.with_mut(|f| { + cmd.arg(f.path()); + }) + .map_err(|err| prepare_diff_command::Error::WriteTempfile { + rela_path: res.rela_path.to_owned(), + source: err, + })?; + cmd.arg(res.id.to_string()).arg(res.mode.as_octal_str().to_string()); + let tmp = tmp.close().map_err(|err| prepare_diff_command::Error::WriteTempfile { + rela_path: res.rela_path.to_owned(), + source: err, + })?; + Some(tmp) + } + resource::Data::Binary { .. 
} => return Err(prepare_diff_command::Error::SourceOrDestinationBinary), + }; + Ok(tmpfile) + } + + let (old, new) = self + .resources() + .ok_or(prepare_diff_command::Error::SourceOrDestinationUnset)?; + let mut cmd: std::process::Command = gix_command::prepare(gix_path::from_bstring(diff_command)) + .with_context(context) + .env("GIT_DIFF_PATH_COUNTER", (count + 1).to_string()) + .env("GIT_DIFF_PATH_TOTAL", total.to_string()) + .stdin(Stdio::inherit()) + .stdout(Stdio::inherit()) + .stderr(Stdio::inherit()) + .into(); + + cmd.arg(gix_path::from_bstr(old.rela_path).into_owned()); + let mut out = prepare_diff_command::Command { + cmd, + old: None, + new: None, + }; + + out.old = add_resource(&mut out.cmd, old)?; + out.new = add_resource(&mut out.cmd, new)?; + + if old.rela_path != new.rela_path { + out.cmd.arg(gix_path::from_bstr(new.rela_path).into_owned()); + } + + Ok(out) + } + + /// Returns the resource of the given kind if it was set. + pub fn resource(&self, kind: ResourceKind) -> Option<Resource<'_>> { + let key = match kind { + ResourceKind::OldOrSource => self.old.as_ref(), + ResourceKind::NewOrDestination => self.new.as_ref(), + }?; + Resource::new(key, self.diff_cache.get(key)?).into() + } + + /// Obtain the two resources that were previously set as `(OldOrSource, NewOrDestination)`, if both are set and available. + /// + /// This is useful if one wishes to manually prepare the diff, maybe for invoking external programs, instead of relying on + /// [`Self::prepare_diff()`]. + pub fn resources(&self) -> Option<(Resource<'_>, Resource<'_>)> { + let key = &self.old.as_ref()?; + let value = self.diff_cache.get(key)?; + let old = Resource::new(key, value); + + let key = &self.new.as_ref()?; + let value = self.diff_cache.get(key)?; + let new = Resource::new(key, value); + Some((old, new)) + } + + /// Prepare a diff operation on the [previously set](Self::set_resource()) [old](ResourceKind::OldOrSource) and + /// [new](ResourceKind::NewOrDestination) resources. 
+ /// + /// The returned outcome allows to easily perform diff operations, based on the [`prepare_diff::Outcome::operation`] field, + /// which hints at what should be done. + pub fn prepare_diff(&mut self) -> Result<prepare_diff::Outcome<'_>, prepare_diff::Error> { + let old_key = &self.old.as_ref().ok_or(prepare_diff::Error::SourceOrDestinationUnset)?; + let old = self + .diff_cache + .get(old_key) + .ok_or(prepare_diff::Error::SourceOrDestinationUnset)?; + let new_key = &self.new.as_ref().ok_or(prepare_diff::Error::SourceOrDestinationUnset)?; + let new = self + .diff_cache + .get(new_key) + .ok_or(prepare_diff::Error::SourceOrDestinationUnset)?; + let mut out = prepare_diff::Outcome { + operation: prepare_diff::Operation::SourceOrDestinationIsBinary, + old: Resource::new(old_key, old), + new: Resource::new(new_key, new), + }; + + match (old.conversion.data, new.conversion.data) { + (None, None) => return Err(prepare_diff::Error::SourceAndDestinationRemoved), + (Some(pipeline::Data::Binary { .. }), _) | (_, Some(pipeline::Data::Binary { .. })) => return Ok(out), + _either_missing_or_non_binary => { + if let Some(command) = old + .conversion + .driver_index + .and_then(|idx| self.filter.drivers[idx].command.as_deref()) + .filter(|_| self.options.skip_internal_diff_if_external_is_configured) + { + out.operation = prepare_diff::Operation::ExternalCommand { + command: command.as_bstr(), + }; + return Ok(out); + } + } + } + + out.operation = prepare_diff::Operation::InternalDiff { + algorithm: old + .conversion + .driver_index + .and_then(|idx| self.filter.drivers[idx].algorithm) + .or(self.options.algorithm) + .unwrap_or_default(), + }; + Ok(out) + } + + /// Every call to [set_resource()](Self::set_resource()) will keep the diffable data in memory, and that will never be cleared. + /// + /// Use this method to clear the cache, releasing memory. 
Note that this will also lose all information about resources + /// which means diffs would fail unless the resources are set again. + /// + /// Note that this also has to be called if the same resource is going to be diffed in different states, i.e. using different + /// `id`s, but the same `rela_path`. + pub fn clear_resource_cache(&mut self) { + self.old = None; + self.new = None; + self.diff_cache.clear(); + } +} + +impl Platform { + fn set_resource_inner( + &mut self, + id: gix_hash::ObjectId, + mode: gix_object::tree::EntryKind, + rela_path: &BStr, + kind: ResourceKind, + objects: &impl gix_object::FindObjectOrHeader, + ) -> Result<(), set_resource::Error> { + if matches!( + mode, + gix_object::tree::EntryKind::Commit | gix_object::tree::EntryKind::Tree + ) { + return Err(set_resource::Error::InvalidMode { mode }); + } + let storage = match kind { + ResourceKind::OldOrSource => &mut self.old, + ResourceKind::NewOrDestination => &mut self.new, + } + .get_or_insert_with(Default::default); + + storage.id = id; + storage.set_location(rela_path); + storage.is_link = matches!(mode, gix_object::tree::EntryKind::Link); + storage.use_id = self.filter.roots.by_kind(kind).is_none(); + + if self.diff_cache.contains_key(storage) { + return Ok(()); + } + let entry = self + .attr_stack + .at_entry(rela_path, Some(false), objects) + .map_err(|err| set_resource::Error::Attributes { + source: err, + kind, + rela_path: rela_path.to_owned(), + })?; + let mut buf = Vec::new(); + let out = self.filter.convert_to_diffable( + &id, + mode, + rela_path, + kind, + &mut |_, out| { + let _ = entry.matching_attributes(out); + }, + objects, + self.filter_mode, + &mut buf, + )?; + let key = storage.clone(); + assert!( + self.diff_cache + .insert( + key, + CacheValue { + conversion: out, + mode, + buffer: buf, + }, + ) + .is_none(), + "The key impl makes clashes impossible with our usage" + ); + Ok(()) + } +} diff --git a/vendor/gix-diff/src/lib.rs b/vendor/gix-diff/src/lib.rs index 
6d94a7591..1fe8d2e6b 100644 --- a/vendor/gix-diff/src/lib.rs +++ b/vendor/gix-diff/src/lib.rs @@ -1,13 +1,48 @@ //! Algorithms for diffing various git object types and for generating patches, highly optimized for performance. //! ## Feature Flags #![cfg_attr( -feature = "document-features", -cfg_attr(doc, doc = ::document_features::document_features!()) + all(doc, feature = "document-features"), + doc = ::document_features::document_features!() )] -#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] +#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg, doc_auto_cfg))] #![deny(missing_docs, rust_2018_idioms)] #![forbid(unsafe_code)] +/// Re-export for use in public API. +#[cfg(feature = "blob")] +pub use gix_command as command; +/// Re-export for use in public API. +#[cfg(feature = "blob")] +pub use gix_object as object; + +/// A structure to capture how to perform rename and copy tracking, used by the [rewrites::Tracker]. +#[derive(Debug, Copy, Clone, PartialEq)] +#[cfg(feature = "blob")] +pub struct Rewrites { + /// If `Some(…)`, also find copies. `None` is the default which does not try to detect copies at all. + /// + /// Note that this is an even more expensive operation than detecting renames stemming from additions and deletions + /// as the resulting set to search through is usually larger. + pub copies: Option<rewrites::Copies>, + /// The percentage of similarity needed for files to be considered renamed, defaulting to `Some(0.5)`. + /// This field is similar to `git diff -M50%`. + /// + /// If `None`, files are only considered equal if their content matches 100%. + /// Note that values greater than 1.0 have no different effect than 1.0. + pub percentage: Option<f32>, + /// The amount of files to consider for fuzzy rename or copy tracking. Defaults to 1000, meaning that only 1000*1000 + /// combinations can be tested for fuzzy matches, i.e. the ones that try to find matches by comparing similarity. + /// If 0, there is no limit. 
+ /// + /// If the limit would not be enough to test the entire set of combinations, the algorithm will trade in precision and not + /// run the fuzzy version of identity tests at all. That way results are never partial. + pub limit: usize, +} + +/// Contains a [Tracker](rewrites::Tracker) to detect rewrites. +#[cfg(feature = "blob")] +pub mod rewrites; + /// pub mod tree; diff --git a/vendor/gix-diff/src/rewrites/mod.rs b/vendor/gix-diff/src/rewrites/mod.rs new file mode 100644 index 000000000..08d6f2cce --- /dev/null +++ b/vendor/gix-diff/src/rewrites/mod.rs @@ -0,0 +1,71 @@ +use crate::Rewrites; + +/// Types related to the rename tracker for renames, rewrites and copies. +pub mod tracker; + +/// A type to retain state related to an ongoing tracking operation to retain sets of interesting changes +/// of which some are retained to at a later stage compute the ones that seem to be renames or copies. +pub struct Tracker<T> { + /// The tracked items thus far, which will be used to determine renames/copies and rewrites later. + items: Vec<tracker::Item<T>>, + /// A place to store all paths in to reduce amount of allocations. + path_backing: Vec<u8>, + /// How to track copies and/or rewrites. + rewrites: Rewrites, +} + +/// Determine in which set of files to search for copies. +#[derive(Default, Debug, Copy, Clone, Eq, PartialEq)] +pub enum CopySource { + /// Find copies from the set of modified files only. + #[default] + FromSetOfModifiedFiles, + /// Find copies from the set of modified files, as well as all files known to the source (i.e. previous state of the tree). + /// + /// This can be an expensive operation as it scales exponentially with the total amount of files in the set. + FromSetOfModifiedFilesAndAllSources, +} + +/// Under which circumstances we consider a file to be a copy. +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct Copies { + /// The set of files to search when finding the source of copies. 
+ pub source: CopySource, + /// Equivalent to [`Rewrites::percentage`], but used for copy tracking. + /// + /// Useful to have similarity-based rename tracking and cheaper copy tracking. + pub percentage: Option<f32>, +} + +impl Default for Copies { + fn default() -> Self { + Copies { + source: CopySource::default(), + percentage: Some(0.5), + } + } +} + +/// Information collected while handling rewrites of files which may be tracked. +#[derive(Default, Clone, Copy, Debug, PartialEq)] +pub struct Outcome { + /// The options used to guide the rewrite tracking. Either fully provided by the caller or retrieved from git configuration. + pub options: Rewrites, + /// The amount of similarity checks that have been conducted to find renamed files and potentially copies. + pub num_similarity_checks: usize, + /// Set to the amount of worst-case rename permutations we didn't search as our limit didn't allow it. + pub num_similarity_checks_skipped_for_rename_tracking_due_to_limit: usize, + /// Set to the amount of worst-case copy permutations we didn't search as our limit didn't allow it. + pub num_similarity_checks_skipped_for_copy_tracking_due_to_limit: usize, +} + +/// The default settings for rewrites according to the git configuration defaults. +impl Default for Rewrites { + fn default() -> Self { + Rewrites { + copies: None, + percentage: Some(0.5), + limit: 1000, + } + } +} diff --git a/vendor/gix-diff/src/rewrites/tracker.rs b/vendor/gix-diff/src/rewrites/tracker.rs new file mode 100644 index 000000000..95ebe7fab --- /dev/null +++ b/vendor/gix-diff/src/rewrites/tracker.rs @@ -0,0 +1,620 @@ +//! ### Deviation +//! +//! Note that the algorithm implemented here is in many ways different from what `git` does. +//! +//! - it's less sophisticated and doesn't use any ranking of candidates. Instead, it picks the first possible match. +//! - the set used for copy-detection is probably smaller by default. 
+use std::ops::Range; + +use bstr::BStr; +use gix_object::tree::{EntryKind, EntryMode}; + +use crate::{ + blob::{platform::prepare_diff::Operation, DiffLineStats, ResourceKind}, + rewrites::{CopySource, Outcome, Tracker}, + Rewrites, +}; + +/// The kind of a change. +#[derive(Debug, Copy, Clone, Ord, PartialOrd, PartialEq, Eq)] +pub enum ChangeKind { + /// The change represents the *deletion* of an item. + Deletion, + /// The change represents the *modification* of an item. + Modification, + /// The change represents the *addition* of an item. + Addition, +} + +/// A trait providing all functionality to abstract over the concept of a change, as seen by the [`Tracker`]. +pub trait Change: Clone { + /// Return the hash of this change for identification. + /// + /// Note that this is the id of the object as stored in `git`, i.e. it must have gone through workspace + /// conversions. + fn id(&self) -> &gix_hash::oid; + /// Return the kind of this change. + fn kind(&self) -> ChangeKind; + /// Return more information about the kind of entry affected by this change. + fn entry_mode(&self) -> EntryMode; + /// Return the id of the change along with its mode. + fn id_and_entry_mode(&self) -> (&gix_hash::oid, EntryMode); +} + +/// A set of tracked items allows to figure out their relations by figuring out their similarity. +pub(crate) struct Item<T> { + /// The underlying raw change + change: T, + /// That slice into the backing for paths. + path: Range<usize>, + /// If true, this item was already emitted, i.e. seen by the caller. 
+ emitted: bool, +} + +impl<T: Change> Item<T> { + fn location<'a>(&self, backing: &'a [u8]) -> &'a BStr { + backing[self.path.clone()].as_ref() + } + fn entry_mode_compatible(&self, mode: EntryMode) -> bool { + use EntryKind::*; + matches!( + (mode.kind(), self.change.entry_mode().kind()), + (Blob | BlobExecutable, Blob | BlobExecutable) | (Link, Link) + ) + } + + fn is_source_for_destination_of(&self, kind: visit::SourceKind, dest_item_mode: EntryMode) -> bool { + self.entry_mode_compatible(dest_item_mode) + && match kind { + visit::SourceKind::Rename => !self.emitted && matches!(self.change.kind(), ChangeKind::Deletion), + visit::SourceKind::Copy => { + matches!(self.change.kind(), ChangeKind::Modification) + } + } + } +} + +/// A module with types used in the user-callback in [Tracker::emit()](crate::rewrites::Tracker::emit()). +pub mod visit { + use bstr::BStr; + use gix_object::tree::EntryMode; + + use crate::blob::DiffLineStats; + + /// The source of a rewrite, rename or copy. + #[derive(Debug, Clone, PartialEq, PartialOrd)] + pub struct Source<'a> { + /// The kind of entry. + pub entry_mode: EntryMode, + /// The hash of the state of the source as seen in the object database. + pub id: gix_hash::ObjectId, + /// Further specify what kind of source this is. + pub kind: SourceKind, + /// The repository-relative location of this entry. + pub location: &'a BStr, + /// If this is a rewrite, indicate how many lines would need to change to turn this source into the destination. + pub diff: Option<DiffLineStats>, + } + + /// Further identify the kind of [Source]. + #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub enum SourceKind { + /// This is the source of an entry that was renamed, as `source` was renamed to `destination`. + Rename, + /// This is the source of a copy, as `source` was copied into `destination`. + Copy, + } + + /// A change along with a location. 
+ #[derive(Clone)] + pub struct Destination<'a, T: Clone> { + /// The change at the given `location`. + pub change: T, + /// The repository-relative location of this destination. + pub location: &'a BStr, + } +} + +/// +pub mod emit { + /// The error returned by [Tracker::emit()](super::Tracker::emit()). + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("Could not find blob for similarity checking")] + FindExistingBlob(#[from] gix_object::find::existing_object::Error), + #[error("Could not obtain exhaustive item set to use as possible sources for copy detection")] + GetItemsForExhaustiveCopyDetection(#[source] Box<dyn std::error::Error + Send + Sync>), + #[error(transparent)] + SetResource(#[from] crate::blob::platform::set_resource::Error), + #[error(transparent)] + PrepareDiff(#[from] crate::blob::platform::prepare_diff::Error), + } +} + +/// Lifecycle +impl<T: Change> Tracker<T> { + /// Create a new instance with `rewrites` configuration. + pub fn new(rewrites: Rewrites) -> Self { + Tracker { + items: vec![], + path_backing: vec![], + rewrites, + } + } +} + +/// build state and find matches. +impl<T: Change> Tracker<T> { + /// We may refuse the push if that information isn't needed for what we have to track. + pub fn try_push_change(&mut self, change: T, location: &BStr) -> Option<T> { + if !change.entry_mode().is_blob_or_symlink() { + return Some(change); + } + let keep = match (self.rewrites.copies, change.kind()) { + (Some(_find_copies), _) => true, + (None, ChangeKind::Modification { .. }) => false, + (None, _) => true, + }; + + if !keep { + return Some(change); + } + + let start = self.path_backing.len(); + self.path_backing.extend_from_slice(location); + self.items.push(Item { + path: start..self.path_backing.len(), + change, + emitted: false, + }); + None + } + + /// Can only be called once effectively as it alters its own state to assure each item is only emitted once. 
+ /// + /// `cb(destination, source)` is called for each item, either with `Some(source)` if it's + /// the destination of a copy or rename, or with `None` for source if no relation to other + /// items in the tracked set exist, which is like saying 'no rename or rewrite or copy' happened. + /// + /// `objects` is used to access blob data for similarity checks if required and is taken directly from the object database. + /// Worktree filters and text conversions will be applied afterwards automatically. Note that object-caching *should not* + /// be enabled as caching is implemented by `diff_cache`, after all, the blob that's actually diffed is going + /// through conversion steps. + /// + /// `diff_cache` is a way to retain a cache of resources that are prepared for rapid diffing, and it also controls + /// the diff-algorithm (provided no user-algorithm is set). + /// Note that we control a few options of `diff_cache` to assure it will ignore external commands. + /// Note that we do not control how the `diff_cache` converts resources, it's left to the caller to decide + /// if it should look at what's stored in `git`, or in the working tree, along with all diff-specific conversions. + /// + /// `push_source_tree(push_fn: push(change, location))` is a function that is called when the entire tree of the source + /// should be added as modifications by calling `push` repeatedly to use for perfect copy tracking. Note that `push` + /// will panic if `change` is not a modification, and it's valid to not call `push` at all. 
+ pub fn emit<PushSourceTreeFn, E>( + &mut self, + mut cb: impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_>>) -> crate::tree::visit::Action, + diff_cache: &mut crate::blob::Platform, + objects: &impl gix_object::FindObjectOrHeader, + mut push_source_tree: PushSourceTreeFn, + ) -> Result<Outcome, emit::Error> + where + PushSourceTreeFn: FnMut(&mut dyn FnMut(T, &BStr)) -> Result<(), E>, + E: std::error::Error + Send + Sync + 'static, + { + diff_cache.options.skip_internal_diff_if_external_is_configured = false; + + fn by_id_and_location<T: Change>(a: &Item<T>, b: &Item<T>) -> std::cmp::Ordering { + a.change + .id() + .cmp(b.change.id()) + .then_with(|| a.path.start.cmp(&b.path.start).then(a.path.end.cmp(&b.path.end))) + } + self.items.sort_by(by_id_and_location); + + let mut out = Outcome { + options: self.rewrites, + ..Default::default() + }; + self.match_pairs_of_kind( + visit::SourceKind::Rename, + &mut cb, + self.rewrites.percentage, + &mut out, + diff_cache, + objects, + )?; + + if let Some(copies) = self.rewrites.copies { + self.match_pairs_of_kind( + visit::SourceKind::Copy, + &mut cb, + copies.percentage, + &mut out, + diff_cache, + objects, + )?; + + match copies.source { + CopySource::FromSetOfModifiedFiles => {} + CopySource::FromSetOfModifiedFilesAndAllSources => { + push_source_tree(&mut |change, location| { + assert!( + self.try_push_change(change, location).is_none(), + "we must accept every change" + ); + // make sure these aren't viable to be emitted anymore. 
+                        self.items.last_mut().expect("just pushed").emitted = true;
+                    })
+                    .map_err(|err| emit::Error::GetItemsForExhaustiveCopyDetection(Box::new(err)))?;
+                    self.items.sort_by(by_id_and_location);
+
+                    self.match_pairs_of_kind(
+                        visit::SourceKind::Copy,
+                        &mut cb,
+                        copies.percentage,
+                        &mut out,
+                        diff_cache,
+                        objects,
+                    )?;
+                }
+            }
+        }
+
+        self.items
+            .sort_by(|a, b| a.location(&self.path_backing).cmp(b.location(&self.path_backing)));
+        for item in self.items.drain(..).filter(|item| !item.emitted) {
+            if cb(
+                visit::Destination {
+                    location: item.location(&self.path_backing),
+                    change: item.change,
+                },
+                None,
+            ) == crate::tree::visit::Action::Cancel
+            {
+                break;
+            }
+        }
+        Ok(out)
+    }
+}
+
+impl<T: Change> Tracker<T> {
+    /// Pair up un-emitted additions with sources of the given `kind`, calling `cb` for each pair found.
+    ///
+    /// A first pass matches by identity only. If `percentage` permits less than an exact match, a second,
+    /// similarity-based pass follows, unless the estimated `sources * destinations` exceeds a non-zero
+    /// `self.rewrites.limit`. In that case the estimate is recorded in the matching
+    /// `num_similarity_checks_skipped_for_*_due_to_limit` field of `out` and the second pass is skipped.
+    /// A limit of `0` disables the check.
+    ///
+    /// If `cb` cancels during the first pass, the second pass is not attempted and `Ok(())` is returned.
+    fn match_pairs_of_kind(
+        &mut self,
+        kind: visit::SourceKind,
+        cb: &mut impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_>>) -> crate::tree::visit::Action,
+        percentage: Option<f32>,
+        out: &mut Outcome,
+        diff_cache: &mut crate::blob::Platform,
+        objects: &impl gix_object::FindObjectOrHeader,
+    ) -> Result<(), emit::Error> {
+        // we try to cheaply reduce the set of possibilities first, before possibly looking more exhaustively.
+        let needs_second_pass = !needs_exact_match(percentage);
+        if self.match_pairs(cb, None /* by identity */, kind, out, diff_cache, objects)?
+            == crate::tree::visit::Action::Cancel
+        {
+            return Ok(());
+        }
+        if needs_second_pass {
+            let is_limited = if self.rewrites.limit == 0 {
+                false
+            } else {
+                let (num_src, num_dst) =
+                    estimate_involved_items(self.items.iter().map(|item| (item.emitted, item.change.kind())), kind);
+                // Saturate instead of overflowing: a wrapped product could end up below the limit and
+                // wrongly enable the expensive similarity pass (or panic in builds with overflow checks).
+                let permutations = num_src.saturating_mul(num_dst);
+                if permutations > self.rewrites.limit {
+                    match kind {
+                        visit::SourceKind::Rename => {
+                            out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit = permutations;
+                        }
+                        visit::SourceKind::Copy => {
+                            out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit = permutations;
+                        }
+                    }
+                    true
+                } else {
+                    false
+                }
+            };
+            if !is_limited {
+                self.match_pairs(cb, percentage, kind, out, diff_cache, objects)?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Visit every un-emitted addition in `self.items` in order and try to find a source for it with
+    /// `find_match()`, using `percentage` and `kind` as given.
+    ///
+    /// For each match, the destination and its source are both marked as emitted before `cb` is called with them.
+    /// Additions without a match are left untouched.
+    /// Returns `Action::Cancel` as soon as `cb` does, and `Action::Continue` once all additions were visited.
+    fn match_pairs(
+        &mut self,
+        cb: &mut impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_>>) -> crate::tree::visit::Action,
+        percentage: Option<f32>,
+        kind: visit::SourceKind,
+        stats: &mut Outcome,
+        diff_cache: &mut crate::blob::Platform,
+        objects: &impl gix_object::FindObjectOrHeader,
+    ) -> Result<crate::tree::visit::Action, emit::Error> {
+        // Offset into `self.items` from which the search for the next addition resumes.
+        let mut dest_ofs = 0;
+        while let Some((mut dest_idx, dest)) = self.items[dest_ofs..].iter().enumerate().find_map(|(idx, item)| {
+            (!item.emitted && matches!(item.change.kind(), ChangeKind::Addition)).then_some((idx, item))
+        }) {
+            // `idx` was relative to the sub-slice, make it absolute again.
+            dest_idx += dest_ofs;
+            dest_ofs = dest_idx + 1;
+            let src = find_match(
+                &self.items,
+                dest,
+                dest_idx,
+                percentage,
+                kind,
+                stats,
+                objects,
+                diff_cache,
+                &self.path_backing,
+            )?
+            .map(|(src_idx, src, diff)| {
+                let (id, entry_mode) = src.change.id_and_entry_mode();
+                let id = id.to_owned();
+                let location = src.location(&self.path_backing);
+                (
+                    visit::Source {
+                        entry_mode,
+                        id,
+                        kind,
+                        location,
+                        diff,
+                    },
+                    src_idx,
+                )
+            });
+            if src.is_none() {
+                continue;
+            }
+            let location = dest.location(&self.path_backing);
+            let change = dest.change.clone();
+            let dest = visit::Destination { change, location };
+            self.items[dest_idx].emitted = true;
+            if let Some(src_idx) = src.as_ref().map(|t| t.1) {
+                self.items[src_idx].emitted = true;
+            }
+            if cb(dest, src.map(|t| t.0)) == crate::tree::visit::Action::Cancel {
+                return Ok(crate::tree::visit::Action::Cancel);
+            }
+        }
+        Ok(crate::tree::visit::Action::Continue)
+    }
+}
+
+/// Returns the amount of viable sources and destinations for `items` as eligible for the given `kind` of operation.
+///
+/// Each item is an `(emitted, change_kind)` pair and the result is `(sources, destinations)`.
+/// For renames, emitted items are ignored entirely, deletions are sources and additions are destinations.
+/// For copies, modifications are sources whether emitted or not, and only un-emitted additions are destinations.
+fn estimate_involved_items(
+    items: impl IntoIterator<Item = (bool, ChangeKind)>,
+    kind: visit::SourceKind,
+) -> (usize, usize) {
+    items
+        .into_iter()
+        .filter(|(emitted, _)| match kind {
+            visit::SourceKind::Rename => !*emitted,
+            visit::SourceKind::Copy => true,
+        })
+        .fold((0, 0), |(mut src, mut dest), (emitted, change_kind)| {
+            match change_kind {
+                ChangeKind::Addition => {
+                    if kind == visit::SourceKind::Rename || !emitted {
+                        dest += 1;
+                    }
+                }
+                ChangeKind::Deletion => {
+                    if kind == visit::SourceKind::Rename {
+                        src += 1
+                    }
+                }
+                ChangeKind::Modification => {
+                    if kind == visit::SourceKind::Copy {
+                        src += 1
+                    }
+                }
+            }
+            (src, dest)
+        })
+}
+
+/// Returns `true` if `percentage` is unset or at least `1.0`, i.e. if only identical content may match.
+fn needs_exact_match(percentage: Option<f32>) -> bool {
+    percentage.map_or(true, |p| p >= 1.0)
+}
+
+/// <`src_idx`, src, possibly diff stat>
+type SourceTuple<'a, T> = (usize, &'a Item<T>, Option<DiffLineStats>);
+
+/// Find `item` in our set of items ignoring `item_idx` to avoid finding ourselves, by similarity indicated by `percentage`.
+/// The latter can be `None` or `Some(x)` where `x>=1` for identity, and anything else for similarity.
+/// We also ignore emitted items entirely. +/// Use `kind` to indicate what kind of match we are looking for, which might be deletions matching an `item` addition, or +/// any non-deletion otherwise. +/// Note that we always try to find by identity first even if a percentage is given as it's much faster and may reduce the set +/// of items to be searched. +#[allow(clippy::too_many_arguments)] +fn find_match<'a, T: Change>( + items: &'a [Item<T>], + item: &Item<T>, + item_idx: usize, + percentage: Option<f32>, + kind: visit::SourceKind, + stats: &mut Outcome, + objects: &impl gix_object::FindObjectOrHeader, + diff_cache: &mut crate::blob::Platform, + path_backing: &[u8], +) -> Result<Option<SourceTuple<'a, T>>, emit::Error> { + let (item_id, item_mode) = item.change.id_and_entry_mode(); + if needs_exact_match(percentage) || item_mode.is_link() { + let first_idx = items.partition_point(|a| a.change.id() < item_id); + let range = match items.get(first_idx..).map(|items| { + let end = items + .iter() + .position(|a| a.change.id() != item_id) + .map_or(items.len(), |idx| first_idx + idx); + first_idx..end + }) { + Some(range) => range, + None => return Ok(None), + }; + if range.is_empty() { + return Ok(None); + } + let res = items[range.clone()].iter().enumerate().find_map(|(mut src_idx, src)| { + src_idx += range.start; + (src_idx != item_idx && src.is_source_for_destination_of(kind, item_mode)).then_some((src_idx, src, None)) + }); + if let Some(src) = res { + return Ok(Some(src)); + } + } else { + let mut has_new = false; + let percentage = percentage.expect("it's set to something below 1.0 and we assured this"); + debug_assert_eq!( + item.change.entry_mode().kind(), + EntryKind::Blob, + "symlinks are matched exactly, and trees aren't used here" + ); + + for (can_idx, src) in items + .iter() + .enumerate() + .filter(|(src_idx, src)| *src_idx != item_idx && src.is_source_for_destination_of(kind, item_mode)) + { + if !has_new { + diff_cache.set_resource( + 
item_id.to_owned(), + item_mode.kind(), + item.location(path_backing), + ResourceKind::NewOrDestination, + objects, + )?; + has_new = true; + } + let (src_id, src_mode) = src.change.id_and_entry_mode(); + diff_cache.set_resource( + src_id.to_owned(), + src_mode.kind(), + src.location(path_backing), + ResourceKind::OldOrSource, + objects, + )?; + let prep = diff_cache.prepare_diff()?; + stats.num_similarity_checks += 1; + match prep.operation { + Operation::InternalDiff { algorithm } => { + let tokens = + crate::blob::intern::InternedInput::new(prep.old.intern_source(), prep.new.intern_source()); + let counts = crate::blob::diff( + algorithm, + &tokens, + crate::blob::sink::Counter::new(diff::Statistics { + removed_bytes: 0, + input: &tokens, + }), + ); + let old_data_len = prep.old.data.as_slice().unwrap_or_default().len(); + let new_data_len = prep.new.data.as_slice().unwrap_or_default().len(); + let similarity = (old_data_len - counts.wrapped) as f32 / old_data_len.max(new_data_len) as f32; + if similarity >= percentage { + return Ok(Some(( + can_idx, + src, + DiffLineStats { + removals: counts.removals, + insertions: counts.insertions, + before: tokens.before.len().try_into().expect("interner handles only u32"), + after: tokens.after.len().try_into().expect("interner handles only u32"), + similarity, + } + .into(), + ))); + } + } + Operation::ExternalCommand { .. 
} => { + unreachable!("we have disabled this possibility with an option") + } + Operation::SourceOrDestinationIsBinary => { + // TODO: figure out if git does more here + } + }; + } + } + Ok(None) +} + +mod diff { + use std::ops::Range; + + pub struct Statistics<'a, 'data> { + pub removed_bytes: usize, + pub input: &'a crate::blob::intern::InternedInput<&'data [u8]>, + } + + impl<'a, 'data> crate::blob::Sink for Statistics<'a, 'data> { + type Out = usize; + + fn process_change(&mut self, before: Range<u32>, _after: Range<u32>) { + self.removed_bytes = self.input.before[before.start as usize..before.end as usize] + .iter() + .map(|token| self.input.interner[*token].len()) + .sum(); + } + + fn finish(self) -> Self::Out { + self.removed_bytes + } + } +} + +#[cfg(test)] +mod estimate_involved_items { + use super::estimate_involved_items; + use crate::rewrites::tracker::{visit::SourceKind, ChangeKind}; + + #[test] + fn renames_count_unemitted_as_sources_and_destinations() { + let items = [ + (false, ChangeKind::Addition), + (true, ChangeKind::Deletion), + (true, ChangeKind::Deletion), + ]; + assert_eq!( + estimate_involved_items(items, SourceKind::Rename), + (0, 1), + "here we only have one eligible source, hence nothing to do" + ); + assert_eq!( + estimate_involved_items(items.into_iter().map(|t| (false, t.1)), SourceKind::Rename), + (2, 1), + "now we have more possibilities as renames count un-emitted deletions as source" + ); + } + + #[test] + fn copies_do_not_count_additions_as_sources() { + let items = [ + (false, ChangeKind::Addition), + (true, ChangeKind::Addition), + (true, ChangeKind::Deletion), + ]; + assert_eq!( + estimate_involved_items(items, SourceKind::Copy), + (0, 1), + "one addition as source, the other isn't counted as it's emitted, nor is it considered a copy-source.\ + deletions don't count" + ); + } + + #[test] + fn copies_count_modifications_as_sources() { + let items = [ + (false, ChangeKind::Addition), + (true, ChangeKind::Modification), + (false, 
ChangeKind::Modification), + ]; + assert_eq!( + estimate_involved_items(items, SourceKind::Copy), + (2, 1), + "any modifications is a valid source, emitted or not" + ); + } +} diff --git a/vendor/gix-diff/src/tree/changes.rs b/vendor/gix-diff/src/tree/changes.rs index 16e8f7873..ee86bd8bc 100644 --- a/vendor/gix-diff/src/tree/changes.rs +++ b/vendor/gix-diff/src/tree/changes.rs @@ -1,7 +1,6 @@ use std::{borrow::BorrowMut, collections::VecDeque}; -use gix_hash::{oid, ObjectId}; -use gix_object::tree::EntryRef; +use gix_object::{tree::EntryRef, FindExt}; use crate::{ tree, @@ -12,11 +11,8 @@ use crate::{ #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { - #[error("The object {oid} referenced by the tree or the tree itself was not found in the database")] - FindExisting { - oid: ObjectId, - source: Box<dyn std::error::Error + Send + Sync + 'static>, - }, + #[error(transparent)] + Find(#[from] gix_object::find::existing_iter::Error), #[error("The delegate cancelled the operation")] Cancelled, #[error(transparent)] @@ -24,12 +20,12 @@ pub enum Error { } impl<'a> tree::Changes<'a> { - /// Calculate the changes that would need to be applied to `self` to get `other`. + /// Calculate the changes that would need to be applied to `self` to get `other` using `objects` to obtain objects as needed for traversal. /// /// * The `state` maybe owned or mutably borrowed to allow reuses allocated data structures through multiple runs. /// * `locate` is a function `f(object_id, &mut buffer) -> Option<TreeIter>` to return a `TreeIter` for the given object id backing /// its data in the given buffer. Returning `None` is unexpected as these trees are obtained during iteration, and in a typical - /// database errors are not expected either which is why the error case is omitted. To allow proper error reporting, [`Error::FindExisting`] + /// database errors are not expected either which is why the error case is omitted. 
To allow proper error reporting, [`Error::Find`] /// should be converted into a more telling error. /// * `delegate` will receive the computed changes, see the [`Visit`][`tree::Visit`] trait for more information on what to expect. /// @@ -47,16 +43,14 @@ impl<'a> tree::Changes<'a> { /// /// [git_cmp_c]: https://github.com/git/git/blob/311531c9de557d25ac087c1637818bd2aad6eb3a/tree-diff.c#L49:L65 /// [git_cmp_rs]: https://github.com/Byron/gitoxide/blob/a4d5f99c8dc99bf814790928a3bf9649cd99486b/gix-object/src/mutable/tree.rs#L52-L55 - pub fn needed_to_obtain<FindFn, R, StateMut, E>( + pub fn needed_to_obtain<R, StateMut>( mut self, other: gix_object::TreeRefIter<'_>, mut state: StateMut, - mut find: FindFn, + objects: impl gix_object::Find, delegate: &mut R, ) -> Result<(), Error> where - FindFn: for<'b> FnMut(&oid, &'b mut Vec<u8>) -> Result<gix_object::TreeRefIter<'b>, E>, - E: std::error::Error + Send + Sync + 'static, R: tree::Visit, StateMut: BorrowMut<tree::State>, { @@ -77,28 +71,16 @@ impl<'a> tree::Changes<'a> { match state.trees.pop_front() { Some((None, Some(rhs))) => { delegate.pop_front_tracked_path_and_set_current(); - rhs_entries = peekable(find(&rhs, &mut state.buf2).map_err(|err| Error::FindExisting { - oid: rhs, - source: err.into(), - })?); + rhs_entries = peekable(objects.find_tree_iter(&rhs, &mut state.buf2)?); } Some((Some(lhs), Some(rhs))) => { delegate.pop_front_tracked_path_and_set_current(); - lhs_entries = peekable(find(&lhs, &mut state.buf1).map_err(|err| Error::FindExisting { - oid: lhs, - source: err.into(), - })?); - rhs_entries = peekable(find(&rhs, &mut state.buf2).map_err(|err| Error::FindExisting { - oid: rhs, - source: err.into(), - })?); + lhs_entries = peekable(objects.find_tree_iter(&lhs, &mut state.buf1)?); + rhs_entries = peekable(objects.find_tree_iter(&rhs, &mut state.buf2)?); } Some((Some(lhs), None)) => { delegate.pop_front_tracked_path_and_set_current(); - lhs_entries = peekable(find(&lhs, &mut state.buf1).map_err(|err| 
Error::FindExisting { - oid: lhs, - source: err.into(), - })?); + lhs_entries = peekable(objects.find_tree_iter(&lhs, &mut state.buf1)?); } Some((None, None)) => unreachable!("BUG: it makes no sense to fill the stack with empties"), None => return Ok(()), @@ -267,9 +249,8 @@ fn handle_lhs_and_rhs_with_equal_filenames<R: tree::Visit>( queue: &mut VecDeque<TreeInfoPair>, delegate: &mut R, ) -> Result<(), Error> { - use gix_object::tree::EntryMode::*; - match (lhs.mode, rhs.mode) { - (Tree, Tree) => { + match (lhs.mode.is_tree(), rhs.mode.is_tree()) { + (true, true) => { delegate.push_back_tracked_path_component(lhs.filename); if lhs.oid != rhs.oid && delegate @@ -285,7 +266,7 @@ fn handle_lhs_and_rhs_with_equal_filenames<R: tree::Visit>( } queue.push_back((Some(lhs.oid.to_owned()), Some(rhs.oid.to_owned()))); } - (_, Tree) => { + (_, true) => { delegate.push_back_tracked_path_component(lhs.filename); if delegate .visit(Change::Deletion { @@ -307,7 +288,7 @@ fn handle_lhs_and_rhs_with_equal_filenames<R: tree::Visit>( }; queue.push_back((None, Some(rhs.oid.to_owned()))); } - (Tree, _) => { + (true, _) => { delegate.push_back_tracked_path_component(lhs.filename); if delegate .visit(Change::Deletion { @@ -329,9 +310,9 @@ fn handle_lhs_and_rhs_with_equal_filenames<R: tree::Visit>( }; queue.push_back((Some(lhs.oid.to_owned()), None)); } - (lhs_non_tree, rhs_non_tree) => { + (false, false) => { delegate.push_path_component(lhs.filename); - debug_assert!(lhs_non_tree.is_no_tree() && rhs_non_tree.is_no_tree()); + debug_assert!(lhs.mode.is_no_tree() && lhs.mode.is_no_tree()); if lhs.oid != rhs.oid && delegate .visit(Change::Modification { @@ -359,7 +340,7 @@ fn peekable<I: Iterator>(iter: I) -> IteratorType<I> { mod tests { use std::cmp::Ordering; - use gix_object::tree::EntryMode; + use gix_object::tree::EntryKind; use super::*; @@ -368,12 +349,12 @@ mod tests { let null = gix_hash::ObjectId::null(gix_hash::Kind::Sha1); let actual = compare( &EntryRef { - mode: 
EntryMode::Blob, + mode: EntryKind::Blob.into(), filename: "plumbing-cli.rs".into(), oid: &null, }, &EntryRef { - mode: EntryMode::Tree, + mode: EntryKind::Tree.into(), filename: "plumbing".into(), oid: &null, }, @@ -381,12 +362,12 @@ mod tests { assert_eq!(actual, Ordering::Less); let actual = compare( &EntryRef { - mode: EntryMode::Tree, + mode: EntryKind::Tree.into(), filename: "plumbing-cli.rs".into(), oid: &null, }, &EntryRef { - mode: EntryMode::Blob, + mode: EntryKind::Blob.into(), filename: "plumbing".into(), oid: &null, }, diff --git a/vendor/gix-diff/src/tree/visit.rs b/vendor/gix-diff/src/tree/visit.rs index 82e38931d..a113d46b1 100644 --- a/vendor/gix-diff/src/tree/visit.rs +++ b/vendor/gix-diff/src/tree/visit.rs @@ -92,6 +92,46 @@ pub trait Visit { fn visit(&mut self, change: Change) -> Action; } +#[cfg(feature = "blob")] +mod change_impls { + use gix_hash::oid; + use gix_object::tree::EntryMode; + + use crate::{rewrites::tracker::ChangeKind, tree::visit::Change}; + + impl crate::rewrites::tracker::Change for crate::tree::visit::Change { + fn id(&self) -> &oid { + match self { + Change::Addition { oid, .. } | Change::Deletion { oid, .. } | Change::Modification { oid, .. } => oid, + } + } + + fn kind(&self) -> ChangeKind { + match self { + Change::Addition { .. } => ChangeKind::Addition, + Change::Deletion { .. } => ChangeKind::Deletion, + Change::Modification { .. } => ChangeKind::Modification, + } + } + + fn entry_mode(&self) -> EntryMode { + match self { + Change::Addition { entry_mode, .. } + | Change::Deletion { entry_mode, .. } + | Change::Modification { entry_mode, .. } => *entry_mode, + } + } + + fn id_and_entry_mode(&self) -> (&oid, EntryMode) { + match self { + Change::Addition { entry_mode, oid, .. } + | Change::Deletion { entry_mode, oid, .. } + | Change::Modification { entry_mode, oid, .. } => (oid, *entry_mode), + } + } + } +} + #[cfg(test)] mod tests { use super::*; |