summaryrefslogtreecommitdiffstats
path: root/vendor/gix-diff/src
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-19 09:25:53 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-19 09:25:53 +0000
commit73e0a5b7696ea019ba35b89f38fc8e7b285d99cb (patch)
tree0d2e175af6f114cb50a675bec0bc76e12e1bceb4 /vendor/gix-diff/src
parentAdding upstream version 1.75.0+dfsg1. (diff)
downloadrustc-upstream.tar.xz
rustc-upstream.zip
Adding upstream version 1.76.0+dfsg1.upstream/1.76.0+dfsg1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/gix-diff/src')
-rw-r--r--vendor/gix-diff/src/blob.rs3
-rw-r--r--vendor/gix-diff/src/blob/mod.rs133
-rw-r--r--vendor/gix-diff/src/blob/pipeline.rs538
-rw-r--r--vendor/gix-diff/src/blob/platform.rs619
-rw-r--r--vendor/gix-diff/src/lib.rs41
-rw-r--r--vendor/gix-diff/src/rewrites/mod.rs71
-rw-r--r--vendor/gix-diff/src/rewrites/tracker.rs620
-rw-r--r--vendor/gix-diff/src/tree/changes.rs63
-rw-r--r--vendor/gix-diff/src/tree/visit.rs40
9 files changed, 2081 insertions, 47 deletions
diff --git a/vendor/gix-diff/src/blob.rs b/vendor/gix-diff/src/blob.rs
deleted file mode 100644
index 27c1a1317..000000000
--- a/vendor/gix-diff/src/blob.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-//! For using text diffs, please have a look at the [`imara-diff` documentation](https://docs.rs/imara-diff),
-//! maintained by [Pascal Kuthe](https://github.com/pascalkuthe).
-pub use imara_diff::*;
diff --git a/vendor/gix-diff/src/blob/mod.rs b/vendor/gix-diff/src/blob/mod.rs
new file mode 100644
index 000000000..0c76c2d91
--- /dev/null
+++ b/vendor/gix-diff/src/blob/mod.rs
@@ -0,0 +1,133 @@
+//! For using text diffs, please have a look at the [`imara-diff` documentation](https://docs.rs/imara-diff),
+//! maintained by [Pascal Kuthe](https://github.com/pascalkuthe).
+use std::{collections::HashMap, path::PathBuf};
+
+use bstr::BString;
+pub use imara_diff::*;
+
+/// Types used by [`Pipeline`], along with its implementation.
+pub mod pipeline;
+
+/// Types used by [`Platform`].
+pub mod platform;
+
+/// Information about the diff performed to detect similarity.
+#[derive(Debug, Default, Clone, Copy, PartialEq, PartialOrd)]
+pub struct DiffLineStats {
+ /// The number of lines to remove from the source to get to the destination.
+ pub removals: u32,
+ /// The number of lines to add to the source to get to the destination.
+ pub insertions: u32,
+ /// The number of lines of the previous state, in the source.
+ pub before: u32,
+ /// The number of lines of the new state, in the destination.
+ pub after: u32,
+ /// A range from 0 to 1.0, where 1.0 is a perfect match and 0.5 is a similarity of 50%.
+ /// Similarity is the ratio between all lines in the previous blob and the current blob,
+ /// calculated as `(old_lines_count - new_lines_count) as f32 / old_lines_count.max(new_lines_count) as f32`. NOTE(review): taken literally this is 0 for equal line counts, not 1.0 - confirm against the actual computation.
+ pub similarity: f32,
+}
+
+/// A way to classify a resource suitable for diffing by the side of the comparison it belongs to.
+#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
+pub enum ResourceKind {
+ /// The source of a rewrite, rename or copy operation, or generally the old version of a resource.
+ OldOrSource,
+ /// The destination of a rewrite, rename or copy operation, or generally the new version of a resource.
+ NewOrDestination,
+}
+
+/// A set of values to define how to diff something that is associated with it using `git-attributes`, relevant for regular files.
+///
+/// Some values are related to diffing, some are related to conversions.
+#[derive(Default, Debug, Clone, PartialEq, Eq)]
+pub struct Driver {
+ /// The name of the driver, as referred to by `[diff "name"]` in the git configuration.
+ pub name: BString,
+ /// The command to execute to perform the diff entirely like `<command> old-file old-hex old-mode new-file new-hex new-mode`.
+ ///
+ /// Please note that we don't make this call ourselves, but use it to determine that we should not run our standard
+ /// built-in algorithm but bail instead as the output of such a program isn't standardized.
+ pub command: Option<BString>,
+ /// The per-driver algorithm to use.
+ pub algorithm: Option<Algorithm>,
+ /// The external filter program to call like `<binary_to_text_command> /path/to/blob` which outputs a textual version of the provided
+ /// binary file.
+ /// Note that it's invoked with a shell if arguments are given.
+ /// Further, if present, it will always be executed, whether `is_binary` is set or not.
+ pub binary_to_text_command: Option<BString>,
+ /// `Some(true)` if this driver deals with binary files, which means that a `binary_to_text_command` should be used to convert binary
+ /// into a textual representation.
+ /// Without such a command, anything that is considered binary is not diffed, but only the size of its data is made available.
+ /// If `Some(false)`, it won't be considered binary, and its data will not be sampled for the null-byte either.
+ /// Leaving it to `None` means binary detection is automatic, and is based on the presence of the `0` byte in the first 8kB of the buffer.
+ pub is_binary: Option<bool>,
+}
+
+/// A conversion pipeline to take an object or path from what's stored in `git` to what can be diffed, while
+/// following the guidance of git-attributes at the respective path to learn if diffing should happen or if
+/// the content is considered binary.
+///
+/// There are two different conversion flows, where the target of the flow is a buffer with diffable content:
+// TODO: update this with information about possible directions.
+///
+/// * `worktree on disk` -> `text conversion`
+/// * `object` -> `worktree-filters` -> `text conversion`
+#[derive(Clone)]
+pub struct Pipeline {
+ /// A way to read data directly from the worktree.
+ pub roots: pipeline::WorktreeRoots,
+ /// A pipeline to convert objects from what's stored in `git` to its worktree version.
+ pub worktree_filter: gix_filter::Pipeline,
+ /// Options affecting the way we read files.
+ pub options: pipeline::Options,
+ /// Drivers to help customize the conversion behaviour depending on the location of items, kept sorted by name.
+ drivers: Vec<Driver>,
+ /// Pre-configured attributes to obtain additional diff-related information.
+ attrs: gix_filter::attributes::search::Outcome,
+ /// A buffer to manipulate paths, reused across conversions.
+ path: PathBuf,
+}
+
+/// A utility for performing a diff of two blobs, including flexible conversions, conversion-caching
+/// and acquisition of diff information.
+/// Note that this instance will not call external filters as their output can't be known programmatically,
+/// but it allows preparing their input if the caller wishes to perform this task.
+///
+/// Optimized for NxM lookups with built-in caching.
+#[derive(Clone)]
+pub struct Platform {
+ /// The old version of a diff-able blob, if set.
+ old: Option<platform::CacheKey>,
+ /// The new version of a diff-able blob, if set.
+ new: Option<platform::CacheKey>,
+
+ /// Options to alter how diffs should be performed.
+ pub options: platform::Options,
+ /// A way to convert objects into a diff-able format.
+ pub filter: Pipeline,
+ /// A way to access .gitattributes
+ pub attr_stack: gix_worktree::Stack,
+
+ /// The way we convert resources into diffable states.
+ filter_mode: pipeline::Mode,
+ /// A continuously growing cache keeping ready-for-diff blobs by their path in the worktree,
+ /// as that is what affects their final diff-able state.
+ ///
+ /// That way, expensive rewrite-checks with NxM matrix checks would be as fast as possible,
+ /// avoiding duplicate work.
+ diff_cache: HashMap<platform::CacheKey, platform::CacheValue>,
+}
+
+mod impls { // trait implementations for types declared in this module
+ use crate::blob::ResourceKind;
+
+ impl std::fmt::Display for ResourceKind { // renders as the short words "old" or "new"
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ f.write_str(match self {
+ ResourceKind::OldOrSource => "old",
+ ResourceKind::NewOrDestination => "new",
+ })
+ }
+ }
+}
diff --git a/vendor/gix-diff/src/blob/pipeline.rs b/vendor/gix-diff/src/blob/pipeline.rs
new file mode 100644
index 000000000..58dddd90b
--- /dev/null
+++ b/vendor/gix-diff/src/blob/pipeline.rs
@@ -0,0 +1,538 @@
+use std::{
+ io::{Read, Write},
+ path::{Path, PathBuf},
+ process::{Command, Stdio},
+};
+
+use bstr::{BStr, ByteSlice};
+use gix_filter::{
+ driver::apply::{Delay, MaybeDelayed},
+ pipeline::convert::{ToGitOutcome, ToWorktreeOutcome},
+};
+use gix_object::tree::EntryKind;
+
+use crate::blob::{Driver, Pipeline, ResourceKind};
+
+/// A way to access roots for different kinds of resources that are possibly located and accessible in a worktree.
+#[derive(Clone, Debug, Default)]
+pub struct WorktreeRoots {
+ /// A place where the source of a rewrite, rename or copy, or generally the previous version of resources, are located, or `None` if there is no such worktree.
+ pub old_root: Option<PathBuf>,
+ /// A place where the destination of a rewrite, rename or copy, or generally the new version of resources, are located, or `None` if there is no such worktree.
+ pub new_root: Option<PathBuf>,
+}
+
+impl WorktreeRoots {
+ /// Return the root path for the given `kind`, or `None` if no root is set for it.
+ pub fn by_kind(&self, kind: ResourceKind) -> Option<&Path> {
+ match kind {
+ ResourceKind::OldOrSource => self.old_root.as_deref(),
+ ResourceKind::NewOrDestination => self.new_root.as_deref(),
+ }
+ }
+}
+
+/// Data as part of an [Outcome].
+#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
+pub enum Data {
+ /// The data to use for diffing was written into the buffer that was passed during the call to [`Pipeline::convert_to_diffable()`].
+ Buffer,
+ /// The size that the binary blob had at the given revision, without having applied filters, as it's either
+ /// considered binary or above the big-file threshold.
+ ///
+ /// In this state, the binary file cannot be diffed.
+ Binary {
+ /// The size of the object prior to performing any filtering or as it was found on disk.
+ ///
+ /// Note that technically, the size isn't always representative of the same 'state' of the
+ /// content, as sometimes it is the size of the blob in git, and sometimes the size of the file
+ /// in the worktree.
+ size: u64,
+ },
+}
+
+/// The outcome returned by [Pipeline::convert_to_diffable()](super::Pipeline::convert_to_diffable()).
+#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
+pub struct Outcome {
+ /// If available, an index into the `drivers` field to access more diff-related information of the driver for items
+ /// at the given path, as previously determined by git-attributes.
+ ///
+ /// Note that drivers are queried even if there is no object available.
+ pub driver_index: Option<usize>,
+ /// The data itself, suitable for diffing, or `None` if the object or worktree item is not present at all.
+ pub data: Option<Data>,
+}
+
+/// Options for use in a [`Pipeline`].
+#[derive(Default, Clone, Copy, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)]
+pub struct Options {
+ /// The number of bytes that an object has to exceed before being treated as binary.
+ /// These objects will not be queried, nor will their data be processed in any way.
+ /// If `0`, no file is ever considered binary due to its size.
+ ///
+ /// Note that for files stored in `git`, what counts is their stored, decompressed size,
+ /// thus `git-lfs` files would typically not be considered binary unless one explicitly marks
+ /// them as such.
+ pub large_file_threshold_bytes: u64,
+ /// Capabilities of the file system which affect how we read worktree files.
+ pub fs: gix_fs::Capabilities,
+}
+
+/// The specific way to convert a resource.
+#[derive(Default, Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
+pub enum Mode {
+ /// Always prepare the version of the resource as it would be in the work-tree, and
+ /// apply binary-to-text filters if present.
+ ///
+ /// This is typically free for resources in the worktree, and will apply filters to resources in the
+ /// object database.
+ #[default]
+ ToWorktreeAndBinaryToText,
+ /// Prepare the version of the resource as it would be in the work-tree if
+ /// binary-to-text filters are present (and apply them), or use the version in `git` otherwise.
+ ToGitUnlessBinaryToTextIsPresent,
+ /// Always prepare resources as they are stored in `git`.
+ ///
+ /// This is usually fastest, even though files in the worktree need to be converted.
+ ToGit,
+}
+
+impl Mode {
+ fn to_worktree(self) -> bool { // `true` for every mode except `ToGit`
+ matches!(
+ self,
+ Mode::ToGitUnlessBinaryToTextIsPresent | Mode::ToWorktreeAndBinaryToText
+ )
+ }
+
+ fn to_git(self) -> bool { // `true` for every mode except `ToWorktreeAndBinaryToText`
+ matches!(self, Mode::ToGitUnlessBinaryToTextIsPresent | Mode::ToGit)
+ }
+}
+
+/// The error type for use with `Pipeline::convert_to_diffable()`.
+pub mod convert_to_diffable {
+ use bstr::BString;
+ use gix_object::tree::EntryKind;
+
+ /// The error returned by [Pipeline::convert_to_diffable()](super::Pipeline::convert_to_diffable()).
+ #[derive(Debug, thiserror::Error)]
+ #[allow(missing_docs)]
+ pub enum Error {
+ #[error("Entry at '{rela_path}' must be regular file or symlink, but was {actual:?}")]
+ InvalidEntryKind { rela_path: BString, actual: EntryKind },
+ #[error("Entry at '{rela_path}' could not be read as symbolic link")]
+ ReadLink { rela_path: BString, source: std::io::Error },
+ #[error("Entry at '{rela_path}' could not be opened for reading or read from")]
+ OpenOrRead { rela_path: BString, source: std::io::Error },
+ #[error("Entry at '{rela_path}' could not be copied from a filter process to a memory buffer")]
+ StreamCopy { rela_path: BString, source: std::io::Error },
+ #[error("Failed to run '{cmd}' for binary-to-text conversion of entry at {rela_path}")]
+ RunTextConvFilter {
+ rela_path: BString,
+ cmd: String,
+ source: std::io::Error,
+ },
+ #[error("Tempfile for binary-to-text conversion for entry at {rela_path} could not be created")]
+ CreateTempfile { rela_path: BString, source: std::io::Error },
+ #[error("Binary-to-text conversion '{cmd}' for entry at {rela_path} failed with: {stderr}")]
+ TextConvFilterFailed {
+ rela_path: BString,
+ cmd: String,
+ stderr: BString,
+ },
+ #[error(transparent)]
+ FindObject(#[from] gix_object::find::existing_object::Error),
+ #[error(transparent)]
+ ConvertToWorktree(#[from] gix_filter::pipeline::convert::to_worktree::Error),
+ #[error(transparent)]
+ ConvertToGit(#[from] gix_filter::pipeline::convert::to_git::Error),
+ }
+}
+
+/// Lifecycle
+impl Pipeline {
+ /// Create a new instance of a pipeline which produces blobs suitable for diffing. `roots` allow reading worktree files directly, otherwise
+ /// `worktree_filter` is used to transform object database data directly. `drivers` further configure individual paths.
+ /// `options` are used to further configure the way we act.
+ pub fn new(
+ roots: WorktreeRoots,
+ worktree_filter: gix_filter::Pipeline,
+ mut drivers: Vec<super::Driver>,
+ options: Options,
+ ) -> Self {
+ drivers.sort_by(|a, b| a.name.cmp(&b.name)); // sorted by name as `convert_to_diffable()` finds drivers by binary search
+ Pipeline {
+ roots,
+ worktree_filter,
+ drivers,
+ options,
+ attrs: {
+ let mut out = gix_filter::attributes::search::Outcome::default();
+ out.initialize_with_selection(&Default::default(), Some("diff")); // only the `diff` attribute is selected
+ out
+ },
+ path: Default::default(),
+ }
+ }
+}
+
+/// Access
+impl Pipeline {
+ /// Return all drivers that this instance was initialized with, sorted by their name.
+ pub fn drivers(&self) -> &[super::Driver] {
+ &self.drivers
+ }
+}
+
+/// Conversion
+impl Pipeline {
+ /// Convert the object at `id`, `mode`, `rela_path` and `kind`, providing access to `attributes` and `objects`.
+ /// The resulting diff-able data is written into `out`, assuming it's not too large. The returned [`Outcome`]
+ /// contains information on how to use `out`, or if it's filled at all.
+ ///
+ /// `attributes` must be returning the attributes at `rela_path`, and `objects` must be usable if `kind` is
+ /// a resource in the object database, i.e. has no worktree root available.
+ ///
+ /// If `id` [is null](gix_hash::ObjectId::is_null()) or the file in question doesn't exist in the worktree in case
+ /// [a root](WorktreeRoots) is present, then `out` will be left cleared and [Outcome::data] will be `None`.
+ ///
+ /// Note that `mode` is trusted, and we will not re-validate that the entry in the worktree actually is of that mode.
+ ///
+ /// Use `convert` to control what kind of the resource will be produced.
+ ///
+ /// ### About Tempfiles
+ ///
+ /// When querying from the object database and a [binary-to-text](Driver::binary_to_text_command) command is set,
+ /// a temporary file will be created to serve as input for the converter program, containing the worktree-data
+ /// exactly as it would be present in the worktree if checked out.
+ ///
+ /// As these files are ultimately named tempfiles, they will be leaked unless the [gix_tempfile] is configured with
+ /// a signal handler. If they leak, they would remain in the system's `$TMP` directory.
+ #[allow(clippy::too_many_arguments)]
+ pub fn convert_to_diffable(
+ &mut self,
+ id: &gix_hash::oid,
+ mode: EntryKind,
+ rela_path: &BStr,
+ kind: ResourceKind,
+ attributes: &mut dyn FnMut(&BStr, &mut gix_filter::attributes::search::Outcome),
+ objects: &dyn gix_object::FindObjectOrHeader,
+ convert: Mode,
+ out: &mut Vec<u8>,
+ ) -> Result<Outcome, convert_to_diffable::Error> {
+ let is_symlink = match mode {
+ EntryKind::Link if self.options.fs.symlink => true,
+ EntryKind::Blob | EntryKind::BlobExecutable => false,
+ _ => {
+ return Err(convert_to_diffable::Error::InvalidEntryKind {
+ rela_path: rela_path.to_owned(),
+ actual: mode,
+ })
+ }
+ };
+
+ out.clear();
+ attributes(rela_path, &mut self.attrs);
+ let attr = self.attrs.iter_selected().next().expect("pre-initialized with 'diff'");
+ let driver_index = attr
+ .assignment
+ .state
+ .as_bstr()
+ .and_then(|name| self.drivers.binary_search_by(|d| d.name.as_bstr().cmp(name)).ok());
+ let driver = driver_index.map(|idx| &self.drivers[idx]);
+ let mut is_binary = if let Some(driver) = driver {
+ driver
+ .is_binary
+ .map(|is_binary| is_binary && driver.binary_to_text_command.is_none())
+ } else {
+ attr.assignment.state.is_unset().then_some(true)
+ };
+ match self.roots.by_kind(kind) {
+ Some(root) => {
+ self.path.clear();
+ self.path.push(root);
+ self.path.push(gix_path::from_bstr(rela_path));
+ let data = if is_symlink {
+ let target = none_if_missing(std::fs::read_link(&self.path)).map_err(|err| {
+ convert_to_diffable::Error::ReadLink {
+ rela_path: rela_path.to_owned(),
+ source: err,
+ }
+ })?;
+ target.map(|target| {
+ out.extend_from_slice(gix_path::into_bstr(target).as_ref());
+ Data::Buffer
+ })
+ } else {
+ let need_size_only = is_binary == Some(true);
+ let size_in_bytes = (need_size_only
+ || (is_binary != Some(false) && self.options.large_file_threshold_bytes > 0))
+ .then(|| {
+ none_if_missing(self.path.metadata().map(|md| md.len())).map_err(|err| {
+ convert_to_diffable::Error::OpenOrRead {
+ rela_path: rela_path.to_owned(),
+ source: err,
+ }
+ })
+ })
+ .transpose()?;
+ match size_in_bytes {
+ Some(None) => None, // missing as identified by the size check
+ Some(Some(size)) if size > self.options.large_file_threshold_bytes || need_size_only => {
+ Some(Data::Binary { size })
+ }
+ _ => {
+ match driver
+ .filter(|_| convert.to_worktree())
+ .and_then(|d| d.prepare_binary_to_text_cmd(&self.path))
+ {
+ Some(cmd) => {
+ // Avoid letting the driver program fail if the file to convert doesn't exist.
+ if self.options.large_file_threshold_bytes == 0
+ && none_if_missing(std::fs::symlink_metadata(&self.path))
+ .map_err(|err| convert_to_diffable::Error::OpenOrRead {
+ rela_path: rela_path.to_owned(),
+ source: err,
+ })?
+ .is_none()
+ {
+ None
+ } else {
+ run_cmd(rela_path, cmd, out)?;
+ Some(Data::Buffer)
+ }
+ }
+ None => {
+ let file = none_if_missing(std::fs::File::open(&self.path)).map_err(|err| {
+ convert_to_diffable::Error::OpenOrRead {
+ rela_path: rela_path.to_owned(),
+ source: err,
+ }
+ })?;
+
+ match file {
+ Some(mut file) => {
+ if convert.to_git() {
+ let res = self.worktree_filter.convert_to_git(
+ file,
+ gix_path::from_bstr(rela_path).as_ref(),
+ attributes,
+ &mut |buf| objects.try_find(id, buf).map(|obj| obj.map(|_| ())),
+ )?;
+
+ match res {
+ ToGitOutcome::Unchanged(mut file) => {
+ file.read_to_end(out).map_err(|err| {
+ convert_to_diffable::Error::OpenOrRead {
+ rela_path: rela_path.to_owned(),
+ source: err,
+ }
+ })?;
+ }
+ ToGitOutcome::Process(mut stream) => {
+ stream.read_to_end(out).map_err(|err| {
+ convert_to_diffable::Error::OpenOrRead {
+ rela_path: rela_path.to_owned(),
+ source: err,
+ }
+ })?;
+ }
+ ToGitOutcome::Buffer(buf) => {
+ out.resize(buf.len(), 0);
+ out.copy_from_slice(buf);
+ }
+ }
+ } else {
+ file.read_to_end(out).map_err(|err| {
+ convert_to_diffable::Error::OpenOrRead {
+ rela_path: rela_path.to_owned(),
+ source: err,
+ }
+ })?;
+ }
+
+ Some(if is_binary.unwrap_or_else(|| is_binary_buf(out)) {
+ let size = out.len() as u64;
+ out.clear();
+ Data::Binary { size }
+ } else {
+ Data::Buffer
+ })
+ }
+ None => None,
+ }
+ }
+ }
+ }
+ }
+ };
+ Ok(Outcome { driver_index, data })
+ }
+ None => {
+ let data = if id.is_null() {
+ None
+ } else {
+ let header = objects
+ .try_header(id)
+ .map_err(gix_object::find::existing_object::Error::Find)?
+ .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?;
+ if is_binary.is_none()
+ && self.options.large_file_threshold_bytes > 0
+ && header.size > self.options.large_file_threshold_bytes
+ {
+ is_binary = Some(true);
+ };
+ let data = if is_binary == Some(true) {
+ Data::Binary { size: header.size }
+ } else {
+ objects
+ .try_find(id, out)
+ .map_err(gix_object::find::existing_object::Error::Find)?
+ .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?;
+ if matches!(mode, EntryKind::Blob | EntryKind::BlobExecutable) // NOTE(review): `&&` binds tighter than `||`, so this blob check only guards the first alternative - confirm that is intended
+ && convert == Mode::ToWorktreeAndBinaryToText
+ || (convert == Mode::ToGitUnlessBinaryToTextIsPresent
+ && driver.map_or(false, |d| d.binary_to_text_command.is_some()))
+ {
+ let res =
+ self.worktree_filter
+ .convert_to_worktree(out, rela_path, attributes, Delay::Forbid)?;
+
+ let cmd_and_file = driver
+ .and_then(|d| {
+ d.binary_to_text_command.is_some().then(|| {
+ gix_tempfile::new(
+ std::env::temp_dir(),
+ gix_tempfile::ContainingDirectory::Exists,
+ gix_tempfile::AutoRemove::Tempfile,
+ )
+ .and_then(|mut tmp_file| {
+ self.path.clear();
+ tmp_file.with_mut(|tmp| self.path.push(tmp.path()))?;
+ Ok(tmp_file)
+ })
+ .map(|tmp_file| {
+ (
+ d.prepare_binary_to_text_cmd(&self.path)
+ .expect("always get cmd if command is set"),
+ tmp_file,
+ )
+ })
+ })
+ })
+ .transpose()
+ .map_err(|err| convert_to_diffable::Error::CreateTempfile {
+ source: err,
+ rela_path: rela_path.to_owned(),
+ })?;
+ match cmd_and_file {
+ Some((cmd, mut tmp_file)) => {
+ match res {
+ ToWorktreeOutcome::Unchanged(buf) | ToWorktreeOutcome::Buffer(buf) => {
+ tmp_file.write_all(buf)
+ }
+ ToWorktreeOutcome::Process(MaybeDelayed::Immediate(mut stream)) => {
+ std::io::copy(&mut stream, &mut tmp_file).map(|_| ())
+ }
+ ToWorktreeOutcome::Process(MaybeDelayed::Delayed(_)) => {
+ unreachable!("we prohibit this")
+ }
+ }
+ .map_err(|err| {
+ convert_to_diffable::Error::CreateTempfile {
+ source: err,
+ rela_path: rela_path.to_owned(),
+ }
+ })?;
+ out.clear();
+ run_cmd(rela_path, cmd, out)?;
+ }
+ None => {
+ match res {
+ ToWorktreeOutcome::Unchanged(_) => {}
+ ToWorktreeOutcome::Buffer(src) => {
+ out.resize(src.len(), 0);
+ out.copy_from_slice(src);
+ }
+ ToWorktreeOutcome::Process(MaybeDelayed::Immediate(mut stream)) => {
+ std::io::copy(&mut stream, out).map_err(|err| {
+ convert_to_diffable::Error::StreamCopy {
+ rela_path: rela_path.to_owned(),
+ source: err,
+ }
+ })?;
+ }
+ ToWorktreeOutcome::Process(MaybeDelayed::Delayed(_)) => {
+ unreachable!("we prohibit this")
+ }
+ };
+ }
+ }
+ }
+
+ if driver.map_or(true, |d| d.binary_to_text_command.is_none())
+ && is_binary.unwrap_or_else(|| is_binary_buf(out))
+ {
+ let size = out.len() as u64;
+ out.clear();
+ Data::Binary { size }
+ } else {
+ Data::Buffer
+ }
+ };
+ Some(data)
+ };
+ Ok(Outcome { driver_index, data })
+ }
+ }
+ }
+}
+
+fn is_binary_buf(buf: &[u8]) -> bool { // `true` if a null-byte is present within the first 8000 bytes of `buf`
+ let buf = &buf[..buf.len().min(8000)];
+ buf.contains(&0)
+}
+
+fn none_if_missing<T>(res: std::io::Result<T>) -> std::io::Result<Option<T>> { // turn a `NotFound` error into `Ok(None)`
+ match res {
+ Ok(data) => Ok(Some(data)),
+ Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
+ Err(err) => Err(err), // any other error is passed on unchanged
+ }
+}
+
+fn run_cmd(rela_path: &BStr, mut cmd: Command, out: &mut Vec<u8>) -> Result<(), convert_to_diffable::Error> {
+ gix_trace::debug!(cmd = ?cmd, "Running binary-to-text command");
+ let mut res = cmd
+ .output() // waits for the program to finish and collects its output
+ .map_err(|err| convert_to_diffable::Error::RunTextConvFilter {
+ rela_path: rela_path.to_owned(),
+ cmd: format!("{cmd:?}"),
+ source: err,
+ })?;
+ if !res.status.success() { // a failed conversion is an error that carries the program's stderr
+ return Err(convert_to_diffable::Error::TextConvFilterFailed {
+ rela_path: rela_path.to_owned(),
+ cmd: format!("{cmd:?}"),
+ stderr: res.stderr.into(),
+ });
+ }
+ out.append(&mut res.stdout); // appends to `out` without clearing it first
+ Ok(())
+}
+
+impl Driver {
+ /// Produce an invocable command pre-configured to produce the filtered output on stdout after reading `path`, or `None` if no `binary_to_text_command` is set.
+ pub fn prepare_binary_to_text_cmd(&self, path: &Path) -> Option<std::process::Command> {
+ let command: &BStr = self.binary_to_text_command.as_ref()?.as_ref();
+ let cmd = gix_command::prepare(gix_path::from_bstr(command).into_owned())
+ .with_shell()
+ .stdin(Stdio::null())
+ .stdout(Stdio::piped())
+ .stderr(Stdio::piped())
+ .arg(path) // `path` is passed as the last argument of the command
+ .into();
+ Some(cmd)
+ }
+}
diff --git a/vendor/gix-diff/src/blob/platform.rs b/vendor/gix-diff/src/blob/platform.rs
new file mode 100644
index 000000000..fb37b735c
--- /dev/null
+++ b/vendor/gix-diff/src/blob/platform.rs
@@ -0,0 +1,619 @@
+use std::{io::Write, process::Stdio};
+
+use bstr::{BStr, BString, ByteSlice};
+
+use super::Algorithm;
+use crate::blob::{pipeline, Pipeline, Platform, ResourceKind};
+
+/// A key to uniquely identify either a location in the worktree, or in the object database.
+#[derive(Clone)]
+pub(crate) struct CacheKey {
+ id: gix_hash::ObjectId,
+ location: BString,
+ /// If `true`, this is an `id` based key, otherwise it's location based.
+ use_id: bool,
+ /// Only relevant when `id` is not null (it is hashed and compared for `id` based keys only), to further differentiate content and allow us to
+ /// keep track of both links and blobs with the same content (rare, but possible).
+ is_link: bool,
+}
+
+/// A stored value representing a diffable resource.
+#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
+pub(crate) struct CacheValue {
+ /// The outcome of converting a resource into a diffable format using [Pipeline::convert_to_diffable()].
+ conversion: pipeline::Outcome,
+ /// The kind of the resource we are looking at. Only possible values are `Blob`, `BlobExecutable` and `Link`.
+ mode: gix_object::tree::EntryKind,
+ /// A possibly empty buffer, depending on `conversion.data`, which may indicate that the data is missing or considered binary.
+ buffer: Vec<u8>,
+}
+
+impl std::hash::Hash for CacheKey { // hashes exactly the fields that `eq()` compares for the respective kind of key
+ fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+ if self.use_id {
+ self.id.hash(state);
+ self.is_link.hash(state)
+ } else {
+ self.location.hash(state)
+ }
+ }
+}
+
+impl PartialEq for CacheKey {
+ fn eq(&self, other: &Self) -> bool {
+ match (self.use_id, other.use_id) {
+ (false, false) => self.location.eq(&other.location), // location based keys compare by path only
+ (true, true) => self.id.eq(&other.id) && self.is_link.eq(&other.is_link), // `id` based keys ignore the location
+ _ => false, // keys of different kinds are never equal
+ }
+ }
+}
+
+impl Eq for CacheKey {}
+
+impl Default for CacheKey { // a location based key with an empty location and the null SHA-1 id
+ fn default() -> Self {
+ CacheKey {
+ id: gix_hash::Kind::Sha1.null(),
+ use_id: false,
+ is_link: false,
+ location: BString::default(),
+ }
+ }
+}
+
+impl CacheKey {
+ fn set_location(&mut self, rela_path: &BStr) { // replace the stored location with a copy of `rela_path`
+ self.location.clear();
+ self.location.extend_from_slice(rela_path);
+ }
+}
+
+/// A resource ready to be diffed in one way or another.
+#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
+pub struct Resource<'a> {
+ /// If available, an index into the `drivers` field to access more diff-related information of the driver for items
+ /// at the given path, as previously determined by git-attributes.
+ ///
+ /// Note that drivers are queried even if there is no object available.
+ pub driver_index: Option<usize>,
+ /// The data itself, suitable for diffing, or an indication that the object or worktree item is missing or binary.
+ pub data: resource::Data<'a>,
+ /// The kind of the resource we are looking at. Only possible values are `Blob`, `BlobExecutable` and `Link`.
+ pub mode: gix_object::tree::EntryKind,
+ /// The location of the resource, relative to the working tree.
+ pub rela_path: &'a BStr,
+ /// The id of the content as it would be stored in `git`, or `null` if the content doesn't exist anymore at
+ /// `rela_path` or if it was never computed. This can happen with content read from the worktree, which has to
+ /// go through a filter to be converted back to what `git` would store.
+ pub id: &'a gix_hash::oid,
+}
+
+/// Types and implementations related to [`Resource`].
+pub mod resource {
+ use crate::blob::{
+ pipeline,
+ platform::{CacheKey, CacheValue, Resource},
+ };
+
+ impl<'a> Resource<'a> {
+ pub(crate) fn new(key: &'a CacheKey, value: &'a CacheValue) -> Self {
+ Resource {
+ driver_index: value.conversion.driver_index,
+ data: value.conversion.data.map_or(Data::Missing, |data| match data {
+ pipeline::Data::Buffer => Data::Buffer(&value.buffer),
+ pipeline::Data::Binary { size } => Data::Binary { size },
+ }),
+ mode: value.mode,
+ rela_path: key.location.as_ref(),
+ id: &key.id,
+ }
+ }
+
+ /// Produce an iterator over lines, separated by LF or CRLF, suitable to create tokens using
+ /// [`imara_diff::intern::InternedInput`]. Resources without a buffer (missing or binary) are treated as empty input.
+ pub fn intern_source(&self) -> imara_diff::sources::ByteLines<'a, true> {
+ crate::blob::sources::byte_lines_with_terminator(self.data.as_slice().unwrap_or_default())
+ }
+ }
+
+ /// The data of a diffable resource, as it could be determined and computed previously.
+ #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
+ pub enum Data<'a> {
+ /// The object is missing, either because it didn't exist in the working tree or because its `id` was null.
+ Missing,
+ /// The textual data as processed to be in a diffable state.
+ Buffer(&'a [u8]),
+ /// The size that the binary blob had at the given revision, without having applied filters, as it's either
+ /// considered binary or above the big-file threshold.
+ ///
+ /// In this state, the binary file cannot be diffed.
+ Binary {
+ /// The size of the object prior to performing any filtering or as it was found on disk.
+ ///
+ /// Note that technically, the size isn't always representative of the same 'state' of the
+ /// content, as sometimes it is the size of the blob in git, and sometimes the size of the file
+ /// in the worktree.
+ size: u64,
+ },
+ }
+
+ impl<'a> Data<'a> {
+ /// Return ourselves as slice of bytes if this instance stores data, or `None` if it is binary or missing.
+ pub fn as_slice(&self) -> Option<&'a [u8]> {
+ match self {
+ Data::Buffer(d) => Some(d),
+ Data::Binary { .. } | Data::Missing => None,
+ }
+ }
+ }
+}
+
+/// The error type for use with `Platform::set_resource()`.
+pub mod set_resource {
+ use bstr::BString;
+
+ use crate::blob::{pipeline, ResourceKind};
+
+ /// The error returned by [Platform::set_resource](super::Platform::set_resource).
+ #[derive(Debug, thiserror::Error)]
+ #[allow(missing_docs)]
+ pub enum Error {
+ #[error("Can only diff blobs and links, not {mode:?}")]
+ InvalidMode { mode: gix_object::tree::EntryKind },
+ #[error("Failed to read {kind} worktree data from '{rela_path}'")]
+ Io {
+ rela_path: BString,
+ kind: ResourceKind,
+ source: std::io::Error,
+ },
+ #[error("Failed to obtain attributes for {kind} resource at '{rela_path}'")]
+ Attributes {
+ rela_path: BString,
+ kind: ResourceKind,
+ source: std::io::Error,
+ },
+ #[error(transparent)]
+ ConvertToDiffable(#[from] pipeline::convert_to_diffable::Error),
+ }
+}
+
+///
+pub mod prepare_diff {
+ use bstr::BStr;
+
+ use crate::blob::platform::Resource;
+
+ /// The kind of operation that was performed during the [`diff`](super::Platform::prepare_diff()) operation.
+ #[derive(Debug, Copy, Clone, Eq, PartialEq)]
+ pub enum Operation<'a> {
+ /// The [internal diff algorithm](imara_diff::diff) should be called with the provided arguments.
+ /// This only happens if none of the resources are binary, and if there is no external diff program configured via git-attributes
+ /// *or* [Options::skip_internal_diff_if_external_is_configured](super::Options::skip_internal_diff_if_external_is_configured)
+ /// is `false`.
+ ///
+ /// Use [`Outcome::interned_input()`] to easily obtain an interner for use with [`imara_diff::diff()`], or maintain one yourself
+ /// for greater re-use.
+ InternalDiff {
+ /// The algorithm we determined should be used, which is one of (in order, first set one wins):
+ ///
+ /// * the driver's override
+ /// * the platform's own configuration (typically from git-config)
+ /// * the default algorithm
+ algorithm: imara_diff::Algorithm,
+ },
+ /// Run the external diff program as configured in the `source`-resources driver.
+ /// This only happens if [Options::skip_internal_diff_if_external_is_configured](super::Options::skip_internal_diff_if_external_is_configured)
+ /// was `true`, preventing the usage of the internal diff implementation.
+ ExternalCommand {
+ /// The command as extracted from [Driver::command](super::super::Driver::command).
+ /// Use it in [`Platform::prepare_diff_command`](super::Platform::prepare_diff_command()) to easily prepare a compatible invocation.
+ command: &'a BStr,
+ },
+ /// One of the involved resources, [`old`](Outcome::old) or [`new`](Outcome::new), was binary and thus no diff
+ /// can be performed.
+ SourceOrDestinationIsBinary,
+ }
+
+ /// The outcome of a [`prepare_diff`](super::Platform::prepare_diff()) operation.
+ #[derive(Debug, Copy, Clone, Eq, PartialEq)]
+ pub struct Outcome<'a> {
+ /// The kind of diff that was actually performed. This may include skipping the internal diff as well.
+ pub operation: Operation<'a>,
+ /// The old or source of the diff operation.
+ pub old: Resource<'a>,
+ /// The new or destination of the diff operation.
+ pub new: Resource<'a>,
+ }
+
+ impl<'a> Outcome<'a> {
+ /// Produce an instance of an interner which `git` would use to perform diffs.
+ pub fn interned_input(&self) -> imara_diff::intern::InternedInput<&'a [u8]> {
+ crate::blob::intern::InternedInput::new(self.old.intern_source(), self.new.intern_source())
+ }
+ }
+
+ /// The error returned by [Platform::prepare_diff()](super::Platform::prepare_diff()).
+ #[derive(Debug, thiserror::Error)]
+ #[allow(missing_docs)]
+ pub enum Error {
+ #[error("Either the source or the destination of the diff operation were not set")]
+ SourceOrDestinationUnset,
+ #[error("Tried to diff resources that are both considered removed")]
+ SourceAndDestinationRemoved,
+ }
+}
+
+///
+pub mod prepare_diff_command {
+ use std::ops::{Deref, DerefMut};
+
+ use bstr::BString;
+
+ /// The error returned by [Platform::prepare_diff_command()](super::Platform::prepare_diff_command()).
+ #[derive(Debug, thiserror::Error)]
+ #[allow(missing_docs)]
+ pub enum Error {
+ #[error("Either the source or the destination of the diff operation were not set")]
+ SourceOrDestinationUnset,
+ #[error("Binary resources can't be diffed with an external command (as we don't have the data anymore)")]
+ SourceOrDestinationBinary,
+ #[error(
+ "Tempfile to store content of '{rela_path}' for passing to external diff command could not be created"
+ )]
+ CreateTempfile { rela_path: BString, source: std::io::Error },
+ #[error("Could not write content of '{rela_path}' to tempfile for passing to external diff command")]
+ WriteTempfile { rela_path: BString, source: std::io::Error },
+ }
+
+ /// The outcome of a [`prepare_diff_command`](super::Platform::prepare_diff_command()) operation.
+ ///
+ /// This type acts like [`std::process::Command`], ready to run, with `stdin`, `stdout` and `stderr` set to *inherit*
+ /// all handles as this is expected to be for visual inspection.
+ pub struct Command {
+ pub(crate) cmd: std::process::Command,
+ /// Possibly a tempfile to be removed after the run, or `None` if there is no old version.
+ pub(crate) old: Option<gix_tempfile::Handle<gix_tempfile::handle::Closed>>,
+ /// Possibly a tempfile to be removed after the run, or `None` if there is no new version.
+ pub(crate) new: Option<gix_tempfile::Handle<gix_tempfile::handle::Closed>>,
+ }
+
+ impl Deref for Command {
+ type Target = std::process::Command;
+
+ fn deref(&self) -> &Self::Target {
+ &self.cmd
+ }
+ }
+
+ impl DerefMut for Command {
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.cmd
+ }
+ }
+}
+
+/// Options for use in [Platform::new()].
+#[derive(Default, Copy, Clone)]
+pub struct Options {
+ /// The algorithm to use when diffing.
+ /// If unset, it uses the [default algorithm](Algorithm::default()).
+ pub algorithm: Option<Algorithm>,
+ /// If `true`, default `false`, then an external `diff` configured using gitattributes and drivers,
+ /// will cause the built-in diff [to be skipped](prepare_diff::Operation::ExternalCommand).
+ /// Otherwise, the internal diff is called despite the configured external diff, which is
+ /// typically what callers expect by default.
+ pub skip_internal_diff_if_external_is_configured: bool,
+}
+
+/// Lifecycle
+impl Platform {
+ /// Create a new instance with `options`, and a way to `filter` data from the object database to data that is diff-able.
+ /// `filter_mode` decides how to do that specifically.
+ /// Use `attr_stack` to access attributes pertaining to worktree filters and diff settings.
+ pub fn new(
+ options: Options,
+ filter: Pipeline,
+ filter_mode: pipeline::Mode,
+ attr_stack: gix_worktree::Stack,
+ ) -> Self {
+ Platform {
+ old: None,
+ new: None,
+ diff_cache: Default::default(),
+ options,
+ filter,
+ filter_mode,
+ attr_stack,
+ }
+ }
+}
+
+/// Conversions
+impl Platform {
+ /// Store enough information about a resource to eventually diff it, where…
+ ///
+ /// * `id` is the hash of the resource. If it [is null](gix_hash::ObjectId::is_null()), it should either
+ /// be a resource in the worktree, or it's considered a non-existing, deleted object.
+ /// If an `id` is known, as the hash of the object as (would) be stored in `git`, then it should be provided
+ /// for completeness.
+ /// * `mode` is the kind of object (only blobs and links are allowed)
+ /// * `rela_path` is the relative path as seen from the (work)tree root.
+ /// * `kind` identifies the side of the diff this resource will be used for.
+ /// A diff needs both `OldOrSource` *and* `NewOrDestination`.
+ /// * `objects` provides access to the object database in case the resource can't be read from a worktree.
+ ///
+ /// Note that it's assumed that either `id + mode` or `rela_path` can serve as unique identifier for the resource,
+ /// depending on whether or not a [worktree root](pipeline::WorktreeRoots) is set for the resource of `kind`,
+ /// with resources with worktree roots using the `rela_path` as unique identifier.
+ ///
+ /// ### Important
+ ///
+ /// If an error occurs, the previous resource of `kind` will be cleared, preventing further diffs
+ /// unless another attempt succeeds.
+ pub fn set_resource(
+ &mut self,
+ id: gix_hash::ObjectId,
+ mode: gix_object::tree::EntryKind,
+ rela_path: &BStr,
+ kind: ResourceKind,
+ objects: &impl gix_object::FindObjectOrHeader, // TODO: make this `dyn` once https://github.com/rust-lang/rust/issues/65991 is stable, then also make tracker.rs `objects` dyn
+ ) -> Result<(), set_resource::Error> {
+ let res = self.set_resource_inner(id, mode, rela_path, kind, objects);
+ if res.is_err() {
+ *match kind {
+ ResourceKind::OldOrSource => &mut self.old,
+ ResourceKind::NewOrDestination => &mut self.new,
+ } = None;
+ }
+ res
+ }
+
+ /// Given `diff_command` and `context`, typically obtained from git-configuration, and the currently set diff-resources,
+ /// prepare the invocation and temporary files needed to launch it according to protocol.
+ /// `count` / `total` are used for progress indication passed as environment variables `GIT_DIFF_PATH_(COUNTER|TOTAL)`
+ /// respectively (0-based), so the first path has `count=0` and `total=1` (assuming there is only one path).
+ /// Fails with [`SourceOrDestinationUnset`](prepare_diff_command::Error::SourceOrDestinationUnset) if at least one resource is unset, see [`set_resource()`](Self::set_resource()).
+ ///
+ /// Please note that this is an expensive operation as it will always create up to two temporary files to hold the data
+ /// for the old and new resources.
+ ///
+ /// ### Deviation
+ ///
+ /// If one of the resources is binary, the operation reports an error as such resources don't make their data available
+ /// which is required for the external diff to run.
+ pub fn prepare_diff_command(
+ &self,
+ diff_command: BString,
+ context: gix_command::Context,
+ count: usize,
+ total: usize,
+ ) -> Result<prepare_diff_command::Command, prepare_diff_command::Error> {
+ fn add_resource(
+ cmd: &mut std::process::Command,
+ res: Resource<'_>,
+ ) -> Result<Option<gix_tempfile::Handle<gix_tempfile::handle::Closed>>, prepare_diff_command::Error> {
+ let tmpfile = match res.data {
+ resource::Data::Missing => {
+ cmd.args(["/dev/null", ".", "."]);
+ None
+ }
+ resource::Data::Buffer(buf) => {
+ let mut tmp = gix_tempfile::new(
+ std::env::temp_dir(),
+ gix_tempfile::ContainingDirectory::Exists,
+ gix_tempfile::AutoRemove::Tempfile,
+ )
+ .map_err(|err| prepare_diff_command::Error::CreateTempfile {
+ rela_path: res.rela_path.to_owned(),
+ source: err,
+ })?;
+ tmp.write_all(buf)
+ .map_err(|err| prepare_diff_command::Error::WriteTempfile {
+ rela_path: res.rela_path.to_owned(),
+ source: err,
+ })?;
+ tmp.with_mut(|f| {
+ cmd.arg(f.path());
+ })
+ .map_err(|err| prepare_diff_command::Error::WriteTempfile {
+ rela_path: res.rela_path.to_owned(),
+ source: err,
+ })?;
+ cmd.arg(res.id.to_string()).arg(res.mode.as_octal_str().to_string());
+ let tmp = tmp.close().map_err(|err| prepare_diff_command::Error::WriteTempfile {
+ rela_path: res.rela_path.to_owned(),
+ source: err,
+ })?;
+ Some(tmp)
+ }
+ resource::Data::Binary { .. } => return Err(prepare_diff_command::Error::SourceOrDestinationBinary),
+ };
+ Ok(tmpfile)
+ }
+
+ let (old, new) = self
+ .resources()
+ .ok_or(prepare_diff_command::Error::SourceOrDestinationUnset)?;
+ let mut cmd: std::process::Command = gix_command::prepare(gix_path::from_bstring(diff_command))
+ .with_context(context)
+ .env("GIT_DIFF_PATH_COUNTER", (count + 1).to_string())
+ .env("GIT_DIFF_PATH_TOTAL", total.to_string())
+ .stdin(Stdio::inherit())
+ .stdout(Stdio::inherit())
+ .stderr(Stdio::inherit())
+ .into();
+
+ cmd.arg(gix_path::from_bstr(old.rela_path).into_owned());
+ let mut out = prepare_diff_command::Command {
+ cmd,
+ old: None,
+ new: None,
+ };
+
+ out.old = add_resource(&mut out.cmd, old)?;
+ out.new = add_resource(&mut out.cmd, new)?;
+
+ if old.rela_path != new.rela_path {
+ out.cmd.arg(gix_path::from_bstr(new.rela_path).into_owned());
+ }
+
+ Ok(out)
+ }
+
+ /// Returns the resource of the given kind if it was set.
+ pub fn resource(&self, kind: ResourceKind) -> Option<Resource<'_>> {
+ let key = match kind {
+ ResourceKind::OldOrSource => self.old.as_ref(),
+ ResourceKind::NewOrDestination => self.new.as_ref(),
+ }?;
+ Resource::new(key, self.diff_cache.get(key)?).into()
+ }
+
+ /// Obtain the two resources that were previously set as `(OldOrSource, NewOrDestination)`, if both are set and available.
+ ///
+ /// This is useful if one wishes to manually prepare the diff, maybe for invoking external programs, instead of relying on
+ /// [`Self::prepare_diff()`].
+ pub fn resources(&self) -> Option<(Resource<'_>, Resource<'_>)> {
+ let key = &self.old.as_ref()?;
+ let value = self.diff_cache.get(key)?;
+ let old = Resource::new(key, value);
+
+ let key = &self.new.as_ref()?;
+ let value = self.diff_cache.get(key)?;
+ let new = Resource::new(key, value);
+ Some((old, new))
+ }
+
+ /// Prepare a diff operation on the [previously set](Self::set_resource()) [old](ResourceKind::OldOrSource) and
+ /// [new](ResourceKind::NewOrDestination) resources.
+ ///
+ /// The returned outcome allows to easily perform diff operations, based on the [`prepare_diff::Outcome::operation`] field,
+ /// which hints at what should be done.
+ pub fn prepare_diff(&mut self) -> Result<prepare_diff::Outcome<'_>, prepare_diff::Error> {
+ let old_key = &self.old.as_ref().ok_or(prepare_diff::Error::SourceOrDestinationUnset)?;
+ let old = self
+ .diff_cache
+ .get(old_key)
+ .ok_or(prepare_diff::Error::SourceOrDestinationUnset)?;
+ let new_key = &self.new.as_ref().ok_or(prepare_diff::Error::SourceOrDestinationUnset)?;
+ let new = self
+ .diff_cache
+ .get(new_key)
+ .ok_or(prepare_diff::Error::SourceOrDestinationUnset)?;
+ let mut out = prepare_diff::Outcome {
+ operation: prepare_diff::Operation::SourceOrDestinationIsBinary,
+ old: Resource::new(old_key, old),
+ new: Resource::new(new_key, new),
+ };
+
+ match (old.conversion.data, new.conversion.data) {
+ (None, None) => return Err(prepare_diff::Error::SourceAndDestinationRemoved),
+ (Some(pipeline::Data::Binary { .. }), _) | (_, Some(pipeline::Data::Binary { .. })) => return Ok(out),
+ _either_missing_or_non_binary => {
+ if let Some(command) = old
+ .conversion
+ .driver_index
+ .and_then(|idx| self.filter.drivers[idx].command.as_deref())
+ .filter(|_| self.options.skip_internal_diff_if_external_is_configured)
+ {
+ out.operation = prepare_diff::Operation::ExternalCommand {
+ command: command.as_bstr(),
+ };
+ return Ok(out);
+ }
+ }
+ }
+
+ out.operation = prepare_diff::Operation::InternalDiff {
+ algorithm: old
+ .conversion
+ .driver_index
+ .and_then(|idx| self.filter.drivers[idx].algorithm)
+ .or(self.options.algorithm)
+ .unwrap_or_default(),
+ };
+ Ok(out)
+ }
+
+ /// Every call to [set_resource()](Self::set_resource()) will keep the diffable data in memory, and that will never be cleared.
+ ///
+ /// Use this method to clear the cache, releasing memory. Note that this will also lose all information about resources
+ /// which means diffs would fail unless the resources are set again.
+ ///
+ /// Note that this also has to be called if the same resource is going to be diffed in different states, i.e. using different
+ /// `id`s, but the same `rela_path`.
+ pub fn clear_resource_cache(&mut self) {
+ self.old = None;
+ self.new = None;
+ self.diff_cache.clear();
+ }
+}
+
+impl Platform {
+ fn set_resource_inner(
+ &mut self,
+ id: gix_hash::ObjectId,
+ mode: gix_object::tree::EntryKind,
+ rela_path: &BStr,
+ kind: ResourceKind,
+ objects: &impl gix_object::FindObjectOrHeader,
+ ) -> Result<(), set_resource::Error> {
+ if matches!(
+ mode,
+ gix_object::tree::EntryKind::Commit | gix_object::tree::EntryKind::Tree
+ ) {
+ return Err(set_resource::Error::InvalidMode { mode });
+ }
+ let storage = match kind {
+ ResourceKind::OldOrSource => &mut self.old,
+ ResourceKind::NewOrDestination => &mut self.new,
+ }
+ .get_or_insert_with(Default::default);
+
+ storage.id = id;
+ storage.set_location(rela_path);
+ storage.is_link = matches!(mode, gix_object::tree::EntryKind::Link);
+ storage.use_id = self.filter.roots.by_kind(kind).is_none();
+
+ if self.diff_cache.contains_key(storage) {
+ return Ok(());
+ }
+ let entry = self
+ .attr_stack
+ .at_entry(rela_path, Some(false), objects)
+ .map_err(|err| set_resource::Error::Attributes {
+ source: err,
+ kind,
+ rela_path: rela_path.to_owned(),
+ })?;
+ let mut buf = Vec::new();
+ let out = self.filter.convert_to_diffable(
+ &id,
+ mode,
+ rela_path,
+ kind,
+ &mut |_, out| {
+ let _ = entry.matching_attributes(out);
+ },
+ objects,
+ self.filter_mode,
+ &mut buf,
+ )?;
+ let key = storage.clone();
+ assert!(
+ self.diff_cache
+ .insert(
+ key,
+ CacheValue {
+ conversion: out,
+ mode,
+ buffer: buf,
+ },
+ )
+ .is_none(),
+ "The key impl makes clashes impossible with our usage"
+ );
+ Ok(())
+ }
+}
diff --git a/vendor/gix-diff/src/lib.rs b/vendor/gix-diff/src/lib.rs
index 6d94a7591..1fe8d2e6b 100644
--- a/vendor/gix-diff/src/lib.rs
+++ b/vendor/gix-diff/src/lib.rs
@@ -1,13 +1,48 @@
//! Algorithms for diffing various git object types and for generating patches, highly optimized for performance.
//! ## Feature Flags
#![cfg_attr(
-feature = "document-features",
-cfg_attr(doc, doc = ::document_features::document_features!())
+ all(doc, feature = "document-features"),
+ doc = ::document_features::document_features!()
)]
-#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
+#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg, doc_auto_cfg))]
#![deny(missing_docs, rust_2018_idioms)]
#![forbid(unsafe_code)]
+/// Re-export for use in public API.
+#[cfg(feature = "blob")]
+pub use gix_command as command;
+/// Re-export for use in public API.
+#[cfg(feature = "blob")]
+pub use gix_object as object;
+
+/// A structure to capture how to perform rename and copy tracking, used by the [rewrites::Tracker].
+#[derive(Debug, Copy, Clone, PartialEq)]
+#[cfg(feature = "blob")]
+pub struct Rewrites {
+ /// If `Some(…)`, also find copies. `None` is the default which does not try to detect copies at all.
+ ///
+ /// Note that this is an even more expensive operation than detecting renames stemming from additions and deletions
+ /// as the resulting set to search through is usually larger.
+ pub copies: Option<rewrites::Copies>,
+ /// The percentage of similarity needed for files to be considered renamed, defaulting to `Some(0.5)`.
+ /// This field is similar to `git diff -M50%`.
+ ///
+ /// If `None`, files are only considered equal if their content matches 100%.
+ /// Note that values greater than 1.0 have no different effect than 1.0.
+ pub percentage: Option<f32>,
+ /// The amount of files to consider for fuzzy rename or copy tracking. Defaults to 1000, intended to allow 1000*1000
+ /// combinations to be tested for fuzzy matches, i.e. the ones that try to find matches by comparing similarity.
+ /// If 0, there is no limit. NOTE(review): the tracker compares `sources * destinations` against `limit` itself, not `limit * limit` - confirm intent.
+ ///
+ /// If the limit would not be enough to test the entire set of combinations, the algorithm will trade in precision and not
+ /// run the fuzzy version of identity tests at all. That way results are never partial.
+ pub limit: usize,
+}
+
+/// Contains a [Tracker](rewrites::Tracker) to detect rewrites.
+#[cfg(feature = "blob")]
+pub mod rewrites;
+
///
pub mod tree;
diff --git a/vendor/gix-diff/src/rewrites/mod.rs b/vendor/gix-diff/src/rewrites/mod.rs
new file mode 100644
index 000000000..08d6f2cce
--- /dev/null
+++ b/vendor/gix-diff/src/rewrites/mod.rs
@@ -0,0 +1,71 @@
+use crate::Rewrites;
+
+/// Types related to the rename tracker for renames, rewrites and copies.
+pub mod tracker;
+
+/// A type to retain state related to an ongoing tracking operation, collecting the set of interesting changes
+/// from which, at a later stage, the ones that seem to be renames or copies are computed.
+pub struct Tracker<T> {
+ /// The tracked items thus far, which will be used to determine renames/copies and rewrites later.
+ items: Vec<tracker::Item<T>>,
+ /// A place to store all paths in to reduce amount of allocations.
+ path_backing: Vec<u8>,
+ /// How to track copies and/or rewrites.
+ rewrites: Rewrites,
+}
+
+/// Determine in which set of files to search for copies.
+#[derive(Default, Debug, Copy, Clone, Eq, PartialEq)]
+pub enum CopySource {
+ /// Find copies from the set of modified files only.
+ #[default]
+ FromSetOfModifiedFiles,
+ /// Find copies from the set of modified files, as well as all files known to the source (i.e. previous state of the tree).
+ ///
+ /// This can be an expensive operation as it scales exponentially with the total amount of files in the set.
+ FromSetOfModifiedFilesAndAllSources,
+}
+
+/// Under which circumstances we consider a file to be a copy.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct Copies {
+ /// The set of files to search when finding the source of copies.
+ pub source: CopySource,
+ /// Equivalent to [`Rewrites::percentage`], but used for copy tracking.
+ ///
+ /// Useful to have similarity-based rename tracking and cheaper copy tracking.
+ pub percentage: Option<f32>,
+}
+
+impl Default for Copies {
+ fn default() -> Self {
+ Copies {
+ source: CopySource::default(),
+ percentage: Some(0.5),
+ }
+ }
+}
+
+/// Information collected while handling rewrites of files which may be tracked.
+#[derive(Default, Clone, Copy, Debug, PartialEq)]
+pub struct Outcome {
+ /// The options used to guide the rewrite tracking. Either fully provided by the caller or retrieved from git configuration.
+ pub options: Rewrites,
+ /// The amount of similarity checks that have been conducted to find renamed files and potentially copies.
+ pub num_similarity_checks: usize,
+ /// Set to the amount of worst-case rename permutations we didn't search as our limit didn't allow it.
+ pub num_similarity_checks_skipped_for_rename_tracking_due_to_limit: usize,
+ /// Set to the amount of worst-case copy permutations we didn't search as our limit didn't allow it.
+ pub num_similarity_checks_skipped_for_copy_tracking_due_to_limit: usize,
+}
+
+/// The default settings for rewrites according to the git configuration defaults.
+impl Default for Rewrites {
+ fn default() -> Self {
+ Rewrites {
+ copies: None,
+ percentage: Some(0.5),
+ limit: 1000,
+ }
+ }
+}
diff --git a/vendor/gix-diff/src/rewrites/tracker.rs b/vendor/gix-diff/src/rewrites/tracker.rs
new file mode 100644
index 000000000..95ebe7fab
--- /dev/null
+++ b/vendor/gix-diff/src/rewrites/tracker.rs
@@ -0,0 +1,620 @@
+//! ### Deviation
+//!
+//! Note that the algorithm implemented here is in many ways different from what `git` does.
+//!
+//! - it's less sophisticated and doesn't use any ranking of candidates. Instead, it picks the first possible match.
+//! - the set used for copy-detection is probably smaller by default.
+use std::ops::Range;
+
+use bstr::BStr;
+use gix_object::tree::{EntryKind, EntryMode};
+
+use crate::{
+ blob::{platform::prepare_diff::Operation, DiffLineStats, ResourceKind},
+ rewrites::{CopySource, Outcome, Tracker},
+ Rewrites,
+};
+
+/// The kind of a change.
+#[derive(Debug, Copy, Clone, Ord, PartialOrd, PartialEq, Eq)]
+pub enum ChangeKind {
+ /// The change represents the *deletion* of an item.
+ Deletion,
+ /// The change represents the *modification* of an item.
+ Modification,
+ /// The change represents the *addition* of an item.
+ Addition,
+}
+
+/// A trait providing all functionality to abstract over the concept of a change, as seen by the [`Tracker`].
+pub trait Change: Clone {
+ /// Return the hash of this change for identification.
+ ///
+ /// Note that this is the id of the object as stored in `git`, i.e. it must have gone through workspace
+ /// conversions.
+ fn id(&self) -> &gix_hash::oid;
+ /// Return the kind of this change.
+ fn kind(&self) -> ChangeKind;
+ /// Return more information about the kind of entry affected by this change.
+ fn entry_mode(&self) -> EntryMode;
+ /// Return the id of the change along with its mode.
+ fn id_and_entry_mode(&self) -> (&gix_hash::oid, EntryMode);
+}
+
+/// A single tracked item; a set of these allows to figure out their relations by determining their similarity.
+pub(crate) struct Item<T> {
+ /// The underlying raw change
+ change: T,
+ /// The slice into the backing for paths.
+ path: Range<usize>,
+ /// If true, this item was already emitted, i.e. seen by the caller.
+ emitted: bool,
+}
+
+impl<T: Change> Item<T> {
+ fn location<'a>(&self, backing: &'a [u8]) -> &'a BStr {
+ backing[self.path.clone()].as_ref()
+ }
+ fn entry_mode_compatible(&self, mode: EntryMode) -> bool {
+ use EntryKind::*;
+ matches!(
+ (mode.kind(), self.change.entry_mode().kind()),
+ (Blob | BlobExecutable, Blob | BlobExecutable) | (Link, Link)
+ )
+ }
+
+ fn is_source_for_destination_of(&self, kind: visit::SourceKind, dest_item_mode: EntryMode) -> bool {
+ self.entry_mode_compatible(dest_item_mode)
+ && match kind {
+ visit::SourceKind::Rename => !self.emitted && matches!(self.change.kind(), ChangeKind::Deletion),
+ visit::SourceKind::Copy => {
+ matches!(self.change.kind(), ChangeKind::Modification)
+ }
+ }
+ }
+}
+
+/// A module with types used in the user-callback in [Tracker::emit()](crate::rewrites::Tracker::emit()).
+pub mod visit {
+ use bstr::BStr;
+ use gix_object::tree::EntryMode;
+
+ use crate::blob::DiffLineStats;
+
+ /// The source of a rewrite, rename or copy.
+ #[derive(Debug, Clone, PartialEq, PartialOrd)]
+ pub struct Source<'a> {
+ /// The kind of entry.
+ pub entry_mode: EntryMode,
+ /// The hash of the state of the source as seen in the object database.
+ pub id: gix_hash::ObjectId,
+ /// Further specify what kind of source this is.
+ pub kind: SourceKind,
+ /// The repository-relative location of this entry.
+ pub location: &'a BStr,
+ /// If this is a rewrite, indicate how many lines would need to change to turn this source into the destination.
+ pub diff: Option<DiffLineStats>,
+ }
+
+ /// Further identify the kind of [Source].
+ #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
+ pub enum SourceKind {
+ /// This is the source of an entry that was renamed, as `source` was renamed to `destination`.
+ Rename,
+ /// This is the source of a copy, as `source` was copied into `destination`.
+ Copy,
+ }
+
+ /// A change along with a location.
+ #[derive(Clone)]
+ pub struct Destination<'a, T: Clone> {
+ /// The change at the given `location`.
+ pub change: T,
+ /// The repository-relative location of this destination.
+ pub location: &'a BStr,
+ }
+}
+
+///
+pub mod emit {
+ /// The error returned by [Tracker::emit()](super::Tracker::emit()).
+ #[derive(Debug, thiserror::Error)]
+ #[allow(missing_docs)]
+ pub enum Error {
+ #[error("Could not find blob for similarity checking")]
+ FindExistingBlob(#[from] gix_object::find::existing_object::Error),
+ #[error("Could not obtain exhaustive item set to use as possible sources for copy detection")]
+ GetItemsForExhaustiveCopyDetection(#[source] Box<dyn std::error::Error + Send + Sync>),
+ #[error(transparent)]
+ SetResource(#[from] crate::blob::platform::set_resource::Error),
+ #[error(transparent)]
+ PrepareDiff(#[from] crate::blob::platform::prepare_diff::Error),
+ }
+}
+
+/// Lifecycle
+impl<T: Change> Tracker<T> {
+ /// Create a new instance with `rewrites` configuration.
+ pub fn new(rewrites: Rewrites) -> Self {
+ Tracker {
+ items: vec![],
+ path_backing: vec![],
+ rewrites,
+ }
+ }
+}
+
+/// build state and find matches.
+impl<T: Change> Tracker<T> {
+ /// We may refuse the push if that information isn't needed for what we have to track.
+ pub fn try_push_change(&mut self, change: T, location: &BStr) -> Option<T> {
+ if !change.entry_mode().is_blob_or_symlink() {
+ return Some(change);
+ }
+ let keep = match (self.rewrites.copies, change.kind()) {
+ (Some(_find_copies), _) => true,
+ (None, ChangeKind::Modification { .. }) => false,
+ (None, _) => true,
+ };
+
+ if !keep {
+ return Some(change);
+ }
+
+ let start = self.path_backing.len();
+ self.path_backing.extend_from_slice(location);
+ self.items.push(Item {
+ path: start..self.path_backing.len(),
+ change,
+ emitted: false,
+ });
+ None
+ }
+
+ /// Can only be called once effectively as it alters its own state to assure each item is only emitted once.
+ ///
+ /// `cb(destination, source)` is called for each item, either with `Some(source)` if it's
+ /// the destination of a copy or rename, or with `None` for source if no relation to other
+ /// items in the tracked set exists, which is like saying 'no rename or rewrite or copy' happened.
+ ///
+ /// `objects` is used to access blob data for similarity checks if required and is taken directly from the object database.
+ /// Worktree filters and text conversions will be applied afterwards automatically. Note that object-caching *should not*
+ /// be enabled as caching is implemented by `diff_cache`, after all, the blob that's actually diffed is going
+ /// through conversion steps.
+ ///
+ /// `diff_cache` is a way to retain a cache of resources that are prepared for rapid diffing, and it also controls
+ /// the diff-algorithm (provided no user-algorithm is set).
+ /// Note that we control a few options of `diff_cache` to assure it will ignore external commands.
+ /// Note that we do not control how the `diff_cache` converts resources, it's left to the caller to decide
+ /// if it should look at what's stored in `git`, or in the working tree, along with all diff-specific conversions.
+ ///
+ /// `push_source_tree(push_fn: push(change, location))` is a function that is called when the entire tree of the source
+ /// should be added as modifications by calling `push` repeatedly to use for perfect copy tracking. Note that `push` expects
+ /// modifications and will panic if `change` is refused by [`try_push_change()`](Self::try_push_change()), and it's valid to not call `push` at all.
+ pub fn emit<PushSourceTreeFn, E>(
+ &mut self,
+ mut cb: impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_>>) -> crate::tree::visit::Action,
+ diff_cache: &mut crate::blob::Platform,
+ objects: &impl gix_object::FindObjectOrHeader,
+ mut push_source_tree: PushSourceTreeFn,
+ ) -> Result<Outcome, emit::Error>
+ where
+ PushSourceTreeFn: FnMut(&mut dyn FnMut(T, &BStr)) -> Result<(), E>,
+ E: std::error::Error + Send + Sync + 'static,
+ {
+ diff_cache.options.skip_internal_diff_if_external_is_configured = false;
+
+ fn by_id_and_location<T: Change>(a: &Item<T>, b: &Item<T>) -> std::cmp::Ordering {
+ a.change
+ .id()
+ .cmp(b.change.id())
+ .then_with(|| a.path.start.cmp(&b.path.start).then(a.path.end.cmp(&b.path.end)))
+ }
+ self.items.sort_by(by_id_and_location);
+
+ let mut out = Outcome {
+ options: self.rewrites,
+ ..Default::default()
+ };
+ self.match_pairs_of_kind(
+ visit::SourceKind::Rename,
+ &mut cb,
+ self.rewrites.percentage,
+ &mut out,
+ diff_cache,
+ objects,
+ )?;
+
+ if let Some(copies) = self.rewrites.copies {
+ self.match_pairs_of_kind(
+ visit::SourceKind::Copy,
+ &mut cb,
+ copies.percentage,
+ &mut out,
+ diff_cache,
+ objects,
+ )?;
+
+ match copies.source {
+ CopySource::FromSetOfModifiedFiles => {}
+ CopySource::FromSetOfModifiedFilesAndAllSources => {
+ push_source_tree(&mut |change, location| {
+ assert!(
+ self.try_push_change(change, location).is_none(),
+ "we must accept every change"
+ );
+ // make sure these aren't viable to be emitted anymore.
+ self.items.last_mut().expect("just pushed").emitted = true;
+ })
+ .map_err(|err| emit::Error::GetItemsForExhaustiveCopyDetection(Box::new(err)))?;
+ self.items.sort_by(by_id_and_location);
+
+ self.match_pairs_of_kind(
+ visit::SourceKind::Copy,
+ &mut cb,
+ copies.percentage,
+ &mut out,
+ diff_cache,
+ objects,
+ )?;
+ }
+ }
+ }
+
+ self.items
+ .sort_by(|a, b| a.location(&self.path_backing).cmp(b.location(&self.path_backing)));
+ for item in self.items.drain(..).filter(|item| !item.emitted) {
+ if cb(
+ visit::Destination {
+ location: item.location(&self.path_backing),
+ change: item.change,
+ },
+ None,
+ ) == crate::tree::visit::Action::Cancel
+ {
+ break;
+ }
+ }
+ Ok(out)
+ }
+}
+
+impl<T: Change> Tracker<T> {
+    /// Pair up additions with sources of the given `kind`, first by identity and then, if permitted, by similarity.
+    ///
+    /// The identity pass always runs. A similarity pass using `percentage` follows only if
+    ///
+    /// * `cb` did not cancel during the identity pass,
+    /// * `percentage` asks for less than an exact match (see `needs_exact_match()`), and
+    /// * `self.rewrites.limit` is `0`, or the estimated amount of source-destination combinations doesn't exceed it.
+    ///
+    /// If the limit is exceeded, the amount of combinations is recorded in `out` for the respective `kind`
+    /// and the similarity pass is skipped.
+    fn match_pairs_of_kind(
+        &mut self,
+        kind: visit::SourceKind,
+        cb: &mut impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_>>) -> crate::tree::visit::Action,
+        percentage: Option<f32>,
+        out: &mut Outcome,
+        diff_cache: &mut crate::blob::Platform,
+        objects: &impl gix_object::FindObjectOrHeader,
+    ) -> Result<(), emit::Error> {
+        // Matching by identity is cheap and reduces the set of candidates for the more expensive pass.
+        let identity_pass = self.match_pairs(cb, None /* by identity */, kind, out, diff_cache, objects)?;
+        if identity_pass == crate::tree::visit::Action::Cancel || needs_exact_match(percentage) {
+            return Ok(());
+        }
+
+        // A limit of zero means 'unlimited'.
+        if self.rewrites.limit != 0 {
+            let candidates = self.items.iter().map(|item| (item.emitted, item.change.kind()));
+            let (num_src, num_dst) = estimate_involved_items(candidates, kind);
+            let permutations = num_src * num_dst;
+            if permutations > self.rewrites.limit {
+                match kind {
+                    visit::SourceKind::Rename => {
+                        out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit = permutations;
+                    }
+                    visit::SourceKind::Copy => {
+                        out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit = permutations;
+                    }
+                }
+                return Ok(());
+            }
+        }
+
+        self.match_pairs(cb, percentage, kind, out, diff_cache, objects)?;
+        Ok(())
+    }
+
+    /// For each addition that wasn't emitted yet, look for a source of `kind` using `find_match()` with `percentage`,
+    /// and if there is one, mark both items as emitted and pass the pair to `cb`.
+    ///
+    /// Additions without a source are left untouched.
+    /// Returns `Action::Cancel` as soon as `cb` returns it, and `Action::Continue` once all additions were visited.
+    fn match_pairs(
+        &mut self,
+        cb: &mut impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_>>) -> crate::tree::visit::Action,
+        percentage: Option<f32>,
+        kind: visit::SourceKind,
+        stats: &mut Outcome,
+        diff_cache: &mut crate::blob::Platform,
+        objects: &impl gix_object::FindObjectOrHeader,
+    ) -> Result<crate::tree::visit::Action, emit::Error> {
+        let mut search_start = 0;
+        loop {
+            // Resume the search right behind the previously visited destination.
+            let next_addition = self.items[search_start..]
+                .iter()
+                .enumerate()
+                .find(|(_, item)| !item.emitted && matches!(item.change.kind(), ChangeKind::Addition));
+            let (dest_idx, dest) = match next_addition {
+                Some((relative_idx, item)) => (search_start + relative_idx, item),
+                None => break,
+            };
+            search_start = dest_idx + 1;
+
+            let matched = find_match(
+                &self.items,
+                dest,
+                dest_idx,
+                percentage,
+                kind,
+                stats,
+                objects,
+                diff_cache,
+                &self.path_backing,
+            )?;
+            let (src_idx, src, diff) = match matched {
+                Some(source) => source,
+                None => continue,
+            };
+
+            let (id, entry_mode) = src.change.id_and_entry_mode();
+            let source = visit::Source {
+                entry_mode,
+                id: id.to_owned(),
+                kind,
+                location: src.location(&self.path_backing),
+                diff,
+            };
+            let destination = visit::Destination {
+                change: dest.change.clone(),
+                location: dest.location(&self.path_backing),
+            };
+
+            self.items[dest_idx].emitted = true;
+            self.items[src_idx].emitted = true;
+            if cb(destination, Some(source)) == crate::tree::visit::Action::Cancel {
+                return Ok(crate::tree::visit::Action::Cancel);
+            }
+        }
+        Ok(crate::tree::visit::Action::Continue)
+    }
+}
+
+/// Count how many of `items`, given as `(emitted, change_kind)`, could act as source or as destination
+/// for an operation of the given `kind`, and return these counts as `(sources, destinations)`.
+///
+/// * For renames, emitted items are ignored entirely. Deletions are sources and additions are destinations.
+/// * For copies, modifications are sources no matter if they were emitted, and only additions that
+///   were not emitted are destinations.
+fn estimate_involved_items(
+    items: impl IntoIterator<Item = (bool, ChangeKind)>,
+    kind: visit::SourceKind,
+) -> (usize, usize) {
+    let is_rename = kind == visit::SourceKind::Rename;
+    let is_copy = kind == visit::SourceKind::Copy;
+    let mut num_sources = 0;
+    let mut num_destinations = 0;
+    for (emitted, change_kind) in items {
+        if is_rename && emitted {
+            continue;
+        }
+        match change_kind {
+            ChangeKind::Addition if is_rename || !emitted => num_destinations += 1,
+            ChangeKind::Deletion if is_rename => num_sources += 1,
+            ChangeKind::Modification if is_copy => num_sources += 1,
+            ChangeKind::Addition | ChangeKind::Deletion | ChangeKind::Modification => {}
+        }
+    }
+    (num_sources, num_destinations)
+}
+
+/// Return `true` if `percentage` demands an identity match, which is the case if it is unset or at least `1.0`.
+fn needs_exact_match(percentage: Option<f32>) -> bool {
+    match percentage {
+        Some(threshold) => threshold >= 1.0,
+        None => true,
+    }
+}
+
+/// A matched source as `(src_idx, src, diff)`: the index of the source in the searched slice, the source item itself,
+/// and possibly the line statistics of the diff. `find_match()` only sets the latter for matches found by similarity.
+type SourceTuple<'a, T> = (usize, &'a Item<T>, Option<DiffLineStats>);
+
+/// Find a source for the destination `item` in `items`, ignoring `item_idx` to avoid finding `item` itself.
+///
+/// * If `percentage` is `None` or `Some(x)` where `x>=1`, or if `item` is a symlink, sources are matched by identity,
+///   i.e. by equal object id. This lookup uses a binary search by id, so `items` is expected to be sorted by object id.
+/// * Otherwise `item` is diffed against each eligible source in order using `diff_cache` and `objects`, and the first
+///   source whose similarity is at least `percentage` is returned along with the line statistics of that diff.
+///   Each prepared diff increments `stats.num_similarity_checks`.
+///
+/// Use `kind` to indicate what kind of match we are looking for; whether an item is eligible as source for it
+/// is decided by `Item::is_source_for_destination_of()`.
+/// `path_backing` is used to resolve the locations of items when handing them to `diff_cache`.
+///
+/// Note that callers are expected to match by identity first even if a percentage is given, as it's much faster and
+/// may reduce the set of items to be searched.
+///
+/// Returns `Ok(None)` if no source could be found.
+#[allow(clippy::too_many_arguments)]
+fn find_match<'a, T: Change>(
+    items: &'a [Item<T>],
+    item: &Item<T>,
+    item_idx: usize,
+    percentage: Option<f32>,
+    kind: visit::SourceKind,
+    stats: &mut Outcome,
+    objects: &impl gix_object::FindObjectOrHeader,
+    diff_cache: &mut crate::blob::Platform,
+    path_backing: &[u8],
+) -> Result<Option<SourceTuple<'a, T>>, emit::Error> {
+    let (item_id, item_mode) = item.change.id_and_entry_mode();
+    if needs_exact_match(percentage) || item_mode.is_link() {
+        // `partition_point()` returns at most `items.len()`, so slicing from `first_idx` can't panic.
+        let first_idx = items.partition_point(|a| a.change.id() < item_id);
+        // `end` is an index into all of `items`. If every item from `first_idx` onwards has our id, the run
+        // ends at the length of the *entire* slice, not at the length of the tail that is searched here.
+        let end = items[first_idx..]
+            .iter()
+            .position(|a| a.change.id() != item_id)
+            .map_or(items.len(), |idx| first_idx + idx);
+        let res = items[first_idx..end].iter().enumerate().find_map(|(offset, src)| {
+            let src_idx = first_idx + offset;
+            (src_idx != item_idx && src.is_source_for_destination_of(kind, item_mode)).then_some((src_idx, src, None))
+        });
+        return Ok(res);
+    }
+
+    let percentage = percentage.expect("it's set to something below 1.0 and we assured this");
+    debug_assert_eq!(
+        item.change.entry_mode().kind(),
+        EntryKind::Blob,
+        "symlinks are matched exactly, and trees aren't used here"
+    );
+
+    // The destination is only handed to the diff cache once there is at least one source to compare it with.
+    let mut has_new = false;
+    for (can_idx, src) in items
+        .iter()
+        .enumerate()
+        .filter(|(src_idx, src)| *src_idx != item_idx && src.is_source_for_destination_of(kind, item_mode))
+    {
+        if !has_new {
+            diff_cache.set_resource(
+                item_id.to_owned(),
+                item_mode.kind(),
+                item.location(path_backing),
+                ResourceKind::NewOrDestination,
+                objects,
+            )?;
+            has_new = true;
+        }
+        let (src_id, src_mode) = src.change.id_and_entry_mode();
+        diff_cache.set_resource(
+            src_id.to_owned(),
+            src_mode.kind(),
+            src.location(path_backing),
+            ResourceKind::OldOrSource,
+            objects,
+        )?;
+        let prep = diff_cache.prepare_diff()?;
+        stats.num_similarity_checks += 1;
+        match prep.operation {
+            Operation::InternalDiff { algorithm } => {
+                let tokens =
+                    crate::blob::intern::InternedInput::new(prep.old.intern_source(), prep.new.intern_source());
+                let counts = crate::blob::diff(
+                    algorithm,
+                    &tokens,
+                    crate::blob::sink::Counter::new(diff::Statistics {
+                        removed_bytes: 0,
+                        input: &tokens,
+                    }),
+                );
+                let old_data_len = prep.old.data.as_slice().unwrap_or_default().len();
+                let new_data_len = prep.new.data.as_slice().unwrap_or_default().len();
+                // The share of bytes of the old version that survived, in relation to the larger of both versions.
+                let similarity = (old_data_len - counts.wrapped) as f32 / old_data_len.max(new_data_len) as f32;
+                if similarity >= percentage {
+                    return Ok(Some((
+                        can_idx,
+                        src,
+                        DiffLineStats {
+                            removals: counts.removals,
+                            insertions: counts.insertions,
+                            before: tokens.before.len().try_into().expect("interner handles only u32"),
+                            after: tokens.after.len().try_into().expect("interner handles only u32"),
+                            similarity,
+                        }
+                        .into(),
+                    )));
+                }
+            }
+            Operation::ExternalCommand { .. } => {
+                unreachable!("we have disabled this possibility with an option")
+            }
+            Operation::SourceOrDestinationIsBinary => {
+                // TODO: figure out if git does more here
+            }
+        };
+    }
+    Ok(None)
+}
+
+mod diff {
+    use std::ops::Range;
+
+    /// A diff sink which sums up the amount of bytes of all tokens that were removed from the `before` side of `input`.
+    pub struct Statistics<'a, 'data> {
+        /// The running total of removed bytes, which should be `0` when starting a diff.
+        pub removed_bytes: usize,
+        /// The interned input of the diff, used to look up the bytes of each removed token.
+        pub input: &'a crate::blob::intern::InternedInput<&'data [u8]>,
+    }
+
+    impl<'a, 'data> crate::blob::Sink for Statistics<'a, 'data> {
+        type Out = usize;
+
+        fn process_change(&mut self, before: Range<u32>, _after: Range<u32>) {
+            // This is called once per hunk, so the bytes must be accumulated. Assigning instead would only
+            // keep the removed bytes of the last hunk and make sources look more similar than they are.
+            self.removed_bytes += self.input.before[before.start as usize..before.end as usize]
+                .iter()
+                .map(|token| self.input.interner[*token].len())
+                .sum::<usize>();
+        }
+
+        fn finish(self) -> Self::Out {
+            self.removed_bytes
+        }
+    }
+}
+
+/// Tests for `estimate_involved_items()`, whose input is `(emitted, change_kind)` and whose
+/// output is `(sources, destinations)`.
+#[cfg(test)]
+mod estimate_involved_items {
+    use super::estimate_involved_items;
+    use crate::rewrites::tracker::{visit::SourceKind, ChangeKind};
+
+    #[test]
+    fn renames_count_unemitted_as_sources_and_destinations() {
+        let items = [
+            (false, ChangeKind::Addition),
+            (true, ChangeKind::Deletion),
+            (true, ChangeKind::Deletion),
+        ];
+        assert_eq!(
+            estimate_involved_items(items, SourceKind::Rename),
+            (0, 1),
+            "here we only have one eligible destination and no source as all deletions are emitted, hence nothing to do"
+        );
+        assert_eq!(
+            estimate_involved_items(items.into_iter().map(|t| (false, t.1)), SourceKind::Rename),
+            (2, 1),
+            "now we have more possibilities as renames count un-emitted deletions as source"
+        );
+    }
+
+    #[test]
+    fn copies_do_not_count_additions_as_sources() {
+        let items = [
+            (false, ChangeKind::Addition),
+            (true, ChangeKind::Addition),
+            (true, ChangeKind::Deletion),
+        ];
+        assert_eq!(
+            estimate_involved_items(items, SourceKind::Copy),
+            (0, 1),
+            "one addition as destination, the other isn't counted as it's emitted, nor is it considered a copy-source. \
+            deletions don't count"
+        );
+    }
+
+    #[test]
+    fn copies_count_modifications_as_sources() {
+        let items = [
+            (false, ChangeKind::Addition),
+            (true, ChangeKind::Modification),
+            (false, ChangeKind::Modification),
+        ];
+        assert_eq!(
+            estimate_involved_items(items, SourceKind::Copy),
+            (2, 1),
+            "any modification is a valid source, emitted or not"
+        );
+    }
+}
diff --git a/vendor/gix-diff/src/tree/changes.rs b/vendor/gix-diff/src/tree/changes.rs
index 16e8f7873..ee86bd8bc 100644
--- a/vendor/gix-diff/src/tree/changes.rs
+++ b/vendor/gix-diff/src/tree/changes.rs
@@ -1,7 +1,6 @@
use std::{borrow::BorrowMut, collections::VecDeque};
-use gix_hash::{oid, ObjectId};
-use gix_object::tree::EntryRef;
+use gix_object::{tree::EntryRef, FindExt};
use crate::{
tree,
@@ -12,11 +11,8 @@ use crate::{
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
- #[error("The object {oid} referenced by the tree or the tree itself was not found in the database")]
- FindExisting {
- oid: ObjectId,
- source: Box<dyn std::error::Error + Send + Sync + 'static>,
- },
+ #[error(transparent)]
+ Find(#[from] gix_object::find::existing_iter::Error),
#[error("The delegate cancelled the operation")]
Cancelled,
#[error(transparent)]
@@ -24,12 +20,12 @@ pub enum Error {
}
impl<'a> tree::Changes<'a> {
- /// Calculate the changes that would need to be applied to `self` to get `other`.
+ /// Calculate the changes that would need to be applied to `self` to get `other` using `objects` to obtain objects as needed for traversal.
///
/// * The `state` maybe owned or mutably borrowed to allow reuses allocated data structures through multiple runs.
/// * `locate` is a function `f(object_id, &mut buffer) -> Option<TreeIter>` to return a `TreeIter` for the given object id backing
/// its data in the given buffer. Returning `None` is unexpected as these trees are obtained during iteration, and in a typical
- /// database errors are not expected either which is why the error case is omitted. To allow proper error reporting, [`Error::FindExisting`]
+ /// database errors are not expected either which is why the error case is omitted. To allow proper error reporting, [`Error::Find`]
/// should be converted into a more telling error.
/// * `delegate` will receive the computed changes, see the [`Visit`][`tree::Visit`] trait for more information on what to expect.
///
@@ -47,16 +43,14 @@ impl<'a> tree::Changes<'a> {
///
/// [git_cmp_c]: https://github.com/git/git/blob/311531c9de557d25ac087c1637818bd2aad6eb3a/tree-diff.c#L49:L65
/// [git_cmp_rs]: https://github.com/Byron/gitoxide/blob/a4d5f99c8dc99bf814790928a3bf9649cd99486b/gix-object/src/mutable/tree.rs#L52-L55
- pub fn needed_to_obtain<FindFn, R, StateMut, E>(
+ pub fn needed_to_obtain<R, StateMut>(
mut self,
other: gix_object::TreeRefIter<'_>,
mut state: StateMut,
- mut find: FindFn,
+ objects: impl gix_object::Find,
delegate: &mut R,
) -> Result<(), Error>
where
- FindFn: for<'b> FnMut(&oid, &'b mut Vec<u8>) -> Result<gix_object::TreeRefIter<'b>, E>,
- E: std::error::Error + Send + Sync + 'static,
R: tree::Visit,
StateMut: BorrowMut<tree::State>,
{
@@ -77,28 +71,16 @@ impl<'a> tree::Changes<'a> {
match state.trees.pop_front() {
Some((None, Some(rhs))) => {
delegate.pop_front_tracked_path_and_set_current();
- rhs_entries = peekable(find(&rhs, &mut state.buf2).map_err(|err| Error::FindExisting {
- oid: rhs,
- source: err.into(),
- })?);
+ rhs_entries = peekable(objects.find_tree_iter(&rhs, &mut state.buf2)?);
}
Some((Some(lhs), Some(rhs))) => {
delegate.pop_front_tracked_path_and_set_current();
- lhs_entries = peekable(find(&lhs, &mut state.buf1).map_err(|err| Error::FindExisting {
- oid: lhs,
- source: err.into(),
- })?);
- rhs_entries = peekable(find(&rhs, &mut state.buf2).map_err(|err| Error::FindExisting {
- oid: rhs,
- source: err.into(),
- })?);
+ lhs_entries = peekable(objects.find_tree_iter(&lhs, &mut state.buf1)?);
+ rhs_entries = peekable(objects.find_tree_iter(&rhs, &mut state.buf2)?);
}
Some((Some(lhs), None)) => {
delegate.pop_front_tracked_path_and_set_current();
- lhs_entries = peekable(find(&lhs, &mut state.buf1).map_err(|err| Error::FindExisting {
- oid: lhs,
- source: err.into(),
- })?);
+ lhs_entries = peekable(objects.find_tree_iter(&lhs, &mut state.buf1)?);
}
Some((None, None)) => unreachable!("BUG: it makes no sense to fill the stack with empties"),
None => return Ok(()),
@@ -267,9 +249,8 @@ fn handle_lhs_and_rhs_with_equal_filenames<R: tree::Visit>(
queue: &mut VecDeque<TreeInfoPair>,
delegate: &mut R,
) -> Result<(), Error> {
- use gix_object::tree::EntryMode::*;
- match (lhs.mode, rhs.mode) {
- (Tree, Tree) => {
+ match (lhs.mode.is_tree(), rhs.mode.is_tree()) {
+ (true, true) => {
delegate.push_back_tracked_path_component(lhs.filename);
if lhs.oid != rhs.oid
&& delegate
@@ -285,7 +266,7 @@ fn handle_lhs_and_rhs_with_equal_filenames<R: tree::Visit>(
}
queue.push_back((Some(lhs.oid.to_owned()), Some(rhs.oid.to_owned())));
}
- (_, Tree) => {
+ (_, true) => {
delegate.push_back_tracked_path_component(lhs.filename);
if delegate
.visit(Change::Deletion {
@@ -307,7 +288,7 @@ fn handle_lhs_and_rhs_with_equal_filenames<R: tree::Visit>(
};
queue.push_back((None, Some(rhs.oid.to_owned())));
}
- (Tree, _) => {
+ (true, _) => {
delegate.push_back_tracked_path_component(lhs.filename);
if delegate
.visit(Change::Deletion {
@@ -329,9 +310,9 @@ fn handle_lhs_and_rhs_with_equal_filenames<R: tree::Visit>(
};
queue.push_back((Some(lhs.oid.to_owned()), None));
}
- (lhs_non_tree, rhs_non_tree) => {
+ (false, false) => {
delegate.push_path_component(lhs.filename);
- debug_assert!(lhs_non_tree.is_no_tree() && rhs_non_tree.is_no_tree());
+ debug_assert!(lhs.mode.is_no_tree() && lhs.mode.is_no_tree());
if lhs.oid != rhs.oid
&& delegate
.visit(Change::Modification {
@@ -359,7 +340,7 @@ fn peekable<I: Iterator>(iter: I) -> IteratorType<I> {
mod tests {
use std::cmp::Ordering;
- use gix_object::tree::EntryMode;
+ use gix_object::tree::EntryKind;
use super::*;
@@ -368,12 +349,12 @@ mod tests {
let null = gix_hash::ObjectId::null(gix_hash::Kind::Sha1);
let actual = compare(
&EntryRef {
- mode: EntryMode::Blob,
+ mode: EntryKind::Blob.into(),
filename: "plumbing-cli.rs".into(),
oid: &null,
},
&EntryRef {
- mode: EntryMode::Tree,
+ mode: EntryKind::Tree.into(),
filename: "plumbing".into(),
oid: &null,
},
@@ -381,12 +362,12 @@ mod tests {
assert_eq!(actual, Ordering::Less);
let actual = compare(
&EntryRef {
- mode: EntryMode::Tree,
+ mode: EntryKind::Tree.into(),
filename: "plumbing-cli.rs".into(),
oid: &null,
},
&EntryRef {
- mode: EntryMode::Blob,
+ mode: EntryKind::Blob.into(),
filename: "plumbing".into(),
oid: &null,
},
diff --git a/vendor/gix-diff/src/tree/visit.rs b/vendor/gix-diff/src/tree/visit.rs
index 82e38931d..a113d46b1 100644
--- a/vendor/gix-diff/src/tree/visit.rs
+++ b/vendor/gix-diff/src/tree/visit.rs
@@ -92,6 +92,46 @@ pub trait Visit {
fn visit(&mut self, change: Change) -> Action;
}
+#[cfg(feature = "blob")]
+mod change_impls {
+    use gix_hash::oid;
+    use gix_object::tree::EntryMode;
+
+    use crate::rewrites::tracker::ChangeKind;
+
+    /// Allows tree changes to be used with the rewrite tracker.
+    impl crate::rewrites::tracker::Change for crate::tree::visit::Change {
+        /// The object id stored in the `oid` field of each variant.
+        fn id(&self) -> &oid {
+            let (Self::Addition { oid, .. } | Self::Deletion { oid, .. } | Self::Modification { oid, .. }) = self;
+            oid
+        }
+
+        /// Each variant maps to the `ChangeKind` of the same name.
+        fn kind(&self) -> ChangeKind {
+            match self {
+                Self::Addition { .. } => ChangeKind::Addition,
+                Self::Deletion { .. } => ChangeKind::Deletion,
+                Self::Modification { .. } => ChangeKind::Modification,
+            }
+        }
+
+        /// The mode stored in the `entry_mode` field of each variant.
+        fn entry_mode(&self) -> EntryMode {
+            let (Self::Addition { entry_mode, .. }
+            | Self::Deletion { entry_mode, .. }
+            | Self::Modification { entry_mode, .. }) = self;
+            *entry_mode
+        }
+
+        /// The same values as `id()` and `entry_mode()`, obtained in one go.
+        fn id_and_entry_mode(&self) -> (&oid, EntryMode) {
+            let (Self::Addition { entry_mode, oid, .. }
+            | Self::Deletion { entry_mode, oid, .. }
+            | Self::Modification { entry_mode, oid, .. }) = self;
+            (oid, *entry_mode)
+        }
+    }
+}
+
#[cfg(test)]
mod tests {
use super::*;