diff --git a/.gitignore b/.gitignore index 96ef6c0..ea8c4bf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1 @@ /target -Cargo.lock diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..e692479 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,199 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + +[[package]] +name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + +[[package]] +name = "diffy" +version = "0.4.2" +dependencies = [ + "anstyle", + "snapbox", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "normalize-line-endings" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "similar" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" + +[[package]] +name = "snapbox" +version = "0.6.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c1abc378119f77310836665f8523018532cf7e3faeb3b10b01da5a7321bf8e1" +dependencies = [ + "anstream", + "anstyle", + "normalize-line-endings", + "similar", + "snapbox-macros", +] + +[[package]] +name = "snapbox-macros" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b750c344002d7cc69afb9da00ebd9b5c0f8ac2eb7d115d9d45d5b5f47718d74" +dependencies = [ + "anstream", +] + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" diff --git a/src/lib.rs b/src/lib.rs index b582785..33ebbb6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -226,6 +226,7 @@ mod apply; mod diff; mod merge; mod patch; +pub mod patch_set; mod range; mod utils; diff --git a/src/patch/mod.rs b/src/patch/mod.rs index e228f70..a26b095 100644 --- a/src/patch/mod.rs +++ b/src/patch/mod.rs @@ -1,6 +1,6 @@ pub(crate) mod error; mod format; -mod parse; +pub(crate) mod parse; #[cfg(feature = "color")] mod style; #[cfg(test)] @@ -69,6 +69,14 @@ impl<'a, T: ToOwned + ?Sized> Patch<'a, T> { } } + pub(crate) fn original_path(&self) -> Option<&Cow<'a, T>> { + self.original.as_ref().map(|f| &f.0) + } + + pub(crate) fn modified_path(&self) -> Option<&Cow<'a, T>> { + self.modified.as_ref().map(|f| &f.0) + } + /// Return the name of the old file pub fn original(&self) -> Option<&T> { self.original.as_ref().map(AsRef::as_ref) diff --git a/src/patch/parse.rs b/src/patch/parse.rs index 2b9c545..dd74b15 100644 --- a/src/patch/parse.rs +++ b/src/patch/parse.rs @@ -54,15 +54,32 @@ impl<'a, T: Text + ?Sized> Parser<'a, T> { } pub fn parse(input: &str) -> Result> { + let (result, _consumed) = parse_one(input); + result +} + +/// Parses one patch from input. +/// +/// Always returns consumed bytes alongside the result +/// so callers can advance past the parsed or partially parsed content. +pub(crate) fn parse_one(input: &str) -> (Result>, usize) { let mut parser = Parser::new(input); - let header = patch_header(&mut parser)?; - let hunks = hunks(&mut parser)?; - Ok(Patch::new( + let header = match patch_header(&mut parser) { + Ok(h) => h, + Err(e) => return (Err(e), parser.offset()), + }; + let hunks = match hunks(&mut parser) { + Ok(h) => h, + Err(e) => return (Err(e), parser.offset()), + }; + + let patch = Patch::new( header.0.map(convert_cow_to_str), header.1.map(convert_cow_to_str), hunks, - )) + ); + (Ok(patch), parser.offset()) } pub fn parse_strict(input: &str) -> Result> { diff --git a/src/patch_set/error.rs b/src/patch_set/error.rs new file mode 100644 index 0000000..ba60e86 --- /dev/null +++ b/src/patch_set/error.rs @@ -0,0 +1,92 @@ +//! Error types for patches parsing. + +use std::fmt; +use std::ops::Range; + +use crate::patch::ParsePatchError; + +/// An error returned when parsing patches fails. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct PatchSetParseError { + pub(crate) kind: PatchSetParseErrorKind, + span: Option>, +} + +impl PatchSetParseError { + /// Creates a new error with the given kind and span. + pub(crate) fn new(kind: PatchSetParseErrorKind, span: Range) -> Self { + Self { + kind, + span: Some(span), + } + } + + /// Sets the byte range span for this error. + pub(crate) fn set_span(&mut self, span: Range) { + self.span = Some(span); + } +} + +impl fmt::Display for PatchSetParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(span) = &self.span { + write!( + f, + "error parsing patches at byte {}: {}", + span.start, self.kind + ) + } else { + write!(f, "error parsing patches: {}", self.kind) + } + } +} + +impl std::error::Error for PatchSetParseError {} + +impl From for PatchSetParseError { + fn from(kind: PatchSetParseErrorKind) -> Self { + Self { kind, span: None } + } +} + +/// The kind of error that occurred when parsing patches. +#[derive(Debug, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub(crate) enum PatchSetParseErrorKind { + /// Single patch parsing failed. + Patch(ParsePatchError), + + /// No valid patches found in input. + NoPatchesFound, + + /// Patch has no file path. + NoFilePath, + + /// Patch has both original and modified as /dev/null. + BothDevNull, + + /// Delete patch missing original path. + DeleteMissingOriginalPath, + + /// Create patch missing modified path. + CreateMissingModifiedPath, +} + +impl fmt::Display for PatchSetParseErrorKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Patch(e) => write!(f, "{e}"), + Self::NoPatchesFound => write!(f, "no valid patches found"), + Self::NoFilePath => write!(f, "patch has no file path"), + Self::BothDevNull => write!(f, "patch has both original and modified as /dev/null"), + Self::DeleteMissingOriginalPath => write!(f, "delete patch has no original path"), + Self::CreateMissingModifiedPath => write!(f, "create patch has no modified path"), + } + } +} + +impl From for PatchSetParseError { + fn from(e: ParsePatchError) -> Self { + PatchSetParseErrorKind::Patch(e).into() + } +} diff --git a/src/patch_set/mod.rs b/src/patch_set/mod.rs new file mode 100644 index 0000000..7e15a6e --- /dev/null +++ b/src/patch_set/mod.rs @@ -0,0 +1,286 @@ +//! Utilities for parsing unified diff patches containing multiple files. +//! +//! This module provides [`PatchSet`] for parsing patches that contain changes +//! to multiple files, like the output of `git diff` or `git format-patch`. + +pub(crate) mod error; +mod parse; +#[cfg(test)] +mod tests; + +use std::borrow::Cow; + +use crate::Patch; + +pub use error::PatchSetParseError; +pub use parse::PatchSet; + +/// Options for parsing patch content. +/// +/// Use [`ParseOptions::unidiff()`] to create options for the desired format. +/// +/// ## Example +/// +/// ``` +/// use diffy::patch_set::{PatchSet, ParseOptions}; +/// +/// let s = "\ +/// --- original +/// +++ modified +/// @@ -1 +1 @@ +/// -old +/// +new +/// "; +/// +/// let patches: Vec<_> = PatchSet::parse(s, ParseOptions::unidiff()) +/// .collect::>() +/// .unwrap(); +/// assert_eq!(patches.len(), 1); +/// ``` +#[derive(Debug, Clone)] +pub struct ParseOptions { + pub(crate) format: Format, +} + +#[derive(Debug, Clone, Copy)] +pub(crate) enum Format { + /// Standard unified diff format. + UniDiff, +} + +impl ParseOptions { + /// Parse as standard [unified diff] format. + /// + /// Supported: + /// + /// * `---`/`+++` file headers + /// * `@@ ... @@` hunks + /// * modify and rename files + /// * create files (`--- /dev/null`) + /// * delete files (`+++ /dev/null`) + /// * Skip preamble, headers, and email signature trailer + /// + /// [unified diff]: https://www.gnu.org/software/diffutils/manual/html_node/Unified-Format.html + pub fn unidiff() -> Self { + Self { + format: Format::UniDiff, + } + } +} + +/// File mode extracted from git extended headers. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum FileMode { + /// `100644` regular file + Regular, + /// `100755` executable file + Executable, + /// `120000` symlink + Symlink, + /// `160000` gitlink (submodule) + Gitlink, +} + +/// The kind of patch content in a [`FilePatch`]. +#[derive(Clone, PartialEq, Eq)] +pub enum PatchKind<'a, T: ToOwned + ?Sized> { + /// Text patch with hunks. + Text(Patch<'a, T>), +} + +impl std::fmt::Debug for PatchKind<'_, T> +where + T: ToOwned + std::fmt::Debug, + O: std::borrow::Borrow + std::fmt::Debug, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + PatchKind::Text(patch) => f.debug_tuple("Text").field(patch).finish(), + } + } +} + +impl<'a, T: ToOwned + ?Sized> PatchKind<'a, T> { + /// Returns the text patch, or `None` if this is a binary patch. + pub fn as_text(&self) -> Option<&Patch<'a, T>> { + match self { + PatchKind::Text(patch) => Some(patch), + } + } +} + +/// A single file's patch with operation metadata. +/// +/// This combines a [`PatchKind`] with a [`FileOperation`] +/// that indicates what kind of file operation this patch represents +/// (create, delete, modify, or rename). +#[derive(Clone, PartialEq, Eq)] +pub struct FilePatch<'a, T: ToOwned + ?Sized> { + operation: FileOperation<'a>, + kind: PatchKind<'a, T>, + old_mode: Option, + new_mode: Option, +} + +impl std::fmt::Debug for FilePatch<'_, T> +where + T: ToOwned + std::fmt::Debug, + O: std::borrow::Borrow + std::fmt::Debug, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("FilePatch") + .field("operation", &self.operation) + .field("kind", &self.kind) + .field("old_mode", &self.old_mode) + .field("new_mode", &self.new_mode) + .finish() + } +} + +impl<'a, T: ToOwned + ?Sized> FilePatch<'a, T> { + fn new( + operation: FileOperation<'a>, + patch: Patch<'a, T>, + old_mode: Option, + new_mode: Option, + ) -> Self { + Self { + operation, + kind: PatchKind::Text(patch), + old_mode, + new_mode, + } + } + + /// Returns the file operation for this patch. + pub fn operation(&self) -> &FileOperation<'a> { + &self.operation + } + + /// Returns the patch content. + pub fn patch(&self) -> &PatchKind<'a, T> { + &self.kind + } + + /// Consumes the [`FilePatch`] and returns the underlying [`PatchKind`]. + pub fn into_patch(self) -> PatchKind<'a, T> { + self.kind + } + + /// Returns the file mode before applying this patch (when known). + /// + /// This is typically populated for + /// + /// * mode changes (`old mode ` header) + /// * deletions (`deleted file mode ` header) + pub fn old_mode(&self) -> Option<&FileMode> { + self.old_mode.as_ref() + } + + /// Returns the file mode **after** applying this patch (when known). + /// + /// This is typically populated for + /// + /// * mode changes (the `new mode ` header) + /// * creations (the `new file mode ` header) + pub fn new_mode(&self) -> Option<&FileMode> { + self.new_mode.as_ref() + } +} + +/// The operation to perform based on a patch. +/// +/// This is determined by examining the `---` and `+++` header lines +/// of a unified diff patch, and git extended headers when available. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum FileOperation<'a> { + /// Delete a file (`+++ /dev/null`). + Delete(Cow<'a, str>), + /// Create a new file (`--- /dev/null`). + Create(Cow<'a, str>), + /// Modify a file. + /// + /// * If `original == modified`, this is an in-place modification. + /// * If they differ, the caller decides how to handle, e.g., treat as rename or error. + /// + /// Usually, the caller needs to strip the prefix from the paths to determine. + Modify { + original: Cow<'a, str>, + modified: Cow<'a, str>, + }, + /// Rename a file (move from `from` to `to`, delete `from`). + /// + /// Only produced when git extended headers explicitly indicate a rename. + Rename { + from: Cow<'a, str>, + to: Cow<'a, str>, + }, + /// Copy a file (copy from `from` to `to`, keep `from`). + /// + /// Only produced when git extended headers explicitly indicate a copy. + Copy { + from: Cow<'a, str>, + to: Cow<'a, str>, + }, +} + +impl FileOperation<'_> { + /// Strip the first `n` path components from the paths in this operation. + /// + /// This is similar to the `-p` option in GNU patch. For example, + /// `strip_prefix(1)` on a path `a/src/lib.rs` would return `src/lib.rs`. + pub fn strip_prefix(&self, n: usize) -> FileOperation<'_> { + fn strip(path: &str, n: usize) -> &str { + let mut remaining = path; + for _ in 0..n { + match remaining.split_once('/') { + Some((_first, rest)) => remaining = rest, + None => return remaining, + } + } + remaining + } + + match self { + FileOperation::Delete(path) => FileOperation::Delete(Cow::Borrowed(strip(path, n))), + FileOperation::Create(path) => FileOperation::Create(Cow::Borrowed(strip(path, n))), + FileOperation::Modify { original, modified } => FileOperation::Modify { + original: Cow::Borrowed(strip(original, n)), + modified: Cow::Borrowed(strip(modified, n)), + }, + FileOperation::Rename { from, to } => FileOperation::Rename { + from: Cow::Borrowed(strip(from, n)), + to: Cow::Borrowed(strip(to, n)), + }, + FileOperation::Copy { from, to } => FileOperation::Copy { + from: Cow::Borrowed(strip(from, n)), + to: Cow::Borrowed(strip(to, n)), + }, + } + } + + /// Returns `true` if this is a file creation operation. + pub fn is_create(&self) -> bool { + matches!(self, FileOperation::Create(_)) + } + + /// Returns `true` if this is a file deletion operation. + pub fn is_delete(&self) -> bool { + matches!(self, FileOperation::Delete(_)) + } + + /// Returns `true` if this is a file modification. + pub fn is_modify(&self) -> bool { + matches!(self, FileOperation::Modify { .. }) + } + + /// Returns `true` if this is a rename operation. + pub fn is_rename(&self) -> bool { + matches!(self, FileOperation::Rename { .. }) + } + + /// Returns `true` if this is a copy operation. + pub fn is_copy(&self) -> bool { + matches!(self, FileOperation::Copy { .. }) + } +} diff --git a/src/patch_set/parse.rs b/src/patch_set/parse.rs new file mode 100644 index 0000000..c4864f9 --- /dev/null +++ b/src/patch_set/parse.rs @@ -0,0 +1,223 @@ +//! Parse multiple file patches from a unified diff. + +use super::{ + error::PatchSetParseErrorKind, FileOperation, FilePatch, Format, ParseOptions, + PatchSetParseError, +}; +use crate::patch::parse::parse_one; + +use std::borrow::Cow; + +/// Prefix for the original file path (e.g., `--- a/file.rs`). +const ORIGINAL_PREFIX: &str = "--- "; +/// Prefix for the modified file path (e.g., `+++ b/file.rs`). +const MODIFIED_PREFIX: &str = "+++ "; +/// Path used to indicate file creation or deletion. +const DEV_NULL: &str = "/dev/null"; + +/// Separator between commit message and patch in git format-patch output. +const EMAIL_PREAMBLE_SEPARATOR: &str = "\n---\n"; + +/// Streaming iterator for parsing patches one at a time. +/// +/// Created by [`PatchSet::parse`]. +/// +/// # Example +/// +/// ``` +/// use diffy::patch_set::{PatchSet, ParseOptions}; +/// +/// let s = "\ +/// --- original +/// +++ modified +/// @@ -1 +1 @@ +/// -old +/// +new +/// --- original2 +/// +++ modified2 +/// @@ -1 +1 @@ +/// -foo +/// +bar +/// "; +/// +/// for patch in PatchSet::parse(s, ParseOptions::unidiff()) { +/// let patch = patch.unwrap(); +/// println!("{:?}", patch.operation()); +/// } +/// ``` +pub struct PatchSet<'a> { + input: &'a str, + offset: usize, + opts: ParseOptions, + finished: bool, + found_any: bool, +} + +impl<'a> PatchSet<'a> { + /// Creates a streaming parser for multiple file patches. + pub fn parse(input: &'a str, opts: ParseOptions) -> Self { + // Strip email preamble once at construction + let input = strip_email_preamble(input); + Self { + input, + offset: 0, + opts, + finished: false, + found_any: false, + } + } + + /// Creates an error with the current offset as span. + fn error(&self, kind: PatchSetParseErrorKind) -> PatchSetParseError { + PatchSetParseError::new(kind, self.offset..self.offset) + } + + fn next_unidiff_patch(&mut self) -> Option, PatchSetParseError>> { + let remaining = &self.input[self.offset..]; + if remaining.is_empty() { + return None; + } + + let patch_start = find_patch_start(remaining)?; + self.found_any = true; + + let patch_input = &remaining[patch_start..]; + + let (result, consumed) = parse_one(patch_input); + // Always advance so the iterator makes progress even on error. + let abs_patch_start = self.offset + patch_start; + self.offset += patch_start + consumed; + + let patch = match result { + Ok(patch) => patch, + Err(e) => return Some(Err(e.into())), + }; + let operation = match extract_file_op_unidiff(patch.original_path(), patch.modified_path()) + { + Ok(op) => op, + Err(mut e) => { + e.set_span(abs_patch_start..abs_patch_start); + return Some(Err(e)); + } + }; + + Some(Ok(FilePatch::new(operation, patch, None, None))) + } +} + +impl<'a> Iterator for PatchSet<'a> { + type Item = Result, PatchSetParseError>; + + fn next(&mut self) -> Option { + if self.finished { + return None; + } + + let result = match self.opts.format { + Format::UniDiff => { + let result = self.next_unidiff_patch(); + if result.is_none() { + self.finished = true; + if !self.found_any { + return Some(Err(self.error(PatchSetParseErrorKind::NoPatchesFound))); + } + } + result + } + }; + + result + } +} + +/// Finds the byte offset of the first patch header in the input. +/// +/// A patch header starts with `--- ` or `+++ ` (the file path lines). +/// Returns `None` if no header is found. +fn find_patch_start(input: &str) -> Option { + let mut offset = 0; + for line in input.lines() { + if line.starts_with(ORIGINAL_PREFIX) || line.starts_with(MODIFIED_PREFIX) { + return Some(offset); + } + offset += line.len(); + // Account for the line ending that `.lines()` strips + if input[offset..].starts_with("\r\n") { + offset += 2; + } else if input[offset..].starts_with('\n') { + offset += 1; + } + } + None +} + +/// Strips email preamble (headers and commit message) from `git format-patch` output. +/// +/// Returns the content after the first `\n---\n` separator. +/// +/// ## Observed git behavior +/// +/// `git mailinfo` (used by `git am`) uses the first `---` line +/// as the separator between commit message and patch content. +/// It does not check if `diff --git` follows or there are more `---` lines. +/// +/// From [`git format-patch`] manpage: +/// +/// > The log message and the patch are separated by a line with a three-dash line. +/// +/// [`git format-patch`]: https://git-scm.com/docs/git-format-patch +fn strip_email_preamble(input: &str) -> &str { + // only strip preamble for mbox-formatted input + if !input.starts_with("From ") { + return input; + } + + match input.find(EMAIL_PREAMBLE_SEPARATOR) { + Some(pos) => &input[pos + EMAIL_PREAMBLE_SEPARATOR.len()..], + None => input, + } +} + +/// Extracts the file operation from a patch based on its header paths. +pub(crate) fn extract_file_op_unidiff<'a>( + original: Option<&Cow<'a, str>>, + modified: Option<&Cow<'a, str>>, +) -> Result, PatchSetParseError> { + let is_create = original.map(Cow::as_ref) == Some(DEV_NULL); + let is_delete = modified.map(Cow::as_ref) == Some(DEV_NULL); + + if is_create && is_delete { + return Err(PatchSetParseErrorKind::BothDevNull.into()); + } + + if is_delete { + let path = original.ok_or(PatchSetParseErrorKind::DeleteMissingOriginalPath)?; + Ok(FileOperation::Delete(path.clone())) + } else if is_create { + let path = modified.ok_or(PatchSetParseErrorKind::CreateMissingModifiedPath)?; + Ok(FileOperation::Create(path.clone())) + } else { + match (original, modified) { + (Some(original), Some(modified)) => Ok(FileOperation::Modify { + original: original.clone(), + modified: modified.clone(), + }), + (None, Some(modified)) => { + // No original path, but has modified path. + // Observed that GNU patch reads from the modified path in this case. + Ok(FileOperation::Modify { + original: modified.clone(), + modified: modified.clone(), + }) + } + (Some(original), None) => { + // No modified path, but has original path. + Ok(FileOperation::Modify { + modified: original.clone(), + original: original.clone(), + }) + } + (None, None) => Err(PatchSetParseErrorKind::NoFilePath.into()), + } + } +} diff --git a/src/patch_set/tests.rs b/src/patch_set/tests.rs new file mode 100644 index 0000000..5bbfd31 --- /dev/null +++ b/src/patch_set/tests.rs @@ -0,0 +1,465 @@ +//! Tests for patchset parsing. + +use super::{error::PatchSetParseErrorKind, FileOperation, ParseOptions, PatchSet}; + +mod file_operation { + use super::*; + + #[test] + fn test_strip_prefix() { + let op = FileOperation::Modify { + original: "a/src/lib.rs".to_owned().into(), + modified: "b/src/lib.rs".to_owned().into(), + }; + let stripped = op.strip_prefix(1); + assert_eq!( + stripped, + FileOperation::Modify { + original: "src/lib.rs".to_owned().into(), + modified: "src/lib.rs".to_owned().into(), + } + ); + } + + #[test] + fn test_strip_prefix_no_slash() { + let op = FileOperation::Create("file.rs".to_owned().into()); + let stripped = op.strip_prefix(1); + assert_eq!(stripped, FileOperation::Create("file.rs".to_owned().into())); + } +} + +mod patchset_unidiff { + use super::*; + + #[test] + fn single_file() { + let content = "\ +--- a/file.rs ++++ b/file.rs +@@ -1,3 +1,4 @@ + line1 + line2 ++line3 + line4 +"; + let patches = PatchSet::parse(content, ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!(patches.len(), 1); + assert!(patches[0].operation().is_modify()); + } + + #[test] + fn multi_file() { + let content = "\ +--- a/file1.rs ++++ b/file1.rs +@@ -1 +1 @@ +-old1 ++new1 +--- a/file2.rs ++++ b/file2.rs +@@ -1 +1 @@ +-old2 ++new2 +"; + let patches: Vec<_> = PatchSet::parse(content, ParseOptions::unidiff()) + .collect::>() + .unwrap(); + assert_eq!(patches.len(), 2); + assert!(patches[0].operation().is_modify()); + assert!(patches[1].operation().is_modify()); + } + + #[test] + fn with_preamble() { + let content = "\ +This is a preamble +It should be ignored +--- a/file.rs ++++ b/file.rs +@@ -1 +1 @@ +-old ++new +"; + let patches = PatchSet::parse(content, ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!(patches.len(), 1); + assert!(patches[0].operation().is_modify()); + } + + #[test] + fn plus_plus_content_in_hunk() { + // A hunk that adds a line whose content is literally "++ foo" renders + // in the diff as "+++ foo" (the leading "+" is the add marker). + // The parser must not treat this as a patch header boundary. + let content = "\ +--- a/file1.rs ++++ b/file1.rs +@@ -1,2 +1,2 @@ + line1 +-old ++++ foo +--- a/file2.rs ++++ b/file2.rs +@@ -1 +1 @@ +-a ++b +"; + let patches = PatchSet::parse(content, ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!(patches.len(), 2); + } + + #[test] + fn false_positive_in_hunk() { + // Line starting with "--- " inside hunk is not a patch boundary. + let content = "\ +--- a/file.rs ++++ b/file.rs +@@ -1,3 +1,3 @@ + line1 +---- this is not a patch boundary ++--- this line starts with dashes + line3 +"; + let patches = PatchSet::parse(content, ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!(patches.len(), 1); + } + + #[test] + fn empty_content() { + let err: Result, _> = PatchSet::parse("", ParseOptions::unidiff()).collect(); + let err = err.unwrap_err(); + assert!( + err.to_string().contains("no valid patches found"), + "unexpected error: {}", + err + ); + } + + #[test] + fn not_a_patch() { + let content = "Some random text\nNo patches here\n"; + let err: Result, _> = PatchSet::parse(content, ParseOptions::unidiff()).collect(); + let err = err.unwrap_err(); + assert!( + err.to_string().contains("no valid patches found"), + "unexpected error: {}", + err + ); + } + + #[test] + fn incomplete_header() { + // Has --- but no following +++ or @@. + // parse_one treats it as a valid (header-only, no hunks) patch, + // consistent with how GNU patch handles lone headers. + let content = "\ +--- a/file.rs +Some random text +No patches here +"; + let patches = PatchSet::parse(content, ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!(patches.len(), 1); + assert!(patches[0].operation().is_modify()); + } + + #[test] + fn create_file() { + let content = "\ +--- /dev/null ++++ b/new.rs +@@ -0,0 +1 @@ ++content +"; + let patches = PatchSet::parse(content, ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!(patches.len(), 1); + assert!(patches[0].operation().is_create()); + assert_eq!( + patches[0].operation(), + &FileOperation::Create("b/new.rs".to_owned().into()) + ); + } + + #[test] + fn delete_file() { + let content = "\ +--- a/old.rs ++++ /dev/null +@@ -1 +0,0 @@ +-content +"; + let patches = PatchSet::parse(content, ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!(patches.len(), 1); + assert!(patches[0].operation().is_delete()); + assert_eq!( + patches[0].operation(), + &FileOperation::Delete("a/old.rs".to_owned().into()) + ); + } + + #[test] + fn different_paths() { + let content = "\ +--- a/old.rs ++++ b/new.rs +@@ -1 +1 @@ +-old ++new +"; + let patches = PatchSet::parse(content, ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!(patches.len(), 1); + assert_eq!( + patches[0].operation(), + &FileOperation::Modify { + original: "a/old.rs".to_owned().into(), + modified: "b/new.rs".to_owned().into(), + } + ); + } + + #[test] + fn both_dev_null_error() { + let content = "\ +--- /dev/null ++++ /dev/null +@@ -1 +1 @@ +-old ++new +"; + let result: Result, _> = PatchSet::parse(content, ParseOptions::unidiff()).collect(); + assert_eq!( + result.unwrap_err().kind, + PatchSetParseErrorKind::BothDevNull + ); + } + + #[test] + fn error_advances_past_bad_patch() { + // Iterator advances past a malformed patch and continues + // to yield subsequent valid patches (GNU patch behavior). + let content = "\ +--- /dev/null ++++ /dev/null +@@ -1 +1 @@ +-old ++new +--- a/file.rs ++++ b/file.rs +@@ -1 +1 @@ +-old ++new +"; + let items: Vec<_> = PatchSet::parse(content, ParseOptions::unidiff()).collect(); + assert_eq!(items.len(), 2); + assert!(items[0].is_err(), "first item should be the error"); + assert!(items[1].is_ok(), "second item should be the valid patch"); + } + + #[test] + fn diff_git_ignored_in_unidiff_mode() { + // In UniDiff mode, `diff --git` is noise before `---` boundary. + let content = "\ +diff --git a/file1.rs b/file1.rs +--- a/file1.rs ++++ b/file1.rs +@@ -1 +1 @@ +-old1 ++new1 +diff --git a/file2.rs b/file2.rs +--- a/file2.rs ++++ b/file2.rs +@@ -1 +1 @@ +-old2 ++new2 +"; + let patches = PatchSet::parse(content, ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!(patches.len(), 2); + } + + #[test] + fn git_format_patch() { + // Full git format-patch output with email headers and signature. + let content = "\ +From 1234567890abcdef1234567890abcdef12345678 Mon Sep 17 00:00:00 2001 +From: Gandalf +Date: Mon, 25 Mar 3019 00:00:00 +0000 +Subject: [PATCH] fix!: destroy the one ring at mount doom + +In a hole in the ground there lived a hobbit +--- + src/frodo.rs | 2 +- + src/sam.rs | 1 + + 2 files changed, 2 insertions(+), 1 deletion(-) + +--- a/src/frodo.rs ++++ b/src/frodo.rs +@@ -1 +1 @@ +-finger ++peace +--- a/src/sam.rs ++++ b/src/sam.rs +@@ -1 +1,2 @@ + food ++more food +-- +2.40.0 +"; + let patches = PatchSet::parse(content, ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!(patches.len(), 2); + assert!(patches[0].operation().is_modify()); + assert!(patches[1].operation().is_modify()); + } + + #[test] + fn missing_modified_header() { + // Only --- header, no +++ header. + let content = "\ +--- a/file.rs +@@ -1 +1 @@ +-old ++new +"; + let patches = PatchSet::parse(content, ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!(patches.len(), 1); + assert!(patches[0].operation().is_modify()); + } + + #[test] + fn missing_original_header() { + // Only +++ header, no --- header. + let content = "\ ++++ b/file.rs +@@ -1 +1 @@ +-old ++new +"; + let patches = PatchSet::parse(content, ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!(patches.len(), 1); + assert!(patches[0].operation().is_modify()); + } + + #[test] + fn reversed_header_order() { + // +++ before ---. + let content = "\ ++++ b/file.rs +--- a/file.rs +@@ -1 +1 @@ +-old ++new +"; + let patches = PatchSet::parse(content, ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!(patches.len(), 1); + assert!(patches[0].operation().is_modify()); + } + + #[test] + fn multi_file_mixed_headers() { + // Various combinations of missing headers. + let content = "\ +--- a/file1.rs ++++ b/file1.rs +@@ -1 +1 @@ +-old1 ++new1 +--- a/file2.rs +@@ -1 +1 @@ +-old2 ++new2 ++++ b/file3.rs +@@ -1 +1 @@ +-old3 ++new3 +"; + let patches = PatchSet::parse(content, ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!(patches.len(), 3); + } + + #[test] + fn missing_modified_uses_original() { + // When +++ is missing, original path is used for both. + let content = "\ +--- a/file.rs +@@ -1 +1 @@ +-old ++new +"; + let patches = PatchSet::parse(content, ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!( + patches[0].operation(), + &FileOperation::Modify { + original: "a/file.rs".to_owned().into(), + modified: "a/file.rs".to_owned().into(), + } + ); + } + + #[test] + fn missing_original_uses_modified() { + // When --- is missing, modified path is used for both. + let content = "\ ++++ b/file.rs +@@ -1 +1 @@ +-old ++new +"; + let patches = PatchSet::parse(content, ParseOptions::unidiff()) + .collect::, _>>() + .unwrap(); + assert_eq!( + patches[0].operation(), + &FileOperation::Modify { + original: "b/file.rs".to_owned().into(), + modified: "b/file.rs".to_owned().into(), + } + ); + } + + #[test] + fn hunk_only_no_headers() { + // Only @@ header, no --- or +++ paths. + // is_unidiff_boundary requires --- or +++ to identify patch start, + // so this is not recognized as a patch at all. + let content = "\ +@@ -1 +1 @@ +-old ++new +"; + let err: Result, _> = PatchSet::parse(content, ParseOptions::unidiff()).collect(); + let err = err.unwrap_err(); + assert!( + err.to_string().contains("no valid patches found"), + "unexpected error: {}", + err + ); + } +}