Skip to content

Diff API Reference

diff

Unified diff parser, patch applicator, and three-way merge — zero dependencies, stdlib only, Python 3.10+.

Part of zerodep: https://github.com/Oaklight/zerodep Copyright (c) 2026 Peng Ding. MIT License.

Provides structured parsing of unified diffs, patch application/reversal, and three-way merge with conflict detection. Built entirely on the standard library difflib module.

Generate and round-trip a patch::

from diff import make_diff, parse_patch, apply_patch, reverse_patch

a = "hello\nworld\n"
b = "hello\nbrave new world\n"

patch_text = make_diff(a, b)
patch = parse_patch(patch_text)
assert apply_patch(a, patch) == b

rev = reverse_patch(patch)
assert apply_patch(b, rev) == a

Three-way merge::

from diff import merge3

result = merge3(base, ours, theirs)
if result.has_conflicts:
    print("Conflicts found!")
print(result.content)

DiffError

Bases: Exception

Base exception for all diff/patch operations.

Source code in diff/diff.py
class DiffError(Exception):
    """Common ancestor of every exception raised by the diff/patch operations.

    Catch this type to handle any failure reported by this module without
    distinguishing between parse and apply errors.
    """

PatchParseError

Bases: DiffError

Raised when patch text has invalid or malformed format.

Attributes:

Name Type Description
line_no

1-based line number where the error was detected.

detail

Human-readable description of the issue.

Source code in diff/diff.py
class PatchParseError(DiffError):
    """Signals that patch text could not be parsed as a unified diff.

    The exception message has the form ``"line <line_no>: <detail>"``.

    Attributes:
        line_no: 1-based number of the patch line where parsing failed.
        detail: Human-readable explanation of what was wrong.
    """

    def __init__(self, line_no: int, detail: str) -> None:
        message = f"line {line_no}: {detail}"
        super().__init__(message)
        # Keep the structured pieces available for programmatic handling.
        self.line_no = line_no
        self.detail = detail

PatchApplyError

Bases: DiffError

Raised when a patch cannot be applied to the given source.

Attributes:

Name Type Description
hunk_index

0-based index of the failing hunk.

expected

The line content expected by the patch.

actual

The line content found in the source.

source_line_no

1-based line number in the source.

Source code in diff/diff.py
class PatchApplyError(DiffError):
    """Signals that a patch does not fit the source it is being applied to.

    The exception message combines the hunk index, the source line number,
    and the ``repr`` of both the expected and the actual line.

    Attributes:
        hunk_index: 0-based index of the hunk that failed.
        expected: Line content the patch expected to find.
        actual: Line content actually present in the source.
        source_line_no: 1-based line number in the source.
    """

    def __init__(
        self,
        hunk_index: int,
        expected: str,
        actual: str,
        source_line_no: int,
    ) -> None:
        location = f"hunk {hunk_index}: source line {source_line_no}: "
        mismatch = f"expected {expected!r}, got {actual!r}"
        super().__init__(location + mismatch)
        # Expose every component so callers need not parse the message.
        self.hunk_index = hunk_index
        self.expected = expected
        self.actual = actual
        self.source_line_no = source_line_no

Hunk dataclass

A single contiguous changed region in a unified diff.

Attributes:

Name Type Description
src_start int

1-based starting line in the source file.

src_len int

Number of source lines covered by this hunk.

tgt_start int

1-based starting line in the target file.

tgt_len int

Number of target lines covered by this hunk.

lines list[tuple[str, str]]

Sequence of (tag, content) pairs where tag is one of ' ' (context), '-' (deletion), or '+' (addition) and content includes the trailing newline.

Source code in diff/diff.py
@dataclasses.dataclass
class Hunk:
    """One contiguous region of change taken from a unified diff.

    The four integers mirror the ``@@ -src_start,src_len +tgt_start,tgt_len @@``
    hunk header.

    Attributes:
        src_start: 1-based first line of the hunk in the source file.
        src_len: How many source lines the hunk spans.
        tgt_start: 1-based first line of the hunk in the target file.
        tgt_len: How many target lines the hunk spans.
        lines: ``(tag, content)`` pairs in hunk order.  *tag* is ``' '``
            for context, ``'-'`` for a deletion, or ``'+'`` for an
            addition; *content* keeps its trailing newline.
    """

    # Source-side range.
    src_start: int
    src_len: int
    # Target-side range.
    tgt_start: int
    tgt_len: int
    # Hunk body as (tag, content) pairs.
    lines: list[tuple[str, str]]

PatchedFile dataclass

All hunks for a single file in a patch.

Attributes:

Name Type Description
source_file str | None

Source filename (/dev/null for newly added files).

target_file str | None

Target filename (/dev/null for deleted files).

hunks list[Hunk]

Ordered list of :class:Hunk instances.

Source code in diff/diff.py
@dataclasses.dataclass
class PatchedFile:
    """The set of hunks that a patch carries for one file.

    Attributes:
        source_file: Name of the source file (``/dev/null`` when the file
            is newly added).
        target_file: Name of the target file (``/dev/null`` when the file
            is deleted).
        hunks: The file's :class:`Hunk` objects, in order.
    """

    source_file: str | None
    target_file: str | None
    hunks: list[Hunk]

    @property
    def is_added(self) -> bool:
        """Whether the source side is ``/dev/null``, i.e. the file is created."""
        name = self.source_file
        if name is None:
            return False
        return name.endswith("/dev/null")

    @property
    def is_deleted(self) -> bool:
        """Whether the target side is ``/dev/null``, i.e. the file is removed."""
        name = self.target_file
        if name is None:
            return False
        return name.endswith("/dev/null")

is_added property

True when the patch creates a new file.

is_deleted property

True when the patch deletes a file.

Patch dataclass

A collection of :class:PatchedFile instances parsed from unified diff text.

Supports len(), iteration, and indexing.

Source code in diff/diff.py
@dataclasses.dataclass
class Patch:
    """Container for the :class:`PatchedFile` entries of one unified diff.

    Behaves like a read-only sequence of files: it supports ``len()``,
    iteration, and indexing, all delegated to :attr:`files`.
    """

    files: list[PatchedFile]

    def __len__(self) -> int:
        """Return the number of files in the patch."""
        file_count = len(self.files)
        return file_count

    def __iter__(self):
        """Return an iterator over the files in patch order."""
        return iter(self.files)

    def __getitem__(self, index: int) -> PatchedFile:
        """Return the file stored at position *index*."""
        entries = self.files
        return entries[index]

ConflictRegion dataclass

A region where concurrent edits conflict in a three-way merge.

Attributes:

Name Type Description
base_start int

0-based start index in the base lines.

base_end int

0-based exclusive end index in the base lines.

ours list[str]

Lines from the ours side.

theirs list[str]

Lines from the theirs side.

Source code in diff/diff.py
@dataclasses.dataclass
class ConflictRegion:
    """Describes one conflicting region found by a three-way merge.

    The region covers the half-open range ``[base_start, base_end)`` of the
    base lines, together with what each side has in its place.

    Attributes:
        base_start: 0-based index of the first affected base line.
        base_end: 0-based index one past the last affected base line.
        ours: The lines present on the *ours* side.
        theirs: The lines present on the *theirs* side.
    """

    # Half-open range into the base lines.
    base_start: int
    base_end: int
    # Competing content from each side.
    ours: list[str]
    theirs: list[str]

MergeResult dataclass

Result of a three-way merge.

Attributes:

Name Type Description
content str

The merged text. When conflicts exist the text includes conflict markers (<<<<<<<, =======, >>>>>>>).

has_conflicts bool

True when at least one conflict was detected.

conflicts list[ConflictRegion]

List of :class:ConflictRegion instances.

Source code in diff/diff.py
@dataclasses.dataclass
class MergeResult:
    """Outcome of a three-way merge.

    Attributes:
        content: The merged text.  If any conflict occurred, the text
            contains conflict markers (``<<<<<<<``, ``=======``,
            ``>>>>>>>``).
        has_conflicts: ``True`` if one or more conflicts were detected.
        conflicts: The detected :class:`ConflictRegion` objects.
    """

    # Merged output text.
    content: str
    # Conflict summary.
    has_conflicts: bool
    conflicts: list[ConflictRegion]

make_diff(a, b, filename_a='a', filename_b='b', context=3)

Generate a unified diff between two strings.

Parameters:

Name Type Description Default
a str

Original text.

required
b str

Modified text.

required
filename_a str

Label for the original file in the diff header.

'a'
filename_b str

Label for the modified file in the diff header.

'b'
context int

Number of context lines around each change.

3

Returns:

Type Description
str

A unified diff string, or an empty string if a and b are identical.

Source code in diff/diff.py
def make_diff(
    a: str,
    b: str,
    filename_a: str = "a",
    filename_b: str = "b",
    context: int = 3,
) -> str:
    """Produce the unified diff that turns *a* into *b*.

    The diff is computed with :func:`difflib.unified_diff`.  Because difflib
    never writes a "\\ No newline at end of file" marker, one is added after
    each body line that does not end in a newline.

    Args:
        a: Original text.
        b: Modified text.
        filename_a: Name shown for the original file in the diff header.
        filename_b: Name shown for the modified file in the diff header.
        context: How many context lines surround each change.

    Returns:
        The unified diff as a single string; empty when *a* and *b* are
        identical.
    """
    diff_lines = difflib.unified_diff(
        a.splitlines(keepends=True),
        b.splitlines(keepends=True),
        fromfile=filename_a,
        tofile=filename_b,
        n=context,
    )

    pieces: list[str] = []
    for entry in diff_lines:
        pieces.append(entry)
        # Newline-terminated (or empty) lines need no marker.
        if not entry or entry.endswith("\n"):
            continue
        # Only body lines (context, deletion, addition) can lack a newline.
        if entry.startswith((" ", "-", "+")):
            pieces.append("\n" + _NO_NEWLINE_MARKER + "\n")
    return "".join(pieces)

parse_patch(patch_text)

Parse unified diff text into a structured :class:Patch.

Parameters:

Name Type Description Default
patch_text str

The unified diff text.

required

Returns:

Type Description
Patch

A :class:Patch containing one :class:PatchedFile per file.

Raises:

Type Description
PatchParseError

If the diff text is malformed.

Source code in diff/diff.py
def parse_patch(patch_text: str) -> Patch:
    """Parse unified diff text into a structured :class:`Patch`.

    File headers (``--- `` / ``+++ ``) are only recognised when the current
    hunk cannot accept the line as body content.  While a hunk still expects
    source lines according to the counts in its ``@@`` header, a line such
    as ``--- old`` is the deletion of ``-- old``; likewise ``+++ new`` is the
    addition of ``++ new`` while target lines are still expected.  Lines
    that are neither headers, hunk headers, "no newline" markers, nor hunk
    body (``diff --git`` preamble, ``index``, mode lines, ...) are ignored.

    Args:
        patch_text: The unified diff text.

    Returns:
        Patch: The parsed patch, holding one :class:`PatchedFile` per
        ``+++`` file header encountered.

    Raises:
        PatchParseError: If the diff text is malformed.
    """
    files: list[PatchedFile] = []
    current_file: PatchedFile | None = None
    current_hunk: Hunk | None = None
    remaining_src = 0
    remaining_tgt = 0
    source_name: str | None = None

    for line_no, raw in enumerate(patch_text.splitlines(True), start=1):
        stripped = raw.rstrip("\n").rstrip("\r")

        # The hunk header's counts tell us whether the hunk can still take
        # a deletion (source lines left) or an addition (target lines left).
        in_hunk = current_hunk is not None
        may_delete = in_hunk and remaining_src > 0
        may_add = in_hunk and remaining_tgt > 0

        # "--- " source file header.  Inside a hunk that still expects source
        # lines this is a deleted line whose content starts with "-- ".
        if stripped.startswith("--- ") and not may_delete:
            source_name = _strip_filename_prefix(stripped[4:].strip())
            current_hunk = None
            continue

        # "+++ " target file header.  Inside a hunk that still expects target
        # lines this is an added line whose content starts with "++ ".
        if stripped.startswith("+++ ") and not may_add:
            target_name = _strip_filename_prefix(stripped[4:].strip())
            current_file = PatchedFile(
                source_file=source_name,
                target_file=target_name,
                hunks=[],
            )
            files.append(current_file)
            source_name = None
            current_hunk = None
            continue

        # "@@ ... @@" hunk header: start a new hunk and reset the counters.
        m = _HUNK_HEADER_RE.match(stripped)
        if m:
            current_hunk = _make_hunk_from_header(m, current_file, line_no)
            remaining_src = current_hunk.src_len
            remaining_tgt = current_hunk.tgt_len
            continue

        # "\ No newline at end of file": drop the newline from the body line
        # recorded just before the marker.
        if stripped.startswith("\\"):
            if current_hunk and current_hunk.lines:
                tag, content = current_hunk.lines[-1]
                current_hunk.lines[-1] = (tag, content.rstrip("\n"))
            continue

        # Hunk body line: consumed only while the hunk still expects lines.
        if may_delete or may_add:
            remaining_src, remaining_tgt = _classify_body_line(
                raw, line_no, current_hunk, remaining_src, remaining_tgt
            )
            continue

        # Anything else lies outside a hunk (diff --git preamble, index,
        # mode lines, etc.) and is skipped.

    return Patch(files=files)

apply_patch(source, patch)

Apply a parsed patch to source text.

Parameters:

Name Type Description Default
source str

The original text to patch.

required
patch Patch | PatchedFile

A :class:Patch (must contain exactly one file) or a single :class:PatchedFile.

required

Returns:

Type Description
str

The patched text.

Raises:

Type Description
PatchApplyError

If a context or deletion line does not match the source.

DiffError

If a :class:Patch contains more than one file.

Source code in diff/diff.py
def apply_patch(source: str, patch: Patch | PatchedFile) -> str:
    """Return *source* with the hunks of *patch* applied.

    Source lines between hunks are copied through unchanged; each hunk's
    own output comes from ``_apply_hunk_lines``.

    Args:
        source: The text to be patched.
        patch: Either a single :class:`PatchedFile` or a :class:`Patch`
            holding exactly one file.

    Returns:
        The patched text.

    Raises:
        PatchApplyError: If a context or deletion line does not match
            the source.
        DiffError: If a :class:`Patch` contains more than one file.
    """
    patched_file, shortcut = _resolve_patched_file(source, patch)
    if shortcut is not None:
        # The resolver already produced the final text; nothing to apply.
        return shortcut

    lines = source.splitlines(keepends=True)
    pieces: list[str] = []
    cursor = 0  # 0-based index of the next source line not yet emitted

    for index, hunk in enumerate(patched_file.hunks):
        # Copy the untouched lines that precede this hunk.
        # NOTE(review): for a zero-length source range (e.g. ``-5,0``) the
        # start may denote the line *before* the insertion point; confirm
        # that ``_apply_hunk_lines`` accounts for this.
        first_hunk_line = hunk.src_start - 1
        pieces.extend(lines[cursor:first_hunk_line])
        produced, cursor = _apply_hunk_lines(index, hunk, lines)
        pieces.extend(produced)

    # Whatever follows the final hunk is carried over unchanged.
    pieces.extend(lines[cursor:])
    return "".join(pieces)

reverse_patch(patch)

Create a reversed copy of patch.

Applying the reversed patch to the target reproduces the original source::

assert apply_patch(b, reverse_patch(parse_patch(make_diff(a, b)))) == a

Parameters:

Name Type Description Default
patch Patch

The patch to reverse.

required

Returns:

Type Description
Patch

A new :class:Patch with swapped source/target and inverted changes.

Source code in diff/diff.py
def reverse_patch(patch: Patch) -> Patch:
    """Build the inverse of *patch* without modifying it.

    Every hunk has its source and target ranges exchanged and its ``+`` and
    ``-`` tags flipped; every file has its source and target names
    exchanged.  Applying the result to the target text yields the original
    source::

        assert apply_patch(b, reverse_patch(parse_patch(make_diff(a, b)))) == a

    Args:
        patch: The patch to invert.

    Returns:
        A new :class:`Patch` with swapped source/target and inverted changes.
    """
    flipped_tag = {"+": "-", "-": "+", " ": " "}

    def _flip_hunk(hunk: Hunk) -> Hunk:
        # Flip the tags first, then swap the source and target ranges.
        flipped_lines = [(flipped_tag[tag], text) for tag, text in hunk.lines]
        return Hunk(
            src_start=hunk.tgt_start,
            src_len=hunk.tgt_len,
            tgt_start=hunk.src_start,
            tgt_len=hunk.src_len,
            lines=flipped_lines,
        )

    reversed_files = [
        PatchedFile(
            source_file=entry.target_file,
            target_file=entry.source_file,
            hunks=[_flip_hunk(hunk) for hunk in entry.hunks],
        )
        for entry in patch.files
    ]
    return Patch(files=reversed_files)

merge3(base, ours, theirs, label_ours='ours', label_theirs='theirs')

Perform a three-way merge.

Parameters:

Name Type Description Default
base str

The common ancestor text.

required
ours str

Text from the first branch.

required
theirs str

Text from the second branch.

required
label_ours str

Label for conflict markers on the ours side.

'ours'
label_theirs str

Label for conflict markers on the theirs side.

'theirs'

Returns:

Type Description
MergeResult

A :class:MergeResult with the merged content and any conflicts.

Source code in diff/diff.py
def merge3(
    base: str,
    ours: str,
    theirs: str,
    label_ours: str = "ours",
    label_theirs: str = "theirs",
) -> MergeResult:
    """Merge two descendants of a common ancestor.

    Each side is compared against *base* with :class:`difflib.SequenceMatcher`.
    The base is then walked front to back: unchanged base lines are copied,
    a change present on only one side is taken as is, and positions where
    both sides have a pending change are handed to ``_apply_both_changes``.

    Args:
        base: The common ancestor text.
        ours: Text from the first branch.
        theirs: Text from the second branch.
        label_ours: Label for conflict markers on the *ours* side.
        label_theirs: Label for conflict markers on the *theirs* side.

    Returns:
        A :class:`MergeResult` with the merged content and any conflicts.
    """
    base_lines = base.splitlines(keepends=True)
    ours_lines = ours.splitlines(keepends=True)
    theirs_lines = theirs.splitlines(keepends=True)

    ours_changes = _extract_changes(
        difflib.SequenceMatcher(None, base_lines, ours_lines).get_opcodes(),
        ours_lines,
    )
    theirs_changes = _extract_changes(
        difflib.SequenceMatcher(None, base_lines, theirs_lines).get_opcodes(),
        theirs_lines,
    )

    merged_out: list[str] = []
    conflict_log: list[ConflictRegion] = []
    cursor = 0  # position in base_lines already accounted for
    ours_idx = 0
    theirs_idx = 0
    # One past the end of base; passed to _peek_change as its sentinel.
    end_marker = len(base_lines) + 1

    while cursor < end_marker:
        ours_next, ours_at = _peek_change(ours_changes, ours_idx, end_marker)
        theirs_next, theirs_at = _peek_change(theirs_changes, theirs_idx, end_marker)
        have_ours = ours_next is not None
        have_theirs = theirs_next is not None

        if not (have_ours or have_theirs):
            # No changes left on either side: flush the rest of base.
            merged_out.extend(base_lines[cursor:])
            break

        # Copy the untouched base lines that precede the nearest change.
        upcoming = min(ours_at, theirs_at)
        if upcoming > cursor:
            merged_out.extend(base_lines[cursor:upcoming])
            cursor = upcoming

        if have_ours and have_theirs:
            cursor, ours_idx, theirs_idx = _apply_both_changes(
                ours_next,
                theirs_next,
                label_ours,
                label_theirs,
                ours_changes,
                theirs_changes,
                ours_idx,
                theirs_idx,
                merged_out,
                conflict_log,
            )
            continue

        # Exactly one side has a change pending: take it verbatim.
        if have_ours:
            change = ours_next
            ours_idx += 1
        else:
            change = theirs_next
            theirs_idx += 1
        merged_out.extend(change[2])  # element 2: the lines to emit
        cursor = change[1]  # element 1: base position to resume from

    return MergeResult(
        content="".join(merged_out),
        has_conflicts=bool(conflict_log),
        conflicts=conflict_log,
    )