# Source code for fitzzftw.patch.parser

# File: src/fitzzftw/patch/parser.py
# Author: Fitzz TeXnik Welt
# Email: FitzzTeXnikWelt@t-online.de
# License: LGPLv2 or above

"""
Unified Diff and Patch Parsing
==============================

This module provides the core logic for deconstructing raw patch data into
structured object hierarchies. It acts as a high-performance state machine
that transforms streams of strings into validated containers.

Core Components:
----------------
* **PatchParser**:
    The central engine for parsing diff sequences. It implements a factory
    pattern for line instantiation and manages the state transition between
    file-headers, hunk-headers, and content.
* **The Sieve**:
    An optimized inline parsing logic within :meth:`~.PatchParser.iter_files`
    that validates the structural integrity of the diff format (e.g.,
    ensuring headers precede content).

Key Features:
-------------
* **Strict Validation**:
    Raises :class:`~.exceptions.PatchParseError` for malformed sequences
    to prevent processing corrupted or incomplete patch data.
* **Streaming Architecture**:
    Uses generators to process large patch files efficiently without
    loading the entire content into memory.
* **Extensibility**:
    The factory method :meth:`~.PatchParser.create_line` allows for
    customization of how individual lines are categorized and instantiated.
"""

from pathlib import Path
from typing import Generator, Iterable

from fitzzftw.patch.container import DiffCodeFile, Hunk
from fitzzftw.patch.exceptions import FtwPatchError, PatchParseError
from fitzzftw.patch.lines import HeadLine, HunkHeadLine, HunkLine, PatchLine


# SECTION - --- Parser ---
# CLASS - PatchParser
class PatchParser:
    """
    Parses unified diff / patch content into structured objects.

    The parser consumes any iterable of raw strings (one string per patch
    line) and offers two views on it:

    * :meth:`get_lines` / :meth:`create_line` classify single lines.
    * :meth:`iter_files` assembles complete :class:`DiffCodeFile` objects
      including their hunks, validating the order of headers and content.

    The parser itself holds no state between calls; opening files and
    decoding bytes is left to the caller that provides the stream.
    """

    def __init__(self) -> None:
        """
        Initializes the PatchParser instance.
        """
        super().__init__()

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}()"

    @staticmethod
    def create_line(raw_line: str) -> PatchLine:
        """
        Factory method that maps a raw patch line to its specialized class.

        The line prefix alone decides which class is instantiated; the
        checks run in the order listed below, so a line starting with
        ``"--- "`` or ``"+++ "`` is always treated as a file header.

        :param raw_line: The complete, unmodified line from the input stream.
        :returns: A :class:`HeadLine` for ``"--- "`` / ``"+++ "`` prefixes,
                  a :class:`HunkHeadLine` for the ``"@@ "`` prefix, a
                  :class:`HunkLine` for ``"+"``, ``"-"`` or ``" "`` prefixes,
                  and a generic :class:`PatchLine` for everything else.
        """
        # 1. File headers
        if raw_line.startswith(("--- ", "+++ ")):
            return HeadLine(raw_line)
        # 2. Hunk headers
        if raw_line.startswith("@@ "):
            return HunkHeadLine(raw_line)
        # 3. Content lines (added, removed, context)
        if raw_line.startswith(("+", "-", " ")):
            return HunkLine(raw_line)
        # 4. Fallback for everything else (metadata, comments, noise)
        return PatchLine(raw_line)

    @classmethod
    def get_lines(cls, stream: Iterable[str]) -> Generator[PatchLine, None, None]:
        """
        A generator that transforms a stream of raw strings into PatchLine objects.

        :param stream: Any iterable of strings (e.g., file handle, list, or generator).
        :yields: Specialized PatchLine objects, one per input string.
        """
        for raw_line in stream:
            # The raw line is handed over unmodified (nothing is stripped
            # here), so the line classes see the original whitespace and
            # line ending.
            yield cls.create_line(raw_line)

    def iter_files(self, stream: Iterable[str]) -> Generator[DiffCodeFile, None, None]:
        """
        Iterates over all file-level patches within the provided stream.

        The method is a small state machine: it tracks the file and hunk
        currently being assembled and sorts every raw line by its prefix.
        A file is yielded as soon as the next ``"--- "`` header starts a new
        one, and once more at the end of the stream.

        :param stream: An iterable source of raw patch strings.
        :raises PatchParseError: If a ``+++`` header appears before a ``---``
            header, a hunk header appears before any file header, a content
            line appears before a hunk header, or an unprefixed line appears
            after a hunk has started. Any unexpected exception raised while
            parsing is wrapped in a :class:`PatchParseError` with the
            original exception attached as its cause.
        :yields: Complete DiffCodeFile objects.

        NOTE(review): the header test runs before the hunk test, so a hunk
        content line whose text begins with ``"--- "`` or ``"+++ "`` is
        routed to the header branch. Likewise a ``"\\ No newline at end of
        file"`` marker inside a hunk hits the strict rule below and raises.
        Confirm whether both are intended.
        """
        current_file: DiffCodeFile | None = None
        current_hunk: Hunk | None = None
        line_no = 0  # stays 0 when the stream is empty or fails before line 1

        try:
            for line_no, raw_line in enumerate(stream, start=1):
                # --- THE SIEVE (inline, ordered by prefix) ---

                # 1. File headers
                if raw_line.startswith(("--- ", "+++ ")):
                    line = HeadLine(raw_line)
                    if line.is_orig:
                        # Hand out the previously assembled file before
                        # starting a new one. The truthiness test is kept
                        # deliberately: DiffCodeFile may define its own
                        # emptiness semantics (not visible from here).
                        if current_file:
                            yield current_file
                        current_file = DiffCodeFile(line)
                        current_hunk = None
                    elif line.is_new:
                        if current_file is None:
                            raise PatchParseError(f"Line {line_no}: Found '+++' before '---'")
                        current_file.new_header = line
                    # A header that is neither original nor new is skipped.

                # 2. Hunk headers
                elif raw_line.startswith("@@ "):
                    if current_file is None:
                        raise PatchParseError(f"Line {line_no}: Found '@@ ' before file headers")
                    current_hunk = Hunk(HunkHeadLine(raw_line))
                    current_file.add_hunk(current_hunk)

                # 3. Content lines
                elif raw_line.startswith(("+", "-", " ")):
                    if current_hunk is None:
                        raise PatchParseError(
                            f"Line {line_no}: Found content line before '@@' header"
                        )
                    current_hunk.add_line(HunkLine(raw_line))

                # 4. Metadata and noise
                # STRICT RULE: no unrecognized lines once a hunk has started.
                elif current_hunk is not None:
                    raise PatchParseError(
                        f"Line {line_no}: Invalid line within hunk. Missing prefix (' ', '+', '-')."  # noqa: E501
                    )
                # Lines outside of hunks (Git metadata, empty lines) are ignored.

            # Yield the final file in the stream
            if current_file:
                yield current_file

        except FtwPatchError:
            # Known validation errors pass through untouched.
            raise
        except Exception as e:
            # Wrap unexpected low-level errors, keeping the original as the
            # explicit cause so the traceback shows where it came from.
            raise PatchParseError(f"Unexpected error at line {line_no}: {e}") from e
#!CLASS - PatchParser
#!SECTION - Parsers

if __name__ == "__main__":  # pragma: no cover
    # Developer entry point: runs the documenting doctests for this module
    # when the file is executed directly.
    from doctest import FAIL_FAST, testfile

    be_verbose = True  # switch to False for terse doctest output
    option_flags = FAIL_FAST  # use 0 to keep going after the first failure

    # Path to the documenting tests, resolved relative to this file.
    testfiles_dir = Path(__file__).parents[3] / "doc/source/devel"
    test_file = testfiles_dir / "get_started_parser.rst"
    # Alternative document: testfiles_dir / "get_started_ftw_patch.rst"

    if not test_file.exists():
        print(f"⚠️ Warning: Test file {test_file.name} not found.")
    else:
        print(f"--- Running Doctest for {test_file.name} ---")
        outcome = testfile(
            str(test_file),
            module_relative=False,
            verbose=be_verbose,
            optionflags=option_flags,
        )
        test_failed = outcome.failed
        test_sum = outcome.attempted

        if test_failed:
            print(f"\nDocTests failed: {test_failed} tests.")
        else:
            print(f"\nDocTests passed without errors, {test_sum} tests.")