# Source code for fitzzftw.patch.lines

# File: src/fitzzftw/patch/lines.py
# Author: Fitzz TeXnik Welt
# Email: FitzzTeXnikWelt@t-online.de
# License: LGPLv2 or above

"""
lines
===============================

This module provides the structural representation of individual lines within
a patch file. It acts as a specialized parser that converts raw patch
fragments into intelligent objects capable of self-validation and metadata
extraction.

Core Line Types:
----------------
* **PatchLine**: 
  The base class for all lines, handling basic sanitization
  and trailing newline management.
* **HeadLine**: 
  Manages file headers (--- and +++), extracting paths,
  timestamps, and identifying null-device targets.
* **HunkHeadLine**: 
  Parses the coordination metadata (@@ -l,s +l,s @@),
  providing structured access to line ranges.
* **FileLine & HunkLine**: 
  Represent actual content changes. They include
  advanced whitespace normalization logic (collapse, ignore-all) required
  for robust patch application across different formatting styles.

Key Features:
-------------
* **Integrated Styling**:
  Most line classes inherit from :class:`.base.TerminalColorMixin`
  and define a :attr:`~.HeadLine._color_map`, allowing them to print themselves with the
  correct semantic color (e.g., green for additions, red for deletions).
* **Whitespace Awareness**:
  Objects can dynamically provide different
  representations of their content (:attr:`~.FileLine.normalized_ws_content`,
  :attr:`~.FileLine.ignore_all_ws_content`)
  to support flexible matching algorithms.
* **Protocol Compliance**:
  By providing :attr:`~.HeadLine.prefix`, :attr:`~.HeadLine.orig_line`, and
  :attr:`~.HeadLine._color_map`,
  these classes satisfy the `LineLike` protocol, enabling seamless
  integration with the framework's diagnostic and output tools.
"""

import re
from pathlib import Path
from typing import ClassVar

from fitzzftw.patch.base import TerminalColorMixin
from fitzzftw.patch.exceptions import PatchParseError
from fitzzftw.patch.static import ColorKey


# CLASS - PatchLine

class PatchLine:
    """
    Base class for structural patch lines.

    This class serves as the foundation for all specialized line types within
    a patch (e.g., HeadLine, FileLine). It handles the initial sanitization
    of the raw input stream: type validation, removal of the
    'No newline at end of file' marker and removal of the line terminator.
    """

    # Horizontal whitespace located directly in front of the optional terminator.
    _TRAIL_WS_RE: ClassVar[re.Pattern] = re.compile(r"([ \t\f\v]+)[\n\r]*$")
    # Marker text that unified diffs use to flag a missing final newline.
    _NO_NEWLINE_MARKER: ClassVar[str] = "\\ No newline at end of file"

    def __init__(self, raw_line: str):
        """
        Initializes a PatchLine instance by sanitizing the input.

        The raw line is stripped of trailing carriage returns and newlines.
        Additionally, it removes the specialized 'No newline at end of file'
        marker that can appear in Unified Diff format, regardless of whether
        the marker is followed by ``\\n``, ``\\r\\n``, ``\\r`` or by nothing at
        all (e.g. when the caller already split the input into lines).

        :param raw_line: The unmodified string read directly from the patch source.
        :raises PatchParseError: If ``raw_line`` is not a string.
        """
        # Strict type check to ensure the input is a string
        if not isinstance(raw_line, str):
            raise PatchParseError(
                f"PatchLine expected a string, but received {type(raw_line).__name__}."
            )

        # Remove the 'No newline' marker; it would otherwise interfere with
        # matching/patching logic. The marker is tried with every terminator
        # variant, the bare (unterminated) form last.
        clean_content = raw_line
        for terminator in ("\n", "\r\n", "\r", ""):
            clean_content = clean_content.removesuffix(self._NO_NEWLINE_MARKER + terminator)

        # Must be evaluated before the terminator is stripped below; the
        # pattern tolerates a trailing terminator on its own.
        self._has_trailing_whitespace: bool = bool(self._TRAIL_WS_RE.search(clean_content))

        # Strip trailing newline characters (\n or \r\n)
        self._content: str = clean_content.rstrip("\n\r")
        self._orig_line: str = self._content

    @property
    def content(self) -> str:
        """
        The cleaned content of the line without trailing newlines **(ro)**.

        :returns: A string representing the text content of the line.
        """
        return self._content

    @property
    def orig_line(self) -> str:
        """
        The original line without trailing newlines **(ro)**.

        For the base class this is identical to :py:attr:`content`.

        :returns: A string representing the text of the line.
        """
        return self._orig_line

    @property
    def has_trailing_whitespace(self) -> bool:
        """
        Indicates if the sanitized line ended in horizontal whitespace
        (space, tab, form feed or vertical tab) before its terminator **(ro)**.

        :returns: Boolean value.
        """
        return self._has_trailing_whitespace

    def __repr__(self):
        return f"{self.__class__.__name__}(Content: {self.content!r})"



#!CLASS - PatchLine


# CLASS - HeadLine

class HeadLine(TerminalColorMixin, PatchLine):
    """
    Represents a file header line within a patch (starting with '--- ' or '+++ ').

    This class specializes :py:class:`PatchLine` to handle file-level metadata.
    It isolates the file path from the unified diff prefix and provides
    convenience methods to identify the role of the file (original vs. new)
    within a transition.
    """

    # Maps each header prefix to the color key used for terminal output.
    _color_map: dict[str, ColorKey] = {"--- ": "red", "+++ ": "green"}

    def __init__(self, raw_line: str):
        """
        Initializes the HeadLine by extracting the prefix and path content.

        The first four characters are used to identify the header type.
        The remainder of the line is split at the first tab character: the
        part before the tab is the path, the part after it is kept as
        additional info (see :py:attr:`info`).

        :param raw_line: The complete, unmodified header line from the patch.
        :raises PatchParseError: If ``raw_line`` is not a string.
        :raises ValueError: If the line does not start with '--- ' or '+++ '.
        """
        # Validate the type up front so that non-string input is reported the
        # same way as in the sibling line classes instead of failing on slicing.
        if not isinstance(raw_line, str):
            raise PatchParseError(
                f"HeadLine expected a string, but received {type(raw_line).__name__}."
            )

        prefix_candidate = raw_line[:4]
        if prefix_candidate not in ("--- ", "+++ "):
            raise ValueError(
                f"Invalid HeadLine: Expected '--- ' or '+++ ', got {prefix_candidate!r}"
            )

        self._prefix: str = prefix_candidate
        path_part, tab, info_part = raw_line[4:].partition("\t")
        if tab:
            # The base class sanitizes the info part; its result is moved to
            # ``_info`` and the path (without padding spaces) becomes the content.
            super().__init__(info_part)
            self._info: str | None = self.content
            self._content = path_part.rstrip(" ")
        else:
            super().__init__(path_part)
            self._info = None
        self._orig_line = raw_line.rstrip("\n\r")

    @property
    def prefix(self) -> str:
        """
        The diff prefix ('--- ' or '+++ ') identified at the start of the line **(ro)**.

        :returns: The prefix string.
        """
        return self._prefix

    @property
    def is_orig(self) -> bool:
        """
        Indicates if this line represents the original (source) file path **(ro)**.

        :returns: True if the prefix is '--- ', False otherwise.
        """
        return self._prefix == "--- "

    @property
    def is_new(self) -> bool:
        """
        Indicates if this line represents the new (target) file path **(ro)**.

        :returns: True if the prefix is '+++ ', False otherwise.
        """
        return self._prefix == "+++ "

    @property
    def is_null_path(self) -> bool:
        """
        Checks if the file path points to a null device (e.g., /dev/null) **(ro)**.

        This property uses the static method :py:meth:`~HeadLine.check_is_null_path`
        to perform the null-path check.

        :returns: True if the content matches a null path pattern.
        """
        return self.check_is_null_path(self.content)

    @property
    def info(self) -> str | None:
        """
        Returns the metadata found after the file path (e.g., timestamps) **(ro)**.

        This contains everything that was separated by a tab character
        from the path. It is an empty string if a tab was present but
        nothing followed it, and None if the line contained no tab.

        :returns: The metadata string or None.
        """
        return self._info

    def get_path(self, strip_count: int) -> Path:
        """
        Returns the path as a Path object, stripped of N leading segments.

        Segments are the components reported by :py:attr:`pathlib.Path.parts`,
        so the root of an absolute path counts as one segment.

        :param strip_count: Number of leading path components to remove.
        :raises ValueError: If strip_count is too high for the available segments or is negative.
        :returns: A Path object of the remaining segments.
        """
        if strip_count < 0:
            raise ValueError(f"Strip count must be non-negative, got {strip_count}")

        segments = Path(self.content).parts
        # At least one segment has to remain after stripping.
        if strip_count >= len(segments):
            raise ValueError(
                f"Strip level -p{strip_count} is too high for path '{self.content}' "
                f"(only {len(segments)} segments available)."
            )

        return Path(*segments[strip_count:])

    @staticmethod
    def check_is_null_path(path: Path | str) -> bool:
        """Check if the given path represents a null path marker.

        This function detects special paths used in patch files to signify
        file deletion or creation, specifically:
        1. '/dev/null' (POSIX standard, case-sensitive).
        2. 'NUL' (Windows standard, case-insensitive).

        :param path: The path object or string to check. Any other type
                yields :py:obj:`False`.
        :returns: :py:obj:`True` if the path matches a known null path
                marker, :py:obj:`False` otherwise.
        """
        if isinstance(path, Path):
            path_str = path.as_posix()
        elif isinstance(path, str):
            path_str = path
        else:
            return False

        # POSIX marker is compared exactly; the Windows marker ignores case
        # (e.g., 'nul', 'NuL').
        return path_str == "/dev/null" or path_str.upper() == "NUL"

    def __repr__(self):
        return f"{self.__class__.__name__}(Content: {self.content!r}, Prefix: {self.prefix!r})"



#!CLASS - HeadLine
# CLASS -  HunkHead



class HunkHeadLine(TerminalColorMixin, PatchLine):
    """
    Represents a hunk header line within a patch (starting with '@@ ').

    This class specializes :py:class:`PatchLine` to handle coordinate metadata.
    It isolates the range information from optional context info (like function names)
    and provides parsed access to the line numbers.
    """

    # Maps the hunk prefix to the color key used for terminal output.
    _color_map: dict[str, ColorKey] = {"@@ ": "cyan"}

    # Matches '-l[,s] +l[,s]'. The pattern is anchored at both ends so that
    # trailing garbage after the new range is rejected instead of ignored.
    _HUNK_RE: ClassVar[re.Pattern] = re.compile(
        r"^-(?P<old_start>\d+)(?:,(?P<old_len>\d+))? "
        r"\+(?P<new_start>\d+)(?:,(?P<new_len>\d+))?[ \t]*$"
    )

    def __init__(self, raw_line: str):
        """
        Initializes the HunkHeadLine by extracting coordinates and optional info.

        :param raw_line: The complete, unmodified hunk header line.
        :raises PatchParseError: If ``raw_line`` is not a string.
        :raises ValueError: If the prefix, the closing marker or the coordinates are invalid.
        """
        # Strict type check to ensure the input is a string
        if not isinstance(raw_line, str):
            raise PatchParseError(
                f"PatchLine expected a string, but received {type(raw_line).__name__}."
            )

        if not raw_line.startswith("@@ "):
            raise ValueError(f"Invalid HunkHeadLine: Expected '@@ ', got {repr(raw_line[:3])}")

        self._prefix = "@@ "
        self._suffix_marker = " @@"

        # Split at the closing " @@" to separate the coordinates from the info.
        parts = raw_line[3:].split(" @@", 1)
        if len(parts) < 2:
            raise ValueError(f"Invalid HunkHeader: Missing closing ' @@' in {repr(raw_line)}")

        if parts[1].strip():
            # Case: @@ -l,s +l,s @@ Context-Info
            # The base class sanitizes the info part; afterwards the
            # coordinates take over the content slot.
            super().__init__(parts[1])
            self._info: str | None = self.content
            self._content = parts[0]
        else:
            # Case: only @@ -l,s +l,s @@
            super().__init__(parts[0])
            self._info = None

        self._orig_line = raw_line.rstrip("\n\r")

        # Validate the coordinates on the isolated coordinate string.
        match = self._HUNK_RE.match(self.content)
        if not match:
            raise ValueError(f"Invalid Hunk coordinates: {repr(self.content)}")

        # Integer conversion; an omitted length defaults to 1.
        self._old_start = int(match.group("old_start"))
        self._old_len = int(match.group("old_len")) if match.group("old_len") else 1
        self._new_start = int(match.group("new_start"))
        self._new_len = int(match.group("new_len")) if match.group("new_len") else 1

    @property
    def prefix(self) -> str:
        """The '@@ ' prefix at the start of the line **(ro)**."""
        return self._prefix

    @property
    def info(self) -> str | None:
        """The optional context information after the coordinates **(ro)**."""
        return self._info

    @property
    def old_start(self) -> int:
        """The starting line number in the original file **(ro)**."""
        return self._old_start

    @property
    def old_len(self) -> int:
        """The number of lines affected in the original file **(ro)**."""
        return self._old_len

    @property
    def new_start(self) -> int:
        """The starting line number in the new file **(ro)**."""
        return self._new_start

    @property
    def new_len(self) -> int:
        """The number of lines in the new hunk **(ro)**."""
        return self._new_len

    @property
    def coords(self) -> tuple[int, int, int, int]:
        """All coordinates as a tuple: (old_start, old_len, new_start, new_len) **(ro)**."""
        return self._old_start, self._old_len, self._new_start, self._new_len

    def __repr__(self):
        return f"{self.__class__.__name__}(Content: {self.content!r}, Prefix: {self.prefix!r})"



#!CLASS -  HunkHead


# CLASS - FileLine

class FileLine(PatchLine):
    """
    Represents a single line read from a file or contained within a patch hunk.

    The primary responsibility is to handle the line content and its associated
    prefix (if it comes from a patch) consistently, especially by immediately
    stripping the trailing newline character upon initialization.

    The actual content, free of the trailing newline, is exposed via the
    :py:attr:`~FileLine.content` property.
    """

    # Runs of horizontal whitespace (space, tab, form feed, vertical tab).
    _INTERNAL_WS_RE: ClassVar[re.Pattern] = re.compile(r"([ \t\f\v]+)")
    # Runs of any whitespace character.
    _ALL_WS_RE: ClassVar[re.Pattern] = re.compile(r"\s+")

    def __init__(self, raw_line: str):
        """
        Initializes the FileLine instance.

        The raw line content is processed immediately: the trailing newline
        character is removed, and the cleaned content is stored internally.
        This prevents issues where the newline character interferes with
        hunk application logic.

        :param raw_line: The complete, unmodified line string, typically including
                         a trailing newline.
        :raises PatchParseError: If ``raw_line`` is not a string (raised by the
                         base class).
        """
        # A plain file line carries no diff prefix.
        self._prefix: str = ""
        super().__init__(raw_line)
        # Evaluated after the base class call, which has validated the type.
        self._has_newline: bool = raw_line.endswith("\n")

    def __repr__(self):
        return f"{self.__class__.__name__}(Content: {self.content!r}, Prefix: {self.prefix!r})"

    # --- Content Properties ---

    @property
    def content(self) -> str:
        """
        The raw line content, stripped of the diff prefix and trailing newline **(ro)**.

        This value is used for standard matching when no whitespace flags are set.

        :returns: The cleaned line content as a string.
        """
        return self._content

    @property
    def normalized_ws_content(self) -> str:
        """
        The line content, dynamically normalized according to the --normalize-ws rule **(ro)**.

        Non-breaking spaces are treated as regular spaces. Internal whitespace
        runs collapse to a single space; trailing whitespace is removed;
        leading whitespace is preserved.

        :returns: The normalized string used for matches.
        """
        content = self._content.replace("\xa0", " ")

        # Separate the leading whitespace, which must be preserved verbatim.
        body = content.lstrip(" \t\f\v")
        leading_ws = content[: len(content) - len(body)]

        # Collapse whitespace runs in the remainder, then drop what is left
        # at the end of the line.
        collapsed = self._INTERNAL_WS_RE.sub(" ", body).rstrip(" \t\f\v")

        return leading_ws + collapsed

    @property
    def ignore_all_ws_content(self) -> str:
        """
        The line content, dynamically normalized according to the --ignore-all-ws rule **(ro)**.

        All forms of whitespace (leading, internal, trailing) are removed from the string.

        :returns: The string content with all whitespace removed.
        """
        return self._ALL_WS_RE.sub("", self._content)

    # --- Metadata & Convenience Properties ---

    @property
    def prefix(self) -> str:
        """
        The diff prefix of the line **(ro)**.

        For a plain FileLine this is the empty string; subclasses that parse
        hunk lines set it to the diff prefix character.

        :returns: The prefix string.
        """
        return self._prefix

    @property
    def has_trailing_whitespace(self) -> bool:
        """
        Indicates if the original raw line contained trailing whitespace before the newline
        **(ro)**.

        :returns: Boolean value.
        """
        return self._has_trailing_whitespace

    @property
    def is_empty(self) -> bool:
        """
        Checks if the line content is an empty string **(ro)**.

        :returns: True if the line content is empty, False otherwise.
        """
        return not self.content

    @property
    def line_string(self) -> str:
        """
        Get the processed line for filesystem output **(ro)**.

        The returned string is terminated with a line feed (``\\n``) only if
        :py:attr:`has_newline` is set. This ensures that files without a
        trailing newline (e.g., at the end of the file) are reconstructed
        without one.

        :returns: The content string, optionally suffixed with a newline.
        """
        return self.content + ("\n" if self.has_newline else "")

    @property
    def has_newline(self) -> bool:
        """
        State of the newline termination at the end of the line **(rw)**.

        :returns: True if the line ends with a newline, False otherwise.
        """
        return self._has_newline

    @has_newline.setter
    def has_newline(self, value: bool) -> None:
        """
        State of the newline termination at the end of the line.

        :param value: Set to False if the line lacks a trailing newline.
        """
        self._has_newline = value



#!CLASS FileLine


# CLASS - HunkLine

class HunkLine(TerminalColorMixin, FileLine):
    """
    Represents a single content line within a hunk block of a unified diff.

    The class parses the raw diff line upon instantiation and provides
    dynamically calculated, read-only content properties for different
    levels of whitespace normalization.
    """

    # Maps each diff prefix character to the color key used for terminal output.
    _color_map: dict[str, ColorKey] = {
        "+": "green",
        "-": "red",
        " ": "terminal",
    }

    def __init__(self, raw_line: str) -> None:
        """
        Initializes the HunkLine by parsing the raw line.

        The raw line must start with a valid diff prefix (' ', '+', or '-').
        The content is stored without the final newline character.

        :param raw_line: The raw line from the patch file (including prefix).
        :raises PatchParseError: If the input is not a string, or the prefix
                                 is invalid or missing.
        """
        # The type test comes first so that indexing below cannot fail with a
        # TypeError for non-string input.
        if (
            not isinstance(raw_line, str)
            or not raw_line
            or raw_line[0] not in (" ", "+", "-")
        ):
            raise PatchParseError(
                f"Hunk content line missing valid prefix (' ', '+', '-') or is empty: {raw_line!r}"
            )

        super().__init__(raw_line[1:])
        self._prefix: str = raw_line[0]
        self._has_newline: bool = True  # Default to POSIX standard
        # Unlike the content, the original line keeps the prefix character.
        self._orig_line = raw_line.rstrip("\n\r")

    def __repr__(self):
        return f"{self.__class__.__name__}(Content: {self.content!r}, Prefix: {self.prefix!r})"

    # --- Content Properties ---
    @property
    def prefix(self) -> str:
        """
        The diff prefix character (' ', '+', or '-') **(ro)**.

        :returns: The prefix character.
        """
        return self._prefix

    @property
    def is_context(self) -> bool:
        """Returns True if the line is a context line (' ') **(ro)**.

        :returns: Boolean value.
        """
        return self._prefix == " "

    @property
    def is_addition(self) -> bool:
        """Returns True if the line is an addition line ('+') **(ro)**.

        :returns: Boolean value.
        """
        return self._prefix == "+"

    @property
    def is_deletion(self) -> bool:
        """Returns True if the line is a deletion line ('-') **(ro)**.

        :returns: Boolean value.
        """
        return self._prefix == "-"




#!CLASS HunkLine

# Insert code here

if __name__ == "__main__":  # pragma: no cover
    # Developer entry point: runs the documenting doctests for this module.
    from doctest import FAIL_FAST, testfile

    # Developer toggles: set to False / 0 for a quiet, non-aborting run.
    be_verbose = True
    option_flags = FAIL_FAST
    test_sum = 0
    test_failed = 0

    # Location of the documenting tests, relative to the project root.
    test_file = Path(__file__).parents[3] / "doc/source/devel" / "get_started_lines.rst"

    if not test_file.exists():
        print(f"⚠️ Warning: Test file {test_file.name} not found.")
    else:
        print(f"--- Running Doctest for {test_file.name} ---")
        outcome = testfile(
            str(test_file),
            module_relative=False,
            verbose=be_verbose,
            optionflags=option_flags,
        )
        test_failed += outcome.failed
        test_sum += outcome.attempted
        if test_failed:
            print(f"\nDocTests failed: {test_failed} tests.")
        else:
            print(f"\nDocTests passed without errors, {test_sum} tests.")