# File: src/fitzzftw/patch/lines.py
# Author: Fitzz TeXnik Welt
# Email: FitzzTeXnikWelt@t-online.de
# License: LGPLv2 or above
"""
lines
===============================
This module provides the structural representation of individual lines within
a patch file. It acts as a specialized parser that converts raw patch
fragments into intelligent objects capable of self-validation and metadata
extraction.
Core Line Types:
----------------
* **PatchLine**:
The base class for all lines, handling basic sanitization
and trailing newline management.
* **HeadLine**:
Manages file headers (--- and +++), extracting paths,
timestamps, and identifying null-device targets.
* **HunkHeadLine**:
Parses the coordination metadata (@@ -l,s +l,s @@),
providing structured access to line ranges.
* **FileLine & HunkLine**:
Represent actual content changes. They include
advanced whitespace normalization logic (collapse, ignore-all) required
for robust patch application across different formatting styles.
Key Features:
-------------
* **Integrated Styling**:
Most line classes inherit from :class:`.base.TerminalColorMixin`
and define a :attr:`~.HeadLine._color_map`, allowing them to print themselves with the
correct semantic color (e.g., green for additions, red for deletions).
* **Whitespace Awareness**:
Objects can dynamically provide different
representations of their content (:attr:`~.FileLine.normalized_ws_content`,
:attr:`~.FileLine.ignore_all_ws_content`)
to support flexible matching algorithms.
* **Protocol Compliance**:
By providing :attr:`~.HeadLine.prefix`, :attr:`~.HeadLine.orig_line`, and
:attr:`~.HeadLine._color_map`,
these classes satisfy the `LineLike` protocol, enabling seamless
integration with the framework's diagnostic and output tools.
"""
import re
from pathlib import Path
from typing import ClassVar
from fitzzftw.patch.base import TerminalColorMixin
from fitzzftw.patch.exceptions import PatchParseError
from fitzzftw.patch.static import ColorKey
# CLASS - PatchLine
class PatchLine:
    """
    Base class for structural patch lines.

    This class serves as the foundation for all specialized line types within
    a patch (e.g., HeadLine, FileLine). It handles the initial sanitization
    of the raw input stream.
    """

    # Horizontal whitespace located directly in front of the line terminator
    # (or the end of the string) counts as trailing whitespace.
    _TRAIL_WS_RE: ClassVar[re.Pattern] = re.compile(r"([ \t\f\v]+)[\n\r]*$")
    # Text of the unified-diff marker for a missing final newline.
    _NO_NEWLINE_MARKER: ClassVar[str] = "\\ No newline at end of file"

    def __init__(self, raw_line: str):
        """
        Initializes a PatchLine instance by sanitizing the input.

        The raw line is stripped of trailing carriage returns and newlines.
        Additionally, it removes the specialized 'No newline at end of file'
        marker that can appear in Unified Diff format, so that the
        content property contains only the actual text data. The marker is
        recognized with an LF terminator, a CRLF terminator, or no terminator
        at all (e.g. when it is the very last line of a patch).

        :param raw_line: The unmodified string read directly from the patch source.
        :raises PatchParseError: If ``raw_line`` is not a string.
        """
        # Strict type check to ensure the input is a string
        if not isinstance(raw_line, str):
            raise PatchParseError(
                f"PatchLine expected a string, but received {type(raw_line).__name__}."
            )

        # Remove the 'No newline' marker; it would otherwise interfere with
        # the matching/patching logic. The LF and CRLF variants are tried
        # first, followed by the bare marker without any terminator.
        clean_content = raw_line
        for terminator in ("\n", "\r\n", ""):
            clean_content = clean_content.removesuffix(self._NO_NEWLINE_MARKER + terminator)

        # Must be evaluated before the terminator is stripped below, because
        # the pattern tolerates (but does not require) a trailing terminator.
        self._has_trailing_whitespace: bool = bool(self._TRAIL_WS_RE.search(clean_content))

        # Strip trailing newline characters (\n or \r\n)
        self._content: str = clean_content.rstrip("\n\r")
        self._orig_line: str = self._content

    @property
    def content(self) -> str:
        """
        The cleaned content of the line without trailing newlines **(ro)**.

        :returns: A string representing the text content of the line.
        """
        return self._content

    @property
    def orig_line(self) -> str:
        """
        The original line without trailing newlines **(ro)**.

        For a plain PatchLine this is identical to :py:attr:`content`.

        :returns: A string representing the original line text.
        """
        return self._orig_line

    @property
    def has_trailing_whitespace(self) -> bool:
        """
        Indicates if the sanitized input ended in horizontal whitespace **(ro)**.

        Spaces, tabs, form feeds and vertical tabs located directly in front
        of the line terminator (or the end of the string) are considered.

        :returns: True if trailing whitespace was found, False otherwise.
        """
        return self._has_trailing_whitespace

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(Content: {self.content!r})"
#!CLASS - PatchLine
# CLASS - HeadLine
class HeadLine(TerminalColorMixin, PatchLine):
    """
    Represents a file header line within a patch (starting with '--- ' or '+++ ').

    This class specializes :py:class:`PatchLine` to handle file-level metadata.
    It isolates the file path from the unified diff prefix and provides
    convenience methods to identify the role of the file (original vs. new)
    within a transition.
    """

    _color_map: dict[str, ColorKey] = {"--- ": "red", "+++ ": "green"}

    def __init__(self, raw_line: str):
        """
        Initializes the HeadLine by extracting the prefix and path content.

        The first four characters are used to identify the header type.
        The remainder of the line is split at the first tab character into
        the path and the optional metadata; the base class sanitizes
        whichever part carries the end of the line.

        :param raw_line: The complete, unmodified header line from the patch.
        :raises PatchParseError: If ``raw_line`` is not a string.
        :raises ValueError: If the line does not start with '--- ' or '+++ '.
        """
        # Check the type before slicing, so that non-string input is reported
        # as a parse error instead of an unrelated TypeError.
        if not isinstance(raw_line, str):
            raise PatchParseError(
                f"HeadLine expected a string, but received {type(raw_line).__name__}."
            )

        prefix_candidate = raw_line[:4]
        if prefix_candidate not in ("--- ", "+++ "):
            raise ValueError(
                f"Invalid HeadLine: Expected '--- ' or '+++ ', got {repr(raw_line[:4])}"
            )
        self._prefix: str = prefix_candidate

        # Everything after the first tab is metadata (e.g. a timestamp).
        path_part, tab, meta_part = raw_line[4:].partition("\t")
        self._info: str | None
        if tab:
            # The base class sanitizes the metadata (it holds the line end);
            # afterwards the content is replaced by the isolated path.
            super().__init__(meta_part)
            self._info = self.content
            self._content = path_part.rstrip(" ")
        else:
            super().__init__(path_part)
            self._info = None

        self._orig_line = raw_line.rstrip("\n\r")

    @property
    def prefix(self) -> str:
        """
        The diff prefix ('--- ' or '+++ ') identified at the start of the line **(ro)**.

        :returns: The prefix string.
        """
        return self._prefix

    @property
    def is_orig(self) -> bool:
        """
        Indicates if this line represents the original (source) file path **(ro)**.

        :returns: True if the prefix is '--- ', False otherwise.
        """
        return self._prefix == "--- "

    @property
    def is_new(self) -> bool:
        """
        Indicates if this line represents the new (target) file path **(ro)**.

        :returns: True if the prefix is '+++ ', False otherwise.
        """
        return self._prefix == "+++ "

    @property
    def is_null_path(self) -> bool:
        """
        Checks if the file path points to a null device (e.g., /dev/null) **(ro)**.

        This property uses the static method :py:meth:`~HeadLine.check_is_null_path`
        to perform the null-path check.

        :returns: True if the content matches a null path pattern.
        """
        return self.check_is_null_path(self.content)

    @property
    def info(self) -> str | None:
        """
        Returns the metadata found after the file path (e.g., timestamps) **(ro)**.

        This contains everything that was separated by a tab character
        from the path. The value is an empty string if a tab was present
        but nothing followed it, and None if the line contained no tab.

        :returns: The metadata string or None.
        """
        return self._info

    def get_path(self, strip_count: int) -> Path:
        """
        Returns the path as a Path object, stripped of N leading segments.

        :param strip_count: Number of leading path components to remove.
        :raises ValueError: If strip_count is too high for the available segments
            or is negative.
        :returns: A Path object of the remaining segments.
        """
        if strip_count < 0:
            raise ValueError(f"Strip count must be non-negative, got {strip_count}")

        segments = Path(self.content).parts
        # At least one segment has to remain after stripping.
        if strip_count >= len(segments):
            raise ValueError(
                f"Strip level -p{strip_count} is too high for path '{self.content}' "
                f"(only {len(segments)} segments available)."
            )
        return Path(*segments[strip_count:])

    @staticmethod
    def check_is_null_path(path: Path | str) -> bool:
        """Check if the given path represents a null path marker.

        This function detects special paths used in patch files to signify
        file deletion or creation, specifically:

        1. '/dev/null' (POSIX standard, case-sensitive).
        2. 'NUL' (Windows standard, case-insensitive).

        The markers are hard-coded, as these standards are highly unlikely
        to change.

        :param path: The path object or string to check.
        :returns: :py:obj:`True` if the path matches a known null path
            marker, :py:obj:`False` otherwise (including for arguments
            that are neither a Path nor a string).
        """
        if isinstance(path, Path):
            path_str = path.as_posix()
        elif isinstance(path, str):
            path_str = path
        else:
            return False

        # POSIX marker: exact, case-sensitive match.
        # Windows/DOS marker: case-insensitive match (e.g. 'nul', 'NuL').
        return path_str == "/dev/null" or path_str.upper() == "NUL"

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}"
            f"(Content: {self.content!r}, Prefix: {self.prefix!r})"
        )
#!CLASS - HeadLine
# CLASS - HunkHead
class HunkHeadLine(TerminalColorMixin, PatchLine):
    """
    Represents a hunk header line within a patch (starting with '@@ ').

    This class specializes :py:class:`PatchLine` to handle coordinate metadata.
    It isolates the range information from optional context info (like function names)
    and provides parsed access to the line numbers.
    """

    _color_map: dict[str, ColorKey] = {"@@ ": "cyan"}

    # Anchored at both ends: besides optional trailing blanks, nothing may
    # follow the coordinates, so headers with garbage are rejected instead
    # of being parsed with silently truncated numbers.
    _HUNK_RE: ClassVar[re.Pattern] = re.compile(
        r"^-(?P<old_start>\d+)(?:,(?P<old_len>\d+))? "
        r"\+(?P<new_start>\d+)(?:,(?P<new_len>\d+))?"
        r"[ \t]*$"
    )

    def __init__(self, raw_line: str):
        """
        Initializes the HunkHeadLine by extracting coordinates and optional info.

        :param raw_line: The complete, unmodified hunk header line.
        :raises PatchParseError: If ``raw_line`` is not a string.
        :raises ValueError: If the prefix or the coordinates are invalid.
        """
        # Strict type check to ensure the input is a string
        if not isinstance(raw_line, str):
            raise PatchParseError(
                f"HunkHeadLine expected a string, but received {type(raw_line).__name__}."
            )
        if not raw_line.startswith("@@ "):
            raise ValueError(f"Invalid HunkHeadLine: Expected '@@ ', got {repr(raw_line[:3])}")
        self._prefix = "@@ "
        self._suffix_marker = " @@"

        # Split at the closing " @@" to separate the coordinates from the info.
        coord_part, closing, info_part = raw_line[3:].partition(" @@")
        if not closing:
            raise ValueError(f"Invalid HunkHeader: Missing closing ' @@' in {repr(raw_line)}")

        self._info: str | None
        if info_part.strip():
            # Case: @@ -l,s +l,s @@ Context-Info
            # The base class sanitizes the info (it holds the line end);
            # afterwards the content is replaced by the coordinate string.
            super().__init__(info_part)
            self._info = self.content
            self._content = coord_part
        else:
            # Case: only @@ -l,s +l,s @@
            super().__init__(coord_part)
            self._info = None

        self._orig_line = raw_line.rstrip("\n\r")

        # Validate the coordinates on the isolated coordinate string.
        match = self._HUNK_RE.match(self.content)
        if not match:
            raise ValueError(f"Invalid Hunk coordinates: {repr(self.content)}")

        # Integer conversion; an omitted length defaults to 1.
        old_len = match.group("old_len")
        new_len = match.group("new_len")
        self._old_start = int(match.group("old_start"))
        self._old_len = int(old_len) if old_len is not None else 1
        self._new_start = int(match.group("new_start"))
        self._new_len = int(new_len) if new_len is not None else 1

    @property
    def prefix(self) -> str:
        """The '@@ ' prefix at the start of the line **(ro)**."""
        return self._prefix

    @property
    def info(self) -> str | None:
        """The optional context information after the coordinates, or None **(ro)**."""
        return self._info

    @property
    def old_start(self) -> int:
        """The starting line number in the original file **(ro)**."""
        return self._old_start

    @property
    def old_len(self) -> int:
        """The number of lines affected in the original file (1 if omitted) **(ro)**."""
        return self._old_len

    @property
    def new_start(self) -> int:
        """The starting line number in the new file **(ro)**."""
        return self._new_start

    @property
    def new_len(self) -> int:
        """The number of lines in the new hunk (1 if omitted) **(ro)**."""
        return self._new_len

    @property
    def coords(self) -> tuple[int, int, int, int]:
        """All coordinates as a tuple: (old_start, old_len, new_start, new_len) **(ro)**."""
        return self._old_start, self._old_len, self._new_start, self._new_len

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}"
            f"(Content: {self.content!r}, Prefix: {self.prefix!r})"
        )
#!CLASS - HunkHead
# CLASS - FileLine
class FileLine(PatchLine):
    """
    A single line, either read from a file or contained within a patch hunk.

    The class keeps the line text and its (possibly empty) diff prefix
    together. The trailing newline is stripped right at construction time;
    whether one was present is remembered separately.

    The text without the trailing newline is available through the
    :py:attr:`~FileLine.content` property.
    """

    # Runs of horizontal whitespace (space, tab, form feed, vertical tab).
    _INTERNAL_WS_RE: ClassVar[re.Pattern] = re.compile(r"([ \t\f\v]+)")
    # Runs of any whitespace.
    _ALL_WS_RE: ClassVar[re.Pattern] = re.compile(r"\s+")

    def __init__(self, raw_line: str):
        """
        Builds the FileLine from a raw line.

        The base class performs the sanitization (removal of the line
        terminator); this initializer additionally records whether the raw
        line ended with a newline character and sets an empty prefix.

        :param raw_line: The complete, unmodified line string, typically including
            a trailing newline.
        """
        self._prefix: str = ""
        super().__init__(raw_line)
        self._has_newline = raw_line.endswith("\n")

    def __repr__(self):
        return (
            f"{self.__class__.__name__}"
            f"(Content: {self.content!r}, Prefix: {self.prefix!r})"
        )

    # --- Content Properties ---

    @property
    def content(self) -> str:
        """
        The line text without diff prefix and without trailing newline **(ro)**.

        Used for standard matching when no whitespace flags are set.

        :returns: The cleaned line content as a string.
        """
        return self._content

    @property
    def normalized_ws_content(self) -> str:
        """
        The line text normalized according to the --normalize-ws rule **(ro)**.

        Non-breaking spaces are treated as regular spaces. Leading whitespace
        is preserved, every other run of horizontal whitespace collapses to a
        single space, and trailing whitespace is dropped.

        :returns: The normalized string used for matches.
        """
        text = self._content.replace("\xa0", " ")
        # Separate the indentation, which must survive unchanged, from the rest.
        body = text.lstrip(" \t\f\v")
        indentation = text[: len(text) - len(body)]
        # Collapse the whitespace runs of the remainder, then cut off the tail.
        collapsed = self._INTERNAL_WS_RE.sub(" ", body)
        return indentation + collapsed.rstrip(" \t\f\v")

    @property
    def ignore_all_ws_content(self) -> str:
        """
        The line text normalized according to the --ignore-all-ws rule **(ro)**.

        Every whitespace character (leading, internal, trailing) is removed.

        :returns: The string content with all whitespace removed.
        """
        return self._ALL_WS_RE.sub("", self._content)

    # --- Metadata & Convenience Properties ---

    @property
    def prefix(self) -> str:
        """
        The diff prefix of the line **(ro)**.

        A FileLine itself is initialized with an empty prefix.

        :returns: The prefix string.
        """
        return self._prefix

    @property
    def has_trailing_whitespace(self) -> bool:
        """
        Tells whether the raw line carried whitespace in front of the newline **(ro)**.

        :returns: Boolean value.
        """
        return self._has_trailing_whitespace

    @property
    def is_empty(self) -> bool:
        """
        Tells whether the line content is an empty string **(ro)**.

        :returns: True if the line content is empty, False otherwise.
        """
        return not self.content

    @property
    def line_string(self) -> str:
        """
        The line as it is written to the filesystem **(ro)**.

        A newline is appended only if :py:attr:`has_newline` is set, so a
        line without a terminator (e.g. the last line of a file) is written
        without one.

        :returns: The content string, optionally suffixed with a newline.
        """
        terminator = "\n" if self.has_newline else ""
        return self.content + terminator

    @property
    def has_newline(self) -> bool:
        """
        State of the newline termination at the end of the line **(rw)**.

        :param value: Set to False if the line lacks a trailing newline.
        :returns: True if the line ends with a newline, False otherwise.
        """
        return self._has_newline

    @has_newline.setter
    def has_newline(self, value: bool) -> None:
        """
        Sets the state of the newline termination at the end of the line.

        :param value: Set to False if the line lacks a trailing newline.
        """
        self._has_newline = value
#!CLASS FileLine
# CLASS - HunkLine
class HunkLine(TerminalColorMixin, FileLine):
    """
    A single content line inside a hunk block of a unified diff.

    The raw diff line is split into its prefix character and its text on
    instantiation. The whitespace-normalized views of the text are inherited
    from :py:class:`FileLine`.
    """

    _color_map: dict[str, ColorKey] = {
        "+": "green",
        "-": "red",
        " ": "terminal",
    }

    def __init__(self, raw_line: str) -> None:
        """
        Builds the HunkLine by parsing the raw line.

        The raw line has to begin with one of the diff prefixes
        (' ', '+', or '-'). The text is stored without the final newline.

        :param raw_line: The raw line from the patch file (including prefix).
        :raises PatchParseError: If the prefix is invalid or missing.
        """
        has_valid_prefix = bool(raw_line) and raw_line[0] in (" ", "+", "-")
        if not has_valid_prefix:
            raise PatchParseError(
                f"Hunk content line missing valid prefix (' ', '+', '-') or is empty: {raw_line!r}"
            )

        marker, text = raw_line[0], raw_line[1:]
        super().__init__(text)
        self._prefix: str = marker
        # Default to POSIX standard: the line is assumed to be newline
        # terminated; the has_newline setter allows this to be changed later.
        self._has_newline: bool = True
        # Unlike the content, the original line keeps the prefix character.
        self._orig_line = raw_line.rstrip("\n\r")

    def __repr__(self):
        return (
            f"{self.__class__.__name__}"
            f"(Content: {self.content!r}, Prefix: {self.prefix!r})"
        )

    # --- Content Properties ---

    @property
    def prefix(self) -> str:
        """
        The diff prefix character (' ', '+', or '-') **(ro)**.

        :returns: The prefix character.
        """
        return self._prefix

    @property
    def is_context(self) -> bool:
        """Tells whether the line is a context line (' ') **(ro)**.

        :returns: Boolean value.
        """
        return self._prefix == " "

    @property
    def is_addition(self) -> bool:
        """Tells whether the line is an addition line ('+') **(ro)**.

        :returns: Boolean value.
        """
        return self._prefix == "+"

    @property
    def is_deletion(self) -> bool:
        """Tells whether the line is a deletion line ('-') **(ro)**.

        :returns: Boolean value.
        """
        return self._prefix == "-"
#!CLASS HunkLine
# Insert the code here
if __name__ == "__main__":  # pragma: no cover
    from doctest import FAIL_FAST, testfile

    # Developer toggles for the documentation tests
    # (alternatives: be_verbose = False, option_flags = 0).
    be_verbose = True
    option_flags = FAIL_FAST

    # Path to the documenting tests
    testfiles_dir = Path(__file__).parents[3] / "doc/source/devel"
    test_file = testfiles_dir / "get_started_lines.rst"
    # Alternative test file: testfiles_dir / "get_started_ftw_patch.rst"

    if not test_file.exists():
        print(f"⚠️ Warning: Test file {test_file.name} not found.")
    else:
        print(f"--- Running Doctest for {test_file.name} ---")
        doctestresult = testfile(
            str(test_file),
            module_relative=False,
            verbose=be_verbose,
            optionflags=option_flags,
        )
        test_failed = doctestresult.failed
        test_sum = doctestresult.attempted

        if test_failed:
            print(f"\nDocTests failed: {test_failed} tests.")
        else:
            print(f"\nDocTests passed without errors, {test_sum} tests.")