from __future__ import annotations


class DocTextChunker:
    """Split text into consecutive character windows that overlap.

    Each window holds at most ``max_chars`` characters, and each window
    after the first starts ``overlap_chars`` characters before the end of
    the previous one.  A negative ``overlap_chars`` instead skips that many
    characters between windows.

    The constructor stores the settings without checking them; ``split``
    raises ``ValueError`` when the settings would prevent it from advancing
    through the text.
    """

    def __init__(self, max_chars: int = 4000, overlap_chars: int = 250) -> None:
        """Store the chunking settings.

        Args:
            max_chars: Maximum number of characters per window.
            overlap_chars: Number of characters shared between one window
                and the next.
        """
        self._max_chars = max_chars
        self._overlap_chars = overlap_chars

    def split(self, text: str) -> list[str]:
        """Return the non-empty, whitespace-stripped chunks of ``text``.

        Args:
            text: The text to split.  Leading and trailing whitespace is
                removed before windowing.

        Returns:
            The chunks in document order.  Empty or whitespace-only input
            yields an empty list.  Because every window is stripped, a
            chunk can be shorter than ``max_chars``.

        Raises:
            ValueError: If ``max_chars`` is not positive, or if the text
                needs more than one window and ``overlap_chars`` is so
                large that the next window would not start later than the
                current one.
        """
        cleaned = text.strip()
        if not cleaned:
            return []
        if self._max_chars <= 0:
            raise ValueError(
                f"max_chars must be a positive integer, got {self._max_chars}"
            )

        total = len(cleaned)
        chunks: list[str] = []
        start = 0
        while start < total:
            end = min(total, start + self._max_chars)
            piece = cleaned[start:end].strip()
            if piece:
                chunks.append(piece)
            if end >= total:
                break
            next_start = end - self._overlap_chars
            # Without this guard the window would restart at (or before)
            # the same offset on every pass and the loop would never end.
            if next_start <= start:
                raise ValueError(
                    f"overlap_chars ({self._overlap_chars}) must be smaller "
                    f"than max_chars ({self._max_chars}) to split text "
                    "longer than max_chars"
                )
            start = next_start
        return chunks