Новый раг

This commit is contained in:
2026-03-01 14:21:33 +03:00
parent 2728c07ba9
commit 1ef0b4d68c
95 changed files with 3145 additions and 927 deletions

View File

@@ -0,0 +1,21 @@
from __future__ import annotations
class DocTextChunker:
def __init__(self, max_chars: int = 4000, overlap_chars: int = 250) -> None:
self._max_chars = max_chars
self._overlap_chars = overlap_chars
def split(self, text: str) -> list[str]:
cleaned = text.strip()
if not cleaned:
return []
chunks: list[str] = []
start = 0
while start < len(cleaned):
end = min(len(cleaned), start + self._max_chars)
chunks.append(cleaned[start:end].strip())
if end >= len(cleaned):
break
start = max(0, end - self._overlap_chars)
return [chunk for chunk in chunks if chunk]