# File: RagAgent/tests/test_chunker.py (Python, 25 lines, 876 B)

from rag_agent.ingest.chunker import chunk_text, chunk_text_by_lines
def test_chunk_text_basic():
    """Check word-based chunking of a short sentence with overlap.

    Splits an eight-word string with ``chunk_size=3`` and ``overlap=1`` and
    verifies the chunk count, the text of the first chunk, the overlap at the
    start of the second chunk, and that no line information is attached.
    """
    text = "one two three four five six seven eight"
    chunks = chunk_text(text, chunk_size=3, overlap=1)

    # NOTE(review): the expected count depends on how chunk_text treats the
    # trailing partial window; that cannot be confirmed from this file.
    assert len(chunks) == 3
    assert chunks[0].text == "one two three"
    # The second chunk must begin with the last word of the first (overlap=1).
    assert chunks[1].text.startswith("three four")

    # Chunks produced by chunk_text are expected to carry no line range.
    assert chunks[0].start_line is None
    assert chunks[0].end_line is None
def test_chunk_text_by_lines():
    """Check line-based chunking of an eight-line string with overlap.

    Splits the text with ``max_lines=3`` and ``overlap_lines=1`` and verifies
    the chunk count, the text of the first chunk, and the ``start_line`` /
    ``end_line`` values expected for the first, second, and last chunks.
    """
    text = "line1\nline2\nline3\nline4\nline5\nline6\nline7\nline8"
    chunks = chunk_text_by_lines(text, max_lines=3, overlap_lines=1)

    assert len(chunks) == 4
    assert chunks[0].text == "line1\nline2\nline3"

    # The expected ranges are 1-based and inclusive: the first chunk is 1..3.
    assert chunks[0].start_line == 1
    assert chunks[0].end_line == 3
    # With a one-line overlap, the next chunk restarts on the previous end line.
    assert chunks[1].start_line == 3
    assert chunks[1].end_line == 5
    # The final chunk is shorter because the input ends at line 8.
    assert chunks[3].start_line == 7
    assert chunks[3].end_line == 8