25 lines
876 B
Python
25 lines
876 B
Python
from rag_agent.ingest.chunker import chunk_text, chunk_text_by_lines
|
|
|
|
|
|
def test_chunk_text_basic():
    """Check word-based chunking of an eight-word sentence.

    Uses ``chunk_size=3`` and ``overlap=1`` and verifies the number of
    chunks, the text of the first chunk, the prefix of the second chunk,
    and that the first chunk carries no line-number information.
    """
    sentence = "one two three four five six seven eight"
    pieces = chunk_text(sentence, chunk_size=3, overlap=1)

    # NOTE(review): the line-based test in this file expects 4 chunks for
    # 8 items with the same size/overlap; confirm 3 is intended here.
    assert len(pieces) == 3

    head = pieces[0]
    assert head.text == "one two three"

    # The second chunk is expected to begin with the overlapping word.
    follower = pieces[1]
    assert follower.text.startswith("three four")

    # Word-based chunks are expected to have no line span.
    assert head.start_line is None
    assert head.end_line is None
|
|
|
|
|
|
def test_chunk_text_by_lines():
    """Check line-based chunking of an eight-line document.

    Uses ``max_lines=3`` and ``overlap_lines=1`` and verifies the number
    of chunks, the text of the first chunk, and the ``start_line`` /
    ``end_line`` values of the first, second and last chunks.
    """
    document = "line1\nline2\nline3\nline4\nline5\nline6\nline7\nline8"
    pieces = chunk_text_by_lines(document, max_lines=3, overlap_lines=1)

    assert len(pieces) == 4
    assert pieces[0].text == "line1\nline2\nline3"

    # chunk index -> (expected start_line, expected end_line)
    expected_spans = (
        (0, (1, 3)),
        (1, (3, 5)),
        (3, (7, 8)),
    )
    for index, (first_line, last_line) in expected_spans:
        piece = pieces[index]
        assert piece.start_line == first_line
        assert piece.end_line == last_line
|