from __future__ import annotations import ast from dataclasses import dataclass @dataclass(slots=True) class CodeChunk: title: str text: str start_line: int end_line: int chunk_type: str class CodeTextChunker: def chunk(self, path: str, text: str) -> list[CodeChunk]: try: tree = ast.parse(text) except SyntaxError: return self._window_chunks(path, text) chunks: list[CodeChunk] = [] lines = text.splitlines() for node in tree.body: if not isinstance(node, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)): continue start = int(getattr(node, "lineno", 1)) end = int(getattr(node, "end_lineno", start)) body = "\n".join(lines[start - 1 : end]).strip() if not body: continue chunks.append( CodeChunk( title=f"{path}:{getattr(node, 'name', 'block')}", text=body, start_line=start, end_line=end, chunk_type="symbol_block", ) ) return chunks or self._window_chunks(path, text) def _window_chunks(self, path: str, text: str) -> list[CodeChunk]: lines = text.splitlines() chunks: list[CodeChunk] = [] size = 80 overlap = 15 start = 0 while start < len(lines): end = min(len(lines), start + size) body = "\n".join(lines[start:end]).strip() if body: chunks.append(CodeChunk(f"{path}:{start + 1}-{end}", body, start + 1, end, "window")) if end >= len(lines): break start = max(0, end - overlap) return chunks