"""Fetch source excerpts and their evidence entries for explain trace paths."""

from __future__ import annotations

from typing import TYPE_CHECKING

from app.modules.rag.explain.excerpt_planner import ExcerptPlanner
from app.modules.rag.explain.models import CodeExcerpt, EvidenceItem, TracePath
from app.modules.rag.retrieval.test_filter import is_test_path

if TYPE_CHECKING:
    from app.modules.rag.explain.graph_repository import CodeGraphRepository


class SourceExcerptFetcher:
    """Turn the symbols referenced by trace paths into code excerpts.

    Chunks are loaded through the graph repository, and each chunk is handed
    to an ``ExcerptPlanner`` which produces the excerpts for it.
    """

    def __init__(
        self,
        graph_repository: CodeGraphRepository,
        planner: ExcerptPlanner | None = None,
    ) -> None:
        """Store the collaborators used by :meth:`fetch`.

        Args:
            graph_repository: Repository queried for chunks by symbol ID.
            planner: Planner that yields excerpts for a chunk. When omitted
                (or otherwise falsy), a default ``ExcerptPlanner()`` is
                created.
        """
        self._graph = graph_repository
        self._planner = planner or ExcerptPlanner()

    def fetch(
        self,
        rag_session_id: str,
        trace_paths: list[TracePath],
        *,
        max_excerpts: int = 40,
    ) -> tuple[list[CodeExcerpt], dict[str, EvidenceItem]]:
        """Collect excerpts and an evidence index for the given trace paths.

        Args:
            rag_session_id: Session identifier forwarded to the repository.
            trace_paths: Paths whose ``symbol_ids`` select the chunks to load.
                Falsy IDs are skipped and duplicates are dropped, keeping the
                first occurrence.
            max_excerpts: Upper bound on the number of excerpts returned.

        Returns:
            A ``(excerpts, evidence_index)`` pair. ``evidence_index`` maps
            generated IDs (``excerpt_1``, ``excerpt_2``, ...) to one
            ``EvidenceItem`` per processed chunk. A chunk gets its entry
            before its excerpts are planned, so an entry can exist even when
            the planner yields nothing for it or the cap cuts its excerpts
            short.
        """
        # dict.fromkeys de-duplicates in O(n) while preserving first-seen
        # order (a list membership test per ID would be quadratic).
        ordered_symbol_ids: list[str] = list(
            dict.fromkeys(
                symbol_id
                for path in trace_paths
                for symbol_id in path.symbol_ids
                if symbol_id
            )
        )
        chunks = self._graph.get_chunks_by_symbol_ids(rag_session_id, ordered_symbol_ids)

        excerpts: list[CodeExcerpt] = []
        evidence_index: dict[str, EvidenceItem] = {}
        for chunk in chunks:
            symbol_id = str(chunk.metadata.get("symbol_id") or "")
            # IDs are numbered by how many evidence entries exist so far.
            evidence_id = f"excerpt_{len(evidence_index) + 1}"
            location = chunk.location
            evidence_index[evidence_id] = EvidenceItem(
                evidence_id=evidence_id,
                kind="excerpt",
                summary=chunk.title,
                location=location,
                supports=[symbol_id] if symbol_id else [],
            )

            # A chunk counts as test code if its metadata says so or if its
            # path (falling back to chunk.source when there is no location)
            # is recognised by is_test_path.
            is_test_chunk = bool(chunk.metadata.get("is_test")) or is_test_path(
                location.path if location else chunk.source
            )

            for excerpt in self._planner.plan(chunk, evidence_id=evidence_id, symbol_id=symbol_id):
                if len(excerpts) >= max_excerpts:
                    break
                # Tag test excerpts once; the planner's excerpt object is
                # modified in place.
                if is_test_chunk and not excerpt.focus.startswith("test:"):
                    excerpt.focus = f"test:{excerpt.focus}"
                excerpts.append(excerpt)

            # Stop visiting further chunks once the cap has been reached.
            if len(excerpts) >= max_excerpts:
                break

        return excerpts, evidence_index