54 lines
2.2 KiB
Python
54 lines
2.2 KiB
Python
from __future__ import annotations
|
|
|
|
from typing import TYPE_CHECKING
|
|
|
|
from app.modules.rag.explain.excerpt_planner import ExcerptPlanner
|
|
from app.modules.rag.explain.models import CodeExcerpt, EvidenceItem, TracePath
|
|
from app.modules.rag.retrieval.test_filter import is_test_path
|
|
|
|
if TYPE_CHECKING:
|
|
from app.modules.rag.explain.graph_repository import CodeGraphRepository
|
|
|
|
|
|
class SourceExcerptFetcher:
    """Build code excerpts and evidence records for the symbols on trace paths."""

    def __init__(self, graph_repository: CodeGraphRepository, planner: ExcerptPlanner | None = None) -> None:
        """Store the collaborators used by :meth:`fetch`.

        Args:
            graph_repository: Repository queried for chunks by symbol id.
            planner: Turns a chunk into excerpts; a default ``ExcerptPlanner``
                is created when none (or a falsy value) is supplied.
        """
        self._graph = graph_repository
        self._planner = planner or ExcerptPlanner()

    def fetch(
        self,
        rag_session_id: str,
        trace_paths: list[TracePath],
        *,
        max_excerpts: int = 40,
    ) -> tuple[list[CodeExcerpt], dict[str, EvidenceItem]]:
        """Collect excerpts and their evidence items for the given trace paths.

        Symbol ids are gathered from ``trace_paths`` in first-seen order
        (falsy ids skipped, duplicates dropped) and passed to the graph
        repository in a single call. Each returned chunk gets one evidence
        item keyed ``excerpt_<n>`` (numbered from 1 in chunk order), and the
        planner's excerpts for that chunk are appended until ``max_excerpts``
        is reached.

        Args:
            rag_session_id: Session identifier forwarded to the repository.
            trace_paths: Paths whose ``symbol_ids`` select the chunks.
            max_excerpts: Upper bound on the number of excerpts returned.
                A value of zero or less yields no excerpts and no evidence.

        Returns:
            A ``(excerpts, evidence_index)`` tuple, where ``evidence_index``
            maps each evidence id to its ``EvidenceItem``.
        """
        # dict.fromkeys de-duplicates while keeping first-seen order, without
        # the quadratic cost of repeated list membership checks.
        ordered_symbol_ids = list(
            dict.fromkeys(
                symbol_id
                for path in trace_paths
                for symbol_id in path.symbol_ids
                if symbol_id
            )
        )
        chunks = self._graph.get_chunks_by_symbol_ids(rag_session_id, ordered_symbol_ids)

        excerpts: list[CodeExcerpt] = []
        evidence_index: dict[str, EvidenceItem] = {}
        for chunk in chunks:
            # Check the cap before registering evidence so that a chunk which
            # cannot contribute any excerpt (e.g. max_excerpts <= 0) does not
            # leave an evidence entry without a matching excerpt.
            if len(excerpts) >= max_excerpts:
                break

            symbol_id = str(chunk.metadata.get("symbol_id") or "")
            evidence_id = f"excerpt_{len(evidence_index) + 1}"
            location = chunk.location
            evidence_index[evidence_id] = EvidenceItem(
                evidence_id=evidence_id,
                kind="excerpt",
                summary=chunk.title,
                location=location,
                supports=[symbol_id] if symbol_id else [],
            )

            # A chunk counts as test code when its metadata flags it or when
            # its path (location path, falling back to chunk.source) matches.
            is_test_chunk = bool(chunk.metadata.get("is_test")) or is_test_path(
                location.path if location else chunk.source
            )
            for excerpt in self._planner.plan(chunk, evidence_id=evidence_id, symbol_id=symbol_id):
                if len(excerpts) >= max_excerpts:
                    break
                # Mark excerpts from test code once; the planner's excerpt
                # object is modified in place.
                if is_test_chunk and not excerpt.focus.startswith("test:"):
                    excerpt.focus = f"test:{excerpt.focus}"
                excerpts.append(excerpt)

        return excerpts, evidence_index
|