Files

52 lines
2.3 KiB
Python

from __future__ import annotations
from dataclasses import dataclass
from app.core.agent.intent_router import IntentRouterV2
from app.core.agent.runtime.docs_qa_pipeline import DocsQAPipelineRunner
from tests.docs_qa_eval.config import DocsEvalConfig
from tests.docs_qa_eval.fixture_adapter import InMemoryDocsRetrievalAdapter
from tests.docs_qa_eval.golden_loader import DocsGoldenCase, load_cases
from tests.unit_tests.rag.intent_router_testkit import repo_context
@dataclass(slots=True, frozen=True)
class DocsEvalCaseResult:
case: DocsGoldenCase
intent_ok: bool
layers_ok: bool
retrieval_non_empty: bool
openapi_complete: bool
gate_ok: bool
openapi_output_ok: bool
def run_eval(config: DocsEvalConfig) -> list[DocsEvalCaseResult]:
    """Run every golden case through the docs QA pipeline and score it.

    A single ``IntentRouterV2`` is shared by all cases; each case gets its
    own ``DocsQAPipelineRunner`` backed by an in-memory retrieval adapter
    seeded with a list copy of that case's rows.

    Args:
        config: Supplies ``golden_cases_path`` (passed to ``load_cases``) and
            ``pipeline_mode`` (forwarded to ``pipeline.run`` as ``mode``).

    Returns:
        One ``DocsEvalCaseResult`` per loaded case, in the order the cases
        were loaded.
    """
    router = IntentRouterV2()
    results: list[DocsEvalCaseResult] = []
    for case in load_cases(config.golden_cases_path):
        pipeline = DocsQAPipelineRunner(
            router=router,
            retrieval_adapter=InMemoryDocsRetrievalAdapter(list(case.rows)),
            repo_context=repo_context(),
        )
        result = pipeline.run(
            case.query,
            rag_session_id="docs-test-session",
            mode=config.pipeline_mode,
        )
        router_result = result.router_result
        actual_layers = tuple(
            item.layer_id for item in router_result.retrieval_spec.layer_queries
        )
        diagnostics = result.diagnostics

        # The gate check is the same for every intent, so compute it once.
        gate_ok = diagnostics.gate_decision in {"allow", "partial"}

        # OpenAPI-specific checks pass trivially for all other intents.
        openapi_complete = True
        openapi_output_ok = True
        if case.expected_intent == "OPENAPI_GENERATION":
            status = diagnostics.openapi_status
            # `and` returns one of its operands; coerce so the dataclass
            # field always holds a real bool.
            openapi_complete = bool(status["has_path"] and status["has_method"])
            answer = result.answer
            openapi_output_ok = bool(answer.strip()) and (
                "paths:" in answer or "type: object" in answer
            )

        # NOTE(review): actual_layers is a tuple, so this comparison is only
        # ever true if case.expected_layers is a tuple as well - confirm.
        layers_ok = actual_layers == case.expected_layers
        intent_ok = (
            router_result.intent == case.expected_intent
            and router_result.query_plan.sub_intent == case.expected_sub_intent
        )
        results.append(
            DocsEvalCaseResult(
                case=case,
                intent_ok=intent_ok,
                layers_ok=layers_ok,
                retrieval_non_empty=bool(result.raw_rows),
                openapi_complete=openapi_complete,
                gate_ok=gate_ok,
                openapi_output_ok=openapi_output_ok,
            )
        )
    return results