from __future__ import annotations from dataclasses import dataclass from app.modules.agent.intent_router_v2 import IntentRouterV2 from app.modules.agent.runtime.docs_qa_pipeline import DocsQAPipelineRunner from tests.docs_qa_eval.config import DocsEvalConfig from tests.docs_qa_eval.fixture_adapter import InMemoryDocsRetrievalAdapter from tests.docs_qa_eval.golden_loader import DocsGoldenCase, load_cases from tests.unit_tests.rag.intent_router_testkit import repo_context @dataclass(slots=True, frozen=True) class DocsEvalCaseResult: case: DocsGoldenCase intent_ok: bool layers_ok: bool retrieval_non_empty: bool openapi_complete: bool gate_ok: bool openapi_output_ok: bool def run_eval(config: DocsEvalConfig) -> list[DocsEvalCaseResult]: router = IntentRouterV2() cases = load_cases(config.golden_cases_path) results: list[DocsEvalCaseResult] = [] for case in cases: pipeline = DocsQAPipelineRunner(router=router, retrieval_adapter=InMemoryDocsRetrievalAdapter(list(case.rows)), repo_context=repo_context()) result = pipeline.run(case.query, rag_session_id="docs-test-session", mode=config.pipeline_mode) actual_layers = tuple(item.layer_id for item in result.router_result.retrieval_spec.layer_queries) diagnostics = result.diagnostics openapi_complete = True gate_ok = diagnostics.gate_decision in {"allow", "partial"} openapi_output_ok = True if case.expected_intent == "OPENAPI_GENERATION": openapi_complete = diagnostics.openapi_status["has_path"] and diagnostics.openapi_status["has_method"] gate_ok = diagnostics.gate_decision in {"allow", "partial"} openapi_output_ok = bool(result.answer.strip()) and ("paths:" in result.answer or "type: object" in result.answer) results.append( DocsEvalCaseResult( case=case, intent_ok=result.router_result.intent == case.expected_intent and result.router_result.query_plan.sub_intent == case.expected_sub_intent, layers_ok=actual_layers == case.expected_layers, retrieval_non_empty=bool(result.raw_rows), openapi_complete=openapi_complete, gate_ok=gate_ok, openapi_output_ok=openapi_output_ok, ) ) return results