52 lines
2.3 KiB
Python
52 lines
2.3 KiB
Python
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
|
|
from app.modules.agent.intent_router_v2 import IntentRouterV2
|
|
from app.modules.agent.runtime.docs_qa_pipeline import DocsQAPipelineRunner
|
|
from tests.docs_qa_eval.config import DocsEvalConfig
|
|
from tests.docs_qa_eval.fixture_adapter import InMemoryDocsRetrievalAdapter
|
|
from tests.docs_qa_eval.golden_loader import DocsGoldenCase, load_cases
|
|
from tests.unit_tests.rag.intent_router_testkit import repo_context
|
|
|
|
|
|
@dataclass(slots=True, frozen=True)
|
|
class DocsEvalCaseResult:
|
|
case: DocsGoldenCase
|
|
intent_ok: bool
|
|
layers_ok: bool
|
|
retrieval_non_empty: bool
|
|
openapi_complete: bool
|
|
gate_ok: bool
|
|
openapi_output_ok: bool
|
|
|
|
|
|
def run_eval(config: DocsEvalConfig) -> list[DocsEvalCaseResult]:
    """Run every golden docs-QA case through the pipeline and grade it.

    Cases are loaded from ``config.golden_cases_path``. Each case gets its own
    ``DocsQAPipelineRunner`` whose retrieval adapter is seeded with a copy of
    that case's rows; a single ``IntentRouterV2`` instance is shared by all
    cases.

    Args:
        config: Supplies ``golden_cases_path`` (where the cases are loaded
            from) and ``pipeline_mode`` (forwarded to ``pipeline.run``).

    Returns:
        One ``DocsEvalCaseResult`` per loaded case, in the order the cases
        were loaded.
    """
    router = IntentRouterV2()
    cases = load_cases(config.golden_cases_path)
    results: list[DocsEvalCaseResult] = []
    for case in cases:
        pipeline = DocsQAPipelineRunner(
            router=router,
            # list(...) hands the adapter its own copy of the case's rows.
            retrieval_adapter=InMemoryDocsRetrievalAdapter(list(case.rows)),
            repo_context=repo_context(),
        )
        result = pipeline.run(
            case.query,
            rag_session_id="docs-test-session",
            mode=config.pipeline_mode,
        )
        router_result = result.router_result
        actual_layers = tuple(
            item.layer_id for item in router_result.retrieval_spec.layer_queries
        )
        diagnostics = result.diagnostics

        # The gate check is the same for every intent, so compute it once.
        gate_ok = diagnostics.gate_decision in {"allow", "partial"}

        # OpenAPI checks apply only to generation cases; all other cases
        # pass them by default.
        openapi_complete = True
        openapi_output_ok = True
        if case.expected_intent == "OPENAPI_GENERATION":
            status = diagnostics.openapi_status
            # `and` returns one of its operands; coerce so the field really
            # holds a bool, as DocsEvalCaseResult declares.
            openapi_complete = bool(status["has_path"] and status["has_method"])
            answer = result.answer
            openapi_output_ok = bool(answer.strip()) and (
                "paths:" in answer or "type: object" in answer
            )

        intent_ok = (
            router_result.intent == case.expected_intent
            and router_result.query_plan.sub_intent == case.expected_sub_intent
        )
        results.append(
            DocsEvalCaseResult(
                case=case,
                intent_ok=intent_ok,
                layers_ok=actual_layers == case.expected_layers,
                retrieval_non_empty=bool(result.raw_rows),
                openapi_complete=openapi_complete,
                gate_ok=gate_ok,
                openapi_output_ok=openapi_output_ok,
            )
        )
    return results
|