27 lines
938 B
Python
27 lines
938 B
Python
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from tests.docs_qa_eval.config import DocsEvalConfig
|
|
from tests.docs_qa_eval.golden_loader import load_cases
|
|
from tests.docs_qa_eval.runner import run_eval
|
|
|
|
|
|
# Module-level ``pytestmark``: pytest applies the ``docs_qa_eval`` marker to
# every test collected from this module.
pytestmark = pytest.mark.docs_qa_eval
|
|
|
|
|
|
def test_load_cases_has_minimum_suite() -> None:
    """Require at least 35 cases at the default config's golden cases path."""
    config = DocsEvalConfig()
    golden_cases = load_cases(config.golden_cases_path)
    case_count = len(golden_cases)
    assert case_count >= 35
|
|
|
|
|
|
def test_run_eval_all_cases_pass_core_checks() -> None:
    """Run the eval with the default config and verify every result's flags.

    Every result must have ``intent_ok``, ``layers_ok``,
    ``retrieval_non_empty`` and ``gate_ok`` truthy. Results whose case has
    ``expected_intent == "OPENAPI_GENERATION"`` must additionally have
    ``openapi_complete`` and ``openapi_output_ok`` truthy.

    Failing cases are collected per check so the assertion message names
    them; a bare ``assert all(...)`` over a generator only reports that the
    overall result was false.
    """
    results = run_eval(DocsEvalConfig())
    assert results, "run_eval returned no results"

    # Flags are verified one at a time, in this order, across all results.
    for check in ("intent_ok", "layers_ok", "retrieval_non_empty", "gate_ok"):
        failing = [item.case for item in results if not getattr(item, check)]
        assert not failing, (
            f"{check} failed for {len(failing)} case(s): {failing!r}"
        )

    # OpenAPI-specific flags only apply to cases expecting that intent.
    # With no such cases these checks pass vacuously.
    openapi_results = [
        item
        for item in results
        if item.case.expected_intent == "OPENAPI_GENERATION"
    ]
    for check in ("openapi_complete", "openapi_output_ok"):
        failing = [
            item.case for item in openapi_results if not getattr(item, check)
        ]
        assert not failing, (
            f"{check} failed for {len(failing)} case(s): {failing!r}"
        )
|