53 lines
2.1 KiB
Python
53 lines
2.1 KiB
Python
from __future__ import annotations
|
|
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
from tests.pipeline_setup_v3.core.artifacts import ArtifactWriter
|
|
from tests.pipeline_setup_v3.core.case_loader import CaseDirectoryLoader
|
|
from tests.pipeline_setup_v3.core.models import V3CaseResult
|
|
from tests.pipeline_setup_v3.core.session_provider import RagSessionProvider
|
|
from tests.pipeline_setup_v3.core.validators import CaseValidator
|
|
|
|
|
|
class V3Runner:
    """Run every case loaded from a directory and record the outcomes.

    For each case the runner resolves a RAG session id, executes the case
    through the matching runtime adapter, validates the produced payload and
    hands the resulting ``V3CaseResult`` to an ``ArtifactWriter``.
    """

    def __init__(self, cases_dir: Path, results_dir: Path, run_name: str) -> None:
        """Set up the collaborators used by :meth:`run`.

        Args:
            cases_dir: Directory passed to ``CaseDirectoryLoader.load``.
            results_dir: Directory handed to the ``ArtifactWriter``.
            run_name: Name of this run, forwarded to the ``ArtifactWriter``.
        """
        self._cases_dir = cases_dir
        self._validator = CaseValidator()
        self._sessions = RagSessionProvider()
        # Created on first use by _agent_runtime_adapter().
        self._agent_runtime = None
        self._writer = ArtifactWriter(
            results_dir,
            run_name=run_name,
            started_at=datetime.now(),
        )

    @property
    def run_dir(self) -> Path:
        """Expose the writer's ``run_dir``."""
        return self._writer.run_dir

    def run(self) -> tuple[list[V3CaseResult], Path]:
        """Execute all cases, writing one artifact per case plus a summary.

        Returns:
            A pair of the collected case results (in load order) and the
            value returned by the writer's ``write_summary``.
        """
        outcomes: list[V3CaseResult] = []
        for case in CaseDirectoryLoader().load(self._cases_dir):
            outcome = self._run_case(case)
            self._writer.write_case(outcome)
            outcomes.append(outcome)
        summary = self._writer.write_summary(outcomes)
        return outcomes, summary

    def _run_case(self, case):
        """Execute and validate one case, returning its ``V3CaseResult``."""
        session_id = self._sessions.resolve(case.input)
        payload = self._execute(case, session_id)
        failures = self._validator.validate(case, payload.actual, payload.details)
        # A case passes exactly when the validator reports no mismatches.
        return V3CaseResult(
            case=case,
            actual=payload.actual,
            details=payload.details,
            passed=not failures,
            mismatches=failures,
        )

    def _execute(self, case, rag_session_id):
        """Dispatch *case* to the adapter selected by ``case.runner``.

        Raises:
            ValueError: If ``case.runner`` is anything but ``"agent_runtime"``.
        """
        if case.runner == "agent_runtime":
            return self._agent_runtime_adapter().execute(case, rag_session_id)
        raise ValueError(f"Unsupported runner: {case.runner}")

    def _agent_runtime_adapter(self):
        """Return the cached ``AgentRuntimeAdapter``, creating it on first call."""
        adapter = self._agent_runtime
        if adapter is None:
            # Imported here so the adapter module is only loaded when a case
            # actually needs the agent runtime.
            from tests.pipeline_setup_v3.runtime.agent_runtime_adapter import AgentRuntimeAdapter

            adapter = self._agent_runtime = AgentRuntimeAdapter()
        return adapter
|