This commit is contained in:
2026-03-27 15:51:10 +03:00
parent 15586f9a8c
commit 51378c5d66
1234 changed files with 95644 additions and 543076 deletions
+149 -14
View File
@@ -15,18 +15,9 @@ class ArtifactWriter:
def write_case(self, result: V3CaseResult) -> None:
stem = f"{result.case.source_file.stem}_{result.case.case_id}"
payload = {
"case_id": result.case.case_id,
"source_file": result.case.source_file.as_posix(),
"runner": result.case.runner,
"mode": result.case.mode,
"query": result.case.query,
"actual": result.actual,
"passed": result.passed,
"mismatches": result.mismatches,
"details": result.details,
}
payload = self._json_payload(result)
(self.run_dir / f"{stem}.json").write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
diagnostics = self._diagnostics_without_prompt(result.details)
lines = [
f"# {result.case.case_id}",
"",
@@ -41,17 +32,161 @@ class ArtifactWriter:
"## Actual",
json.dumps(result.actual, ensure_ascii=False, indent=2),
"",
"## Steps",
json.dumps(result.details.get("steps") or [], ensure_ascii=False, indent=2),
"## Pipeline Steps",
*self._md_steps(result),
"",
"## Diagnostics",
json.dumps(result.details.get("diagnostics") or {}, ensure_ascii=False, indent=2),
json.dumps(diagnostics, ensure_ascii=False, indent=2),
"",
*self._llm_request_section(result.details),
"",
"## Mismatches",
*([f"- {item}" for item in result.mismatches] or ["- none"]),
]
(self.run_dir / f"{stem}.md").write_text("\n".join(lines), encoding="utf-8")
def _json_payload(self, result: V3CaseResult) -> dict:
return {
"meta": {
"case_id": result.case.case_id,
"source_file": result.case.source_file.as_posix(),
"runner": result.case.runner,
"mode": result.case.mode,
"passed": result.passed,
"mismatches": result.mismatches,
"actual": result.actual,
},
"pipeline_steps": list(result.details.get("pipeline_steps") or []),
}
def _md_steps(self, result: V3CaseResult) -> list[str]:
steps = list(result.details.get("pipeline_steps") or [])
if not steps:
return ["- none"]
lines: list[str] = []
for item in steps:
step = str(item.get("step") or "").strip() or "unknown"
status = str(item.get("status") or "").strip()
lines.append(f"### {step}")
if status:
lines.append(f"- status: {status}")
timings = item.get("timings_ms") or {}
if timings:
lines.append(f"- timings_ms: {json.dumps(timings, ensure_ascii=False)}")
lines.append("```json")
lines.append(json.dumps({"input": item.get("input") or {}, "output": item.get("output") or {}}, ensure_ascii=False, indent=2))
lines.append("```")
lines.append("")
return lines[:-1] if lines and not lines[-1] else lines
def _diagnostics_without_prompt(self, details: dict) -> dict:
diagnostics = dict(details.get("diagnostics") or {})
diagnostics.pop("prompt", None)
return diagnostics
def _llm_request_section(self, details: dict) -> list[str]:
llm_request = dict(details.get("llm_request") or {})
if not llm_request:
llm_request = dict((details.get("diagnostics") or {}).get("prompt") or {})
if not llm_request:
return []
lines = [
"## LLM Request",
f"- prompt_name: {llm_request.get('prompt_name') or ''}",
f"- log_context: {llm_request.get('log_context') or ''}",
]
prompt_stats = dict(llm_request.get("prompt_stats") or {})
if prompt_stats:
lines.extend(
[
"",
"### Prompt Stats",
"```json",
json.dumps(prompt_stats, ensure_ascii=False, indent=2),
"```",
]
)
system_prompt = str(llm_request.get("system_prompt") or "").strip()
user_prompt = str(llm_request.get("user_prompt") or "").strip()
lines.extend(
[
"",
"### System Prompt",
"```text",
system_prompt or "",
"```",
"",
"### User Prompt",
*self._render_user_prompt(user_prompt),
]
)
return lines
def _render_user_prompt(self, user_prompt: str) -> list[str]:
payload = self._parse_json(user_prompt)
if payload is None:
return ["```text", user_prompt or "", "```"]
lines = ["```json", json.dumps(payload, ensure_ascii=False, indent=2), "```"]
lines.extend(self._prompt_overview(payload))
return lines
def _prompt_overview(self, payload: dict) -> list[str]:
lines = ["", "### User Prompt Overview"]
for key in ("question", "intent", "sub_intent"):
value = payload.get(key)
if value is not None:
lines.append(f"- {key}: {value}")
lines.extend(self._prompt_collection_line("documents", payload.get("documents")))
lines.extend(self._prompt_collection_line("facts", payload.get("facts")))
lines.extend(self._prompt_collection_line("relations", payload.get("relations")))
api_contract = payload.get("api_contract")
if isinstance(api_contract, dict):
lines.append("- api_contract:")
lines.extend(self._api_contract_lines(api_contract))
return lines
def _prompt_collection_line(self, name: str, value) -> list[str]:
items = value if isinstance(value, list) else []
if not items:
return [f"- {name}: 0"]
samples: list[str] = []
for item in items[:3]:
if not isinstance(item, dict):
continue
sample = str(
item.get("title")
or item.get("content")
or item.get("path")
or item.get("doc_id")
or item.get("id")
or ""
).strip()
if sample:
samples.append(" ".join(sample.split()))
suffix = f" | samples: {', '.join(samples)}" if samples else ""
return [f"- {name}: {len(items)}{suffix}"]
def _api_contract_lines(self, api_contract: dict) -> list[str]:
lines: list[str] = []
path = str(api_contract.get("path") or "").strip() or ""
method = str(api_contract.get("method") or "").strip() or ""
request_schema = api_contract.get("request_schema")
response_schema = api_contract.get("response_schema")
lines.append(f" path: {path}")
lines.append(f" method: {method}")
lines.append(f" has_request_schema: {bool(request_schema)}")
lines.append(f" has_response_schema: {bool(response_schema)}")
return lines
def _parse_json(self, text: str) -> dict | None:
if not text.strip():
return None
try:
payload = json.loads(text)
except json.JSONDecodeError:
return None
return payload if isinstance(payload, dict) else None
def write_summary(self, results: list[V3CaseResult]) -> Path:
path = self.run_dir / "summary.md"
path.write_text(SummaryComposer().compose(results), encoding="utf-8")