Фиксируем состояние
This commit is contained in:
@@ -0,0 +1,323 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from app.modules.agent.code_qa_runtime.post_gate import CodeQaPostEvidenceGate
|
||||
from app.modules.agent.code_qa_runtime.prompt_payload_builder import CodeQaPromptPayloadBuilder
|
||||
from app.modules.rag.code_qa_pipeline.answer_synthesis import build_answer_synthesis_input
|
||||
from app.modules.rag.code_qa_pipeline.contracts import CodeChunkItem, EvidenceBundle
|
||||
from app.modules.rag.code_qa_pipeline.retrieval_result_builder import build_retrieval_result
|
||||
|
||||
|
||||
def test_retrieval_result_separates_semantic_hints_and_relations() -> None:
    """Dependency-graph rows and semantic-role rows land in separate result fields.

    Feeds one ``C2_DEPENDENCY_GRAPH`` row and one ``C4_SEMANTIC_ROLES`` row to
    ``build_retrieval_result`` and inspects the first entry of ``relations``
    and of ``semantic_hints`` on the returned object.
    """
    dependency_row = {
        "layer": "C2_DEPENDENCY_GRAPH",
        "path": "src/runtime.py",
        "content": "RuntimeManager calls TraceService",
        "span_start": 10,
        "span_end": 10,
        "metadata": {
            "edge_type": "calls",
            "src_qname": "RuntimeManager.start",
            "dst_ref": "TraceService.record",
        },
    }
    semantic_row = {
        "layer": "C4_SEMANTIC_ROLES",
        "path": "src/runtime.py",
        "title": "RuntimeManager",
        "content": "role: orchestrator",
        "span_start": 1,
        "span_end": 20,
        "metadata": {
            "symbol_name": "RuntimeManager",
            "role": "orchestrator",
            "confidence": 0.7,
        },
    }

    result = build_retrieval_result(
        [dependency_row, semantic_row],
        {"executed_layers": ["C2_DEPENDENCY_GRAPH", "C4_SEMANTIC_ROLES"]},
        {"status": "resolved"},
    )

    # The graph edge is exposed as a relation; "source" is expected to carry
    # the value supplied as metadata["src_qname"].
    first_relation = result.relations[0]
    assert first_relation["edge_type"] == "calls"
    assert first_relation["source"] == "RuntimeManager.start"

    # The semantic-role row is exposed as a hint rather than as a relation.
    first_hint = result.semantic_hints[0]
    assert first_hint["role"] == "orchestrator"
|
||||
|
||||
|
||||
def test_answer_synthesis_curates_explain_facts_and_demotes_c4() -> None:
    """EXPLAIN synthesis curates concrete facts and keeps the C4 role out of deep context.

    The bundle holds a symbol-catalog chunk, a source chunk, a semantic-role
    chunk and one ``calls`` relation. The test checks the curated ``explain``
    facts, the ``deep_context`` text and ``semantic_hints`` on the synthesis
    input returned by ``build_answer_synthesis_input``.
    """
    init_signature = "__init__(self, tracer, registry)"

    symbol_chunk = CodeChunkItem(
        layer="C1_SYMBOL_CATALOG",
        path="src/runtime.py",
        title="RuntimeManager.__init__",
        content=init_signature,
        start_line=1,
        end_line=3,
        metadata={
            "qname": "RuntimeManager.__init__",
            "kind": "method",
            "signature": init_signature,
        },
    )
    source_chunk = CodeChunkItem(
        layer="C0_SOURCE_CHUNKS",
        path="src/runtime.py",
        title="",
        content="self.tracer = tracer\nself.registry = registry\nself.tracer.record()\n",
        start_line=1,
        end_line=4,
        metadata={},
    )
    role_chunk = CodeChunkItem(
        layer="C4_SEMANTIC_ROLES",
        path="src/runtime.py",
        title="RuntimeManager",
        content="role: orchestrator",
        start_line=1,
        end_line=4,
        metadata={"symbol_name": "RuntimeManager", "role": "orchestrator"},
    )
    call_relation = {
        "path": "src/runtime.py",
        "start_line": 3,
        "end_line": 3,
        "metadata": {
            "edge_type": "calls",
            "src_qname": "RuntimeManager.__init__",
            "dst_ref": "self.tracer.record",
        },
    }

    bundle = EvidenceBundle(
        resolved_sub_intent="EXPLAIN",
        resolved_target="RuntimeManager",
        target_type="symbol",
        evidence_count=4,
        sufficient=True,
        code_chunks=[symbol_chunk, source_chunk, role_chunk],
        relations=[call_relation],
    )

    synthesis = build_answer_synthesis_input("Explain RuntimeManager", bundle)
    explain = synthesis.curated_facts["explain"]

    # Concrete facts taken from the symbol catalog entry.
    assert "RuntimeManager.__init__" in explain["required_methods"]
    assert "tracer" in explain["required_constructor_args"]

    # The call to record() must surface either in the deep context or in the
    # curated explain facts.
    record_is_visible = (
        "record" in synthesis.deep_context
        or "self.tracer.record" in json.dumps(explain)
    )
    assert record_is_visible

    # The semantic role must not appear in deep context; it is only a hint.
    assert "orchestrator" not in synthesis.deep_context
    assert synthesis.semantic_hints[0]["role"] == "orchestrator"
|
||||
|
||||
|
||||
def test_prompt_payload_builder_adds_explain_constraints() -> None:
    """The EXPLAIN prompt payload lists the methods to mention and forbids inference.

    The synthesis input is built from a bundle with one symbol-catalog chunk,
    while the ``evidence_pack`` handed to the builder is a separate bundle
    that carries only the sub-intent and the target.
    """
    # Bare bundle passed to the builder as evidence_pack (no chunks).
    bundle = EvidenceBundle(resolved_sub_intent="EXPLAIN", resolved_target="RuntimeManager")

    start_method_chunk = CodeChunkItem(
        layer="C1_SYMBOL_CATALOG",
        path="src/runtime.py",
        title="RuntimeManager.start",
        content="start(self)",
        metadata={
            "qname": "RuntimeManager.start",
            "kind": "method",
            "signature": "start(self)",
        },
    )
    synthesis_bundle = EvidenceBundle(
        resolved_sub_intent="EXPLAIN",
        resolved_target="RuntimeManager",
        code_chunks=[start_method_chunk],
        evidence_count=1,
    )
    synthesis = build_answer_synthesis_input("Explain RuntimeManager", synthesis_bundle)

    # build() output is parsed as JSON so that individual keys can be checked.
    serialized_payload = CodeQaPromptPayloadBuilder().build(
        user_query="Explain RuntimeManager",
        synthesis_input=synthesis,
        evidence_pack=bundle,
        answer_mode="normal",
    )
    payload = json.loads(serialized_payload)

    assert "must_mention_methods" in payload
    assert "RuntimeManager.start" in payload["must_mention_methods"]
    assert payload["must_not_infer_missing_details"] is True
|
||||
|
||||
|
||||
def test_prompt_payload_builder_adds_trace_flow_constraints() -> None:
    """The TRACE_FLOW prompt payload carries flow steps and an anti-overclaim flag.

    The synthesis input is built from two chained ``calls`` relations; the
    ``evidence_pack`` given to the builder is a bundle with only the
    sub-intent and the target set.
    """
    first_hop = {
        "path": "src/runtime.py",
        "start_line": 10,
        "metadata": {
            "edge_type": "calls",
            "src_qname": "RuntimeManager.start",
            "dst_ref": "TraceService.record",
        },
    }
    second_hop = {
        "path": "src/runtime.py",
        "start_line": 11,
        "metadata": {
            "edge_type": "calls",
            "src_qname": "TraceService.record",
            "dst_ref": "Registry.register",
        },
    }
    flow_bundle = EvidenceBundle(
        resolved_sub_intent="TRACE_FLOW",
        resolved_target="RuntimeManager",
        relations=[first_hop, second_hop],
        evidence_count=2,
    )
    synthesis = build_answer_synthesis_input("Trace RuntimeManager", flow_bundle)

    builder = CodeQaPromptPayloadBuilder()
    serialized_payload = builder.build(
        user_query="Trace RuntimeManager",
        synthesis_input=synthesis,
        evidence_pack=EvidenceBundle(
            resolved_sub_intent="TRACE_FLOW",
            resolved_target="RuntimeManager",
        ),
        answer_mode="normal",
    )
    payload = json.loads(serialized_payload)

    # A truthy value is enough here: the exact step format is not pinned.
    assert payload["must_mention_flow_steps"]
    assert payload["must_avoid_overclaiming_full_flow"] is True
|
||||
|
||||
|
||||
def test_post_gate_rejects_vague_explain_without_concrete_facts() -> None:
    """The post gate fails an EXPLAIN answer that cites no method from the evidence.

    The evidence holds the ``RuntimeManager.start`` method and a ``calls``
    relation to ``TraceService.record``. The answer under test names neither,
    so the gate is expected to fail with two specific reasons.
    """
    start_method_chunk = CodeChunkItem(
        layer="C1_SYMBOL_CATALOG",
        path="src/runtime.py",
        title="RuntimeManager.start",
        content="start(self)",
        metadata={
            "qname": "RuntimeManager.start",
            "kind": "method",
            "signature": "start(self)",
        },
    )
    call_relation = {
        "path": "src/runtime.py",
        "metadata": {
            "edge_type": "calls",
            "src_qname": "RuntimeManager.start",
            "dst_ref": "TraceService.record",
        },
    }
    bundle = EvidenceBundle(
        resolved_sub_intent="EXPLAIN",
        resolved_target="RuntimeManager",
        evidence_count=3,
        code_chunks=[start_method_chunk],
        relations=[call_relation],
    )

    gate = CodeQaPostEvidenceGate()
    result = gate.validate(
        answer="RuntimeManager имеет responsibilities и управляет системой.",
        answer_mode="normal",
        degraded_message="",
        sub_intent="EXPLAIN",
        user_query="Explain RuntimeManager",
        evidence_pack=bundle,
    )

    assert result.passed is False
    reasons = result.reasons
    assert "missing_concrete_methods" in reasons
    assert "too_vague_for_explain" in reasons
|
||||
|
||||
|
||||
def test_post_gate_accepts_explain_with_method_alias_and_call() -> None:
    """The post gate passes an EXPLAIN answer that uses short method names.

    The evidence lists ``RuntimeManager.start`` and a call to
    ``TraceService.record``. The answer refers to them as ``start()`` and
    ``record()`` rather than by qualified name and must still be accepted.
    """
    start_method_chunk = CodeChunkItem(
        layer="C1_SYMBOL_CATALOG",
        path="src/runtime.py",
        title="RuntimeManager.start",
        content="start(self)",
        metadata={
            "qname": "RuntimeManager.start",
            "kind": "method",
            "signature": "start(self)",
        },
    )
    call_relation = {
        "path": "src/runtime.py",
        "metadata": {
            "edge_type": "calls",
            "src_qname": "RuntimeManager.start",
            "dst_ref": "TraceService.record",
        },
    }
    bundle = EvidenceBundle(
        resolved_sub_intent="EXPLAIN",
        resolved_target="RuntimeManager",
        evidence_count=3,
        code_chunks=[start_method_chunk],
        relations=[call_relation],
    )

    gate = CodeQaPostEvidenceGate()
    result = gate.validate(
        answer="RuntimeManager запускает работу через метод start(), а затем вызывает record() у TraceService.",
        answer_mode="normal",
        degraded_message="",
        sub_intent="EXPLAIN",
        user_query="Explain RuntimeManager",
        evidence_pack=bundle,
    )

    assert result.passed is True
|
||||
|
||||
|
||||
def test_post_gate_requires_architecture_relations() -> None:
    """An ARCHITECTURE answer must state the concrete relation found in the evidence.

    The same gate instance validates two answers against one bundle: a
    general statement about the components, and a sentence that spells out
    the ``RuntimeManager.start`` -> ``TraceService.record`` call. Only the
    second one is expected to pass.
    """
    class_chunk = CodeChunkItem(
        layer="C1_SYMBOL_CATALOG",
        path="src/runtime.py",
        title="RuntimeManager",
        content="",
        metadata={"qname": "RuntimeManager", "kind": "class"},
    )
    role_chunk = CodeChunkItem(
        layer="C4_SEMANTIC_ROLES",
        path="src/runtime.py",
        title="RuntimeManager",
        content="",
        metadata={"symbol_name": "RuntimeManager", "role": "orchestrator"},
    )
    call_relation = {
        "path": "src/runtime.py",
        "metadata": {
            "edge_type": "calls",
            "src_qname": "RuntimeManager.start",
            "dst_ref": "TraceService.record",
        },
    }
    bundle = EvidenceBundle(
        resolved_sub_intent="ARCHITECTURE",
        resolved_target="RuntimeManager",
        evidence_count=3,
        code_chunks=[class_chunk, role_chunk],
        relations=[call_relation],
    )

    gate = CodeQaPostEvidenceGate()
    # Everything except the answer text is identical for both validations.
    shared_kwargs = {
        "answer_mode": "normal",
        "degraded_message": "",
        "sub_intent": "ARCHITECTURE",
        "user_query": "Architecture of RuntimeManager",
        "evidence_pack": bundle,
    }

    vague = gate.validate(
        answer="RuntimeManager и TraceService образуют центральный компонент runtime.",
        **shared_kwargs,
    )
    concrete = gate.validate(
        answer="RuntimeManager.start вызывает TraceService.record в src/runtime.py.",
        **shared_kwargs,
    )

    assert vague.passed is False
    # Either reason is accepted for the vague answer.
    vague_reasons = vague.reasons
    assert (
        "missing_relation_verbs" in vague_reasons
        or "missing_concrete_relations" in vague_reasons
    )
    assert concrete.passed is True
|
||||
|
||||
|
||||
def test_post_gate_rejects_architecture_with_retrieval_labels() -> None:
    """An ARCHITECTURE answer that mentions ``dataflow_slice`` is rejected.

    The gate is expected to fail the answer and report
    ``contains_retrieval_artifacts`` among its reasons.
    """
    call_relation = {
        "path": "src/runtime.py",
        "metadata": {
            "edge_type": "calls",
            "src_qname": "RuntimeManager.start",
            "dst_ref": "TraceService.dataflow_slice",
        },
    }
    bundle = EvidenceBundle(
        resolved_sub_intent="ARCHITECTURE",
        resolved_target="RuntimeManager",
        evidence_count=2,
        relations=[call_relation],
    )

    gate = CodeQaPostEvidenceGate()
    result = gate.validate(
        answer="RuntimeManager связан с dataflow_slice и строит вокруг него архитектуру.",
        answer_mode="normal",
        degraded_message="",
        sub_intent="ARCHITECTURE",
        user_query="Architecture of RuntimeManager",
        evidence_pack=bundle,
    )

    assert result.passed is False
    assert "contains_retrieval_artifacts" in result.reasons
|
||||
|
||||
|
||||
def test_post_gate_trace_flow_requires_sequence_and_blocks_overclaim() -> None:
    """A TRACE_FLOW answer must list the call steps and must not overclaim.

    The evidence holds two chained ``calls`` relations. One gate instance
    validates an answer that names no step from the evidence, and an answer
    that walks both calls in order. The first must fail, with an overclaim
    reason among others; the second must pass.
    """
    first_hop = {
        "path": "src/runtime.py",
        "start_line": 10,
        "metadata": {
            "edge_type": "calls",
            "src_qname": "RuntimeManager.start",
            "dst_ref": "TraceService.record",
        },
    }
    second_hop = {
        "path": "src/runtime.py",
        "start_line": 11,
        "metadata": {
            "edge_type": "calls",
            "src_qname": "TraceService.record",
            "dst_ref": "Registry.register",
        },
    }
    bundle = EvidenceBundle(
        resolved_sub_intent="TRACE_FLOW",
        resolved_target="RuntimeManager",
        evidence_count=3,
        relations=[first_hop, second_hop],
    )

    gate = CodeQaPostEvidenceGate()
    # Everything except the answer text is identical for both validations.
    shared_kwargs = {
        "answer_mode": "normal",
        "degraded_message": "",
        "sub_intent": "TRACE_FLOW",
        "user_query": "Trace RuntimeManager",
        "evidence_pack": bundle,
    }

    vague = gate.validate(
        answer="RuntimeManager инициализирует службы и полностью восстанавливается.",
        **shared_kwargs,
    )
    concrete = gate.validate(
        answer="Сначала RuntimeManager.start вызывает TraceService.record, затем TraceService.record вызывает Registry.register.",
        **shared_kwargs,
    )

    assert vague.passed is False
    vague_reasons = vague.reasons
    # Either of the two "not concrete enough" reasons is accepted ...
    assert (
        "missing_flow_steps" in vague_reasons
        or "too_vague_for_trace_flow" in vague_reasons
    )
    # ... but the overclaim reason is mandatory.
    assert "overclaims_trace_completeness" in vague_reasons
    assert concrete.passed is True
|
||||
Reference in New Issue
Block a user