This commit is contained in:
2026-04-09 15:41:07 +03:00
parent f62fb678b8
commit 2352f91cd3
192 changed files with 6983 additions and 996 deletions
@@ -4,43 +4,42 @@ import asyncio
from dataclasses import asdict, dataclass
from pathlib import Path
from app.core.agent.processes.v2.anchor_signals import route_anchor_summary
from app.core.agent.processes.v2 import V2IntentRouter
from app.core.agent.processes.v2.evidence.assembler import DocsEvidenceAssembler
from app.core.agent.processes.v2.evidence.gate import DocsEvidenceGate
from app.core.agent.processes.v2.models import RetrievedFile, RetrievedSummary, V2Intent, V2Subintent
from app.core.agent.processes.v2.retrieval import DocsMetadataLookupIndex
from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter
from app.core.agent.processes.v2.workflows.docs_explain_find_files.context import DocsExplainFindFilesContext
from app.core.agent.processes.v2.workflows.docs_explain_find_files.graph import DocsExplainFindFilesGraph
from app.core.agent.processes.v2.workflows.docs_explain_summary.context import DocsExplainSummaryContext
from app.core.agent.processes.v2.workflows.docs_explain_summary.graph import DocsExplainSummaryGraph
from app.core.agent.processes.v2.workflows.general_summary.context import GeneralSummaryContext
from app.core.agent.processes.v2.workflows.general_summary.graph import GeneralSummaryGraph
from app.core.agent.processes.v2 import V2IntentRouter, V2Process
from app.core.agent.utils.llm import AgentLlmService, PromptLoader
from app.core.rag.embedding.gigachat_embedder import GigaChatEmbedder
from app.core.rag.persistence import RagRepository
from app.core.rag.retrieval.session_retriever import RagSessionRetriever
from app.core.shared.gigachat.client import GigaChatClient
from app.core.shared.gigachat.settings import GigaChatSettings
from app.core.shared.gigachat.token_provider import GigaChatTokenProvider
from app.infra.observability.module_trace import RequestTraceContext
from app.core.agent.utils.process_v2.anchor_signals import route_anchor_summary
from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
from app.core.agent.utils.process_v2.models import V2Intent
from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver
from app.core.agent.utils.process_v2.rag_retrieval import DocsMetadataLookupIndex, V2RagRetrievalAdapter
from tests.pipeline_setup_v3.core.models import ExecutionPayload, V3Case
from tests.pipeline_setup_v3.shared.rag_indexer import DeterministicEmbedder
from tests.pipeline_setup_v4.executors.process_v2_router_executor import _KeywordLlm
class V2ProcessAdapter:
def __init__(self, *, workflow_llm_enabled: bool = True) -> None:
self._workflow_llm_enabled = workflow_llm_enabled
self._router = V2IntentRouter(llm=_build_v2_llm())
self._llm = _build_v2_llm()
self._router = V2IntentRouter(llm=_KeywordLlm(), enable_llm_disambiguation=True)
self._policy = V2RetrievalPolicyResolver()
retriever = RagSessionRetriever(repository=RagRepository(), embedder=GigaChatEmbedder(_build_client()))
retriever = RagSessionRetriever(repository=RagRepository(), embedder=DeterministicEmbedder())
self._retrieval = V2RagRetrievalAdapter(retriever)
self._evidence = DocsEvidenceAssembler()
self._gate = DocsEvidenceGate()
self._summary_graph = DocsExplainSummaryGraph(_build_v2_llm())
self._find_files_graph = DocsExplainFindFilesGraph()
self._general_graph = GeneralSummaryGraph(_build_v2_llm())
self._process = V2Process(
llm=self._llm,
policy_resolver=self._policy,
rag_adapter=self._retrieval,
evidence_assembler=DocsEvidenceAssembler(),
evidence_gate=DocsEvidenceGate(),
router=self._router,
workflow_llm_enabled=workflow_llm_enabled,
)
def execute(self, case: V3Case, rag_session_id: str | None) -> ExecutionPayload:
    """Run the async pipeline for ``case`` to completion and return its payload.

    Blocking wrapper: the coroutine from ``_execute_async`` is driven by
    ``asyncio.run``.
    """
    pending = self._execute_async(case, rag_session_id)
    return asyncio.run(pending)
@@ -81,6 +80,8 @@ class V2ProcessAdapter:
actual=_actual_from_v2(route),
details=_details(case.query, route=route, pipeline_steps=_build_pipeline_steps(runtime.logs)),
)
if case.mode == "full_chain":
return await self._execute_full_chain(case, rag_session_id, route)
plan = self._policy.resolve(route)
_log_pipeline_step(
runtime,
@@ -121,26 +122,7 @@ class V2ProcessAdapter:
actual=_actual_from_v2(route, rows=rows, plan=plan, answer_mode="partial"),
details=_details(case.query, route=route, plan=plan, rows=rows, pipeline_steps=_build_pipeline_steps(runtime.logs)),
)
answer, evidence, gate = await self._run_workflow(runtime, route, rag_session_id, rows)
answer_mode = gate.answer_mode
_log_pipeline_step(
runtime,
"answer_generated",
{"answer_mode": answer_mode, "answer_length": len(answer)},
)
return ExecutionPayload(
actual=_actual_from_v2(route, rows=rows, plan=plan, answer=answer, answer_mode=answer_mode),
details=_details(
case.query,
route=route,
plan=plan,
rows=rows,
evidence=evidence,
answer=answer,
logs=runtime.logs,
pipeline_steps=_build_pipeline_steps(runtime.logs),
),
)
raise ValueError(f"Unsupported process_v2 adapter mode: {case.mode}")
async def _retrieve_rows(self, route, rag_session_id: str | None, plan) -> list[dict]:
if not rag_session_id:
@@ -173,125 +155,54 @@ class V2ProcessAdapter:
merged.append(row)
return merged
async def _run_workflow(
    self,
    runtime: "_RuntimeStub",
    route,
    rag_session_id: str | None,
    rows: list[dict],
) -> tuple[str, dict, object]:
    """Assemble evidence for ``route``, run the matching workflow graph, return the result.

    Three paths are handled:

    * ``GENERAL_QA`` intent: summaries fed to the general summary graph;
    * ``FIND_FILES`` subintent: files fed to the find-files graph;
    * anything else: summaries fed to the docs-explain summary graph.

    Returns:
        ``(answer, evidence, gate)`` where ``evidence`` is a dict with
        ``"documents"`` and ``"files"`` lists (one of them always empty)
        and ``gate`` is the decision returned by the evidence gate.
    """
    is_general = route.intent == V2Intent.GENERAL_QA

    # The GENERAL_QA check takes precedence over the subintent, as in the
    # original branch order.
    if not is_general and route.subintent == V2Subintent.FIND_FILES:
        files = self._evidence.assemble_files(rows, route)
        gate = self._gate.check_files(route, files)
        self._log_evidence_checks(
            runtime,
            gate,
            files,
            mode="find_files",
            primary_key="primary_file",
            count_key="file_count",
        )
        context = DocsExplainFindFilesContext(
            runtime=runtime,
            route=route,
            rag_session_id=rag_session_id or "",
            files=files,
            gate_decision=gate,
        )
        final = await self._find_files_graph.run(context)
        return final.answer, {"documents": [], "files": [_serialize_file(item) for item in files]}, gate

    # Both summary flavours share evidence assembly, gating and logging;
    # only the context type and the graph differ.
    documents = self._evidence.assemble_summaries(rows, route)
    gate = self._gate.check_summaries(route, documents)
    self._log_evidence_checks(
        runtime,
        gate,
        documents,
        mode="summary",
        primary_key="primary_doc",
        count_key="document_count",
    )
    if is_general:
        context = GeneralSummaryContext(runtime=runtime, route=route, prompt_name="v2_general.summary_answer")
        context.workflow_llm_enabled = self._workflow_llm_enabled
        context.documents = documents
        context.gate_decision = gate
        graph = self._general_graph
    else:
        context = DocsExplainSummaryContext(
            runtime=runtime,
            route=route,
            rag_session_id=rag_session_id or "",
            prompt_name="v2_docs_explain.summary_answer",
            workflow_llm_enabled=self._workflow_llm_enabled,
            documents=documents,
            gate_decision=gate,
        )
        graph = self._summary_graph
    final = await graph.run(context)
    return final.answer, {"documents": [_serialize_summary(item) for item in documents], "files": []}, gate

def _log_evidence_checks(
    self,
    runtime: "_RuntimeStub",
    gate,
    items: list,
    *,
    mode: str,
    primary_key: str,
    count_key: str,
) -> None:
    """Log the assembled-evidence, ranking and gate-check steps, in that order."""
    _log_pipeline_step(
        runtime,
        "evidence_assembled",
        {"mode": mode, primary_key: items[0].path if items else None, count_key: len(items)},
    )
    self._log_ranking(runtime, items)
    _log_pipeline_step(
        runtime,
        "evidence_gate_checked",
        {"passed": gate.passed, "reason": gate.reason, "answer_mode": gate.answer_mode},
    )
def _trace_row(self, row: dict) -> dict[str, object]:
metadata = dict(row.get("metadata") or {})
return {
"path": str(row.get("path") or ""),
"layer": str(row.get("layer") or ""),
"title": str(row.get("title") or ""),
"document_id": str(metadata.get("document_id") or metadata.get("doc_id") or ""),
}
def _log_ranking(self, runtime: "_RuntimeStub", items: list) -> None:
top_docs: list[dict[str, object]] = []
for item in items[:4]:
top_docs.append(
{
"doc": getattr(item, "path", ""),
"score": getattr(item, "score", 0),
"match_reason": getattr(item, "match_reason", ""),
}
)
_log_pipeline_step(
runtime,
"ranking_explained",
{
"doc": getattr(item, "path", ""),
"score": getattr(item, "score", 0),
"score_breakdown": getattr(item, "score_breakdown", {}),
"match_reason": getattr(item, "match_reason", ""),
},
)
_log_pipeline_step(
runtime,
"ranking_explained",
{
"top_docs_after_ranking": top_docs,
"ranking_score_breakdown": [
{
"doc": getattr(item, "path", ""),
"score_breakdown": getattr(item, "score_breakdown", {}),
}
for item in items[:4]
],
async def _execute_full_chain(self, case: V3Case, rag_session_id: str | None, route) -> ExecutionPayload:
    """Run the full V2 process for ``case`` and build the payload from its trace log.

    A fresh runtime stub is created for the run. The retrieval plan, the
    fetched rows and the answer mode are read back from the events that the
    process wrote to ``runtime.logs``. Routing fields come from the ``route``
    passed in by the caller.
    """
    runtime = _RuntimeStub(query=case.query, rag_session_id=rag_session_id)
    outcome = await self._process.run(runtime)

    plan_event = _event_payload(runtime.logs, "process.v2.retrieval_policy", "retrieval_plan_resolved")
    rows_event = _event_payload(runtime.logs, "process.v2.rag_retrieval", "rag_rows_fetched")
    fetched_rows = list(rows_event.get("rows") or [])
    answer_event = _event_payload(runtime.logs, "process.v2.pipeline", "answer_generated")

    actual = {
        "domain": route.routing_domain,
        "intent": route.intent,
        "sub_intent": route.subintent,
        "rag_count": len(fetched_rows),
        "llm_answer": outcome.answer,
        "answer_mode": str(answer_event.get("answer_mode") or ""),
        "path_scope": tuple(),
        "symbol_candidates": tuple(),
        "entity_candidates": tuple(_entity_candidates(fetched_rows)),
        "doc_scope": tuple(_doc_scope(fetched_rows)),
        "layers": tuple(plan_event.get("layers") or []),
        "filters": dict(plan_event.get("filters") or {}),
    }
    details = {
        "query": case.query,
        "router_result": asdict(route),
        "retrieval_plan": plan_event,
        "rows": fetched_rows,
        "answer": outcome.answer,
        "logs": runtime.logs,
        "pipeline_steps": _build_pipeline_steps(runtime.logs),
    }
    return ExecutionPayload(actual=actual, details=details)
def _trace_row(self, row: dict) -> dict[str, object]:
metadata = row.get("metadata") or {}
content = str(row.get("content") or "").strip()
return {
"layer": str(row.get("layer") or ""),
"path": str(row.get("path") or ""),
"title": str(row.get("title") or ""),
"document_id": str(metadata.get("document_id") or metadata.get("doc_id") or row.get("document_id") or ""),
"entity_name": str(metadata.get("entity_name") or ""),
"summary_text": str(metadata.get("summary_text") or "")[:400],
"section_path": str(metadata.get("section_path") or ""),
"metadata_domain": str(metadata.get("domain") or ""),
"metadata_subdomain": str(metadata.get("subdomain") or ""),
"content_preview": content[:400],
}
@dataclass(slots=True)
class _RequestStub:
@@ -320,10 +231,10 @@ class _TraceLoggerStub:
class _RuntimeStub:
def __init__(self, *, query: str) -> None:
def __init__(self, *, query: str, rag_session_id: str | None = None) -> None:
self.logs: list[dict] = []
self.request = _RequestStub(request_id="pipeline_setup_v3", message=query)
self.session = _SessionStub()
self.session = _SessionStub(active_rag_session_id=rag_session_id)
self.publisher = _PublisherStub()
self.trace = RequestTraceContext(request_id=self.request.request_id, logger=_TraceLoggerStub(self.logs))
@@ -335,8 +246,10 @@ def _build_client() -> GigaChatClient:
def _build_v2_llm() -> AgentLlmService:
prompt_paths = [
Path(__file__).resolve().parents[3] / "src/app/core/agent/processes/v2/prompts.yml",
Path(__file__).resolve().parents[3] / "src/app/core/agent/processes/v2/general_prompts.yml",
Path(__file__).resolve().parents[3]
/ "src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/prompts/prompts.yml",
Path(__file__).resolve().parents[3]
/ "src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/prompts/prompts.yml",
Path(__file__).resolve().parents[3] / "src/app/core/agent/processes/v2/intent_router/routers/prompts.yml",
]
return AgentLlmService(client=_build_client(), prompts=PromptLoader(prompt_paths))
@@ -375,7 +288,12 @@ def _doc_scope(rows: list[dict]) -> list[str]:
values: list[str] = []
for row in rows:
metadata = dict(row.get("metadata") or {})
for candidate in (metadata.get("document_id"), metadata.get("doc_id"), row.get("path")):
for candidate in (
row.get("document_id"),
metadata.get("document_id"),
metadata.get("doc_id"),
row.get("path"),
):
value = str(candidate or "").strip()
if value and value not in values:
values.append(value)
@@ -386,20 +304,12 @@ def _entity_candidates(rows: list[dict]) -> list[str]:
values: list[str] = []
for row in rows:
metadata = dict(row.get("metadata") or {})
value = str(metadata.get("entity_name") or row.get("title") or "").strip()
value = str(row.get("entity_name") or metadata.get("entity_name") or row.get("title") or "").strip()
if value and value not in values and str(row.get("layer") or "") == "D3_ENTITY_CATALOG":
values.append(value)
return values
def _serialize_summary(item: RetrievedSummary) -> dict:
return asdict(item)
def _serialize_file(item: RetrievedFile) -> dict:
return asdict(item)
def _build_pipeline_steps(logs: list[dict]) -> list[dict]:
steps: list[dict] = []
for item in logs:
@@ -409,6 +319,16 @@ def _build_pipeline_steps(logs: list[dict]) -> list[dict]:
return steps
def _event_payload(logs: list[dict], module: str, event: str) -> dict[str, object]:
for item in logs:
if item.get("module") == module and item.get("event") == event:
payload = item.get("payload") or {}
if isinstance(payload, dict):
return dict(payload)
return {}
return {}
def _log_pipeline_step(runtime: _RuntimeStub, step: str, payload: dict[str, object]) -> None:
runtime.logs.append(
{
@@ -14,7 +14,7 @@ from tests.pipeline_setup_v4.core.models import ExecutionPayload, V4Case
class ProcessV2FullChainExecutor:
def __init__(self) -> None:
self._adapter = V2ProcessAdapter(workflow_llm_enabled=True)
self._adapter = V2ProcessAdapter(workflow_llm_enabled=False)
def execute(self, case: V4Case) -> ExecutionPayload:
if not case.rag_session_id:
@@ -27,7 +27,7 @@ class ProcessV2FullChainExecutor:
pipeline_steps = list(payload.details.get("pipeline_steps") or [])
pipeline_summary = {
"answer_mode": str(payload.actual.get("answer_mode") or ""),
"workflow_llm_enabled": True,
"workflow_llm_enabled": False,
"step_count": len(pipeline_steps),
"steps": [str(step.get("step") or "") for step in pipeline_steps if str(step.get("step") or "").strip()],
}
@@ -102,8 +102,8 @@ def _summarize_rows(rows: list[dict]) -> dict[str, object]:
path = str(row.get("path") or "").strip()
layer = str(row.get("layer") or "").strip()
metadata = dict(row.get("metadata") or {})
domain = str(metadata.get("domain") or "").strip()
subdomain = str(metadata.get("subdomain") or "").strip()
domain = str(row.get("metadata_domain") or metadata.get("domain") or "").strip()
subdomain = str(row.get("metadata_subdomain") or metadata.get("subdomain") or "").strip()
if path and path not in paths:
paths.append(path)
if layer and layer not in layers:
@@ -2,8 +2,8 @@ from __future__ import annotations
from dataclasses import asdict
from app.core.agent.processes.v2.models import V2RouteAnchors, V2RouteResult
from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
from app.core.agent.utils.process_v2.models import V2RouteAnchors, V2RouteResult
from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver
from tests.pipeline_setup_v4.core.models import ExecutionPayload, V4Case
@@ -3,7 +3,7 @@ from __future__ import annotations
from dataclasses import asdict
from app.core.agent.processes.v2 import V2IntentRouter
from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver
from tests.pipeline_setup_v4.core.models import ExecutionPayload, V4Case
from tests.pipeline_setup_v4.executors.process_v2_router_executor import _KeywordLlm
@@ -4,10 +4,10 @@ import asyncio
from dataclasses import asdict
from app.core.agent.processes.v2 import V2IntentRouter
from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter
from app.core.rag.persistence.repository import RagRepository
from app.core.rag.retrieval.session_retriever import RagSessionRetriever
from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver
from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
from tests.pipeline_setup_v3.shared.rag_indexer import DeterministicEmbedder
from tests.pipeline_setup_v4.core.models import ExecutionPayload, V4Case
from tests.pipeline_setup_v4.executors.process_v2_router_executor import _KeywordLlm
@@ -0,0 +1,68 @@
from __future__ import annotations
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.retrieval.api_endpoint_collector import (
ApiEndpointCollector,
)
def test_collector_returns_method_and_path_lines() -> None:
    """A ``GET|POST`` endpoint yields one line per method; a single-method endpoint is kept as is."""
    control_actions_row = {
        "metadata": {
            "endpoint": "GET|POST /actions/{action}",
            "summary_text": "Endpoint for runtime control actions",
        },
        "title": "HTTP API /actions/{action}",
        "path": "docs/api/control-actions-endpoint.md",
        "content": "",
    }
    health_row = {
        "metadata": {
            "endpoint": "GET /health",
        },
        "title": "HTTP API /health",
        "path": "docs/api/health-endpoint.md",
        "content": "",
    }

    collected = ApiEndpointCollector().collect([control_actions_row, health_row])

    assert "GET /actions/{action}" in collected
    assert "POST /actions/{action}" in collected
    assert "GET /health" in collected
def test_collector_ignores_file_paths_from_content() -> None:
    """Source-file paths in the summary or content must not be reported as endpoints."""
    health_row = {
        "metadata": {
            "endpoint": "GET /health",
            "summary_text": "Uses src/telegram_notify_app/control_api.py",
        },
        "title": "Health endpoint",
        "path": "docs/api/health-endpoint.md",
        "content": "See /telegram_notify_app/control_api.py and /telegram_notify_app/worker.py",
    }

    collected = ApiEndpointCollector().collect([health_row])

    assert collected == ["GET /health"]
def test_collector_uses_title_path_fallback_when_endpoint_metadata_missing() -> None:
    """Without ``endpoint`` metadata, the path in the title is reported as a ``GET`` endpoint."""
    row_without_endpoint = {
        "metadata": {
            "summary_text": "Control actions endpoint",
        },
        "title": "HTTP API /actions/{action}",
        "path": "docs/api/control-actions-endpoint.md",
        "content": "",
    }

    collected = ApiEndpointCollector().collect([row_without_endpoint])

    assert collected == ["GET /actions/{action}"]
@@ -1,7 +1,7 @@
from __future__ import annotations
from app.core.agent.processes.v2.evidence.assembler import DocsEvidenceAssembler
from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2RouteAnchors, V2RouteResult, V2Subintent
from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2RouteAnchors, V2RouteResult, V2Subintent
def _route(*, hints: list[str], terms: list[str], subintent: str = V2Subintent.SUMMARY) -> V2RouteResult:
+4 -4
View File
@@ -4,11 +4,11 @@ import asyncio
from dataclasses import dataclass
from app.core.agent.processes.v2 import V2IntentRouter, V2Process
from app.core.agent.processes.v2.retrieval.target_doc_seeding import normalize_doc_path
from app.core.agent.processes.v2.evidence.assembler import DocsEvidenceAssembler
from app.core.agent.processes.v2.evidence.gate import DocsEvidenceGate
from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver
from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import normalize_doc_path
from app.core.api.domain.models.agent_request import AgentRequest
from app.core.api.domain.models.agent_session import AgentSession
from app.schemas.orchestration import RequestExecutionStatus
+37 -1
View File
@@ -2,8 +2,8 @@ from __future__ import annotations
import asyncio
from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter
from app.core.rag.retrieval.session_retriever import RetrievalPlan
from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
class FakeRetriever:
@@ -50,6 +50,24 @@ class FakeRetriever:
self.calls.append(("substring", list(path_needles)))
return []
async def list_docs_scope_rows(self, _rag_session_id: str, *, limit: int = 8000) -> list[dict]:
    """Record the call and return two fixed D1 catalog rows (``/health`` and ``/send``).

    ``limit`` is accepted for signature compatibility and ignored.
    """
    del limit
    self.calls.append(("scope_rows", None))
    catalog = (
        ("docs/api/health-endpoint.md", "HTTP API /health", "GET /health"),
        ("docs/api/send-message-endpoint.md", "HTTP API /send", "GET /send"),
    )
    return [
        {
            "path": doc_path,
            "layer": "D1_DOCUMENT_CATALOG",
            "title": doc_title,
            "metadata": {"type": "api_method", "endpoint": endpoint},
        }
        for doc_path, doc_title, endpoint in catalog
    ]
def test_v2_rag_adapter_seeds_exact_rows_from_plan_hints() -> None:
adapter = V2RagRetrievalAdapter(FakeRetriever())
@@ -79,3 +97,21 @@ def test_v2_rag_adapter_uses_substring_fallback_for_missing_hint() -> None:
asyncio.run(adapter.fetch_rows("rag-1", "find file", plan))
assert ("substring", ["missing-health-endpoint.md"]) in retriever.calls
def test_v2_rag_adapter_applies_query_signal_filter_for_api_exposed() -> None:
    """With ``query_signals=["health"]`` only the health endpoint row should be returned."""
    api_filters = {
        "metadata.type": "api_method",
        "query_signals": ["health"],
    }
    plan = RetrievalPlan(
        profile="api_exposed",
        layers=["D1_DOCUMENT_CATALOG"],
        limit=50,
        filters=api_filters,
    )
    adapter = V2RagRetrievalAdapter(FakeRetriever())

    fetched = asyncio.run(adapter.fetch_rows("rag-1", "health endpoints", plan))

    assert len(fetched) == 1
    assert fetched[0]["path"] == "docs/api/health-endpoint.md"
@@ -1,7 +1,7 @@
from __future__ import annotations
from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2RouteAnchors, V2RouteResult, V2Subintent
from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2RouteAnchors, V2RouteResult, V2Subintent
from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver
def _route(
@@ -0,0 +1,138 @@
"""Tests for pre-LLM scope grounding from D1/D3 catalog rows (no extra RAG layer)."""
from __future__ import annotations
import json
from app.core.agent.processes.v2 import V2IntentRouter
from app.core.agent.utils.process_v2.models import V2ScopeType
class FakeLlm:
    """LLM stand-in that answers every ``generate`` call with one canned string."""

    def __init__(self, response: str) -> None:
        self.response = response

    def generate(self, prompt_name: str, user_input: str, **_kwargs) -> str:
        """Return the canned response; the prompt name, input and keyword arguments are ignored."""
        _ = (prompt_name, user_input)
        return self.response
def _llm_ok() -> str:
return json.dumps(
{
"routing_domain": "DOCS",
"intent": "DOC_EXPLAIN",
"subintent": "SUMMARY",
"confidence": 0.9,
"reason_short": "ok",
},
ensure_ascii=False,
)
def _fixture_rows() -> list[dict]:
return [
{
"layer": "D1_DOCUMENT_CATALOG",
"path": "docs/billing/overview.md",
"title": "Billing",
"content": "",
"metadata": {"domain": "billing", "summary_text": "Billing domain overview"},
},
{
"layer": "D1_DOCUMENT_CATALOG",
"path": "docs/billing/invoices.md",
"title": "Invoices",
"content": "",
"metadata": {"domain": "billing", "subdomain": "invoice", "tags": ["invoice", "invoices"]},
},
{
"layer": "D3_ENTITY_CATALOG",
"path": "docs/domains/order.md",
"title": "Order",
"content": "",
"metadata": {"entity_name": "Order", "domain": "billing"},
},
{
"layer": "D1_DOCUMENT_CATALOG",
"path": "docs/api/invoices_post.md",
"title": "POST /api/v1/invoices",
"content": "",
"metadata": {
"doc_type": "api_method",
"domain": "billing",
"endpoint": "/api/v1/invoices",
},
},
{
"layer": "D1_DOCUMENT_CATALOG",
"path": "docs/widgets/readme.md",
"title": "Widgets",
"content": "",
"metadata": {"domain": "widgets", "summary_text": "Unrelated domain for negative tests"},
},
]
def _router() -> V2IntentRouter:
    """Build a router with the canned LLM verdict and the fixture rows as scope provider."""

    def scope_rows(_sid):
        return _fixture_rows()

    canned_llm = FakeLlm(_llm_ok())
    return V2IntentRouter(llm=canned_llm, scope_rows_provider=scope_rows)
def test_scope_global_project_wide_enumeration() -> None:
    """A project-wide "which API methods exist" question should get GLOBAL scope."""
    query = "какие api методы есть в проекте"
    routed = _router().route(query, rag_session_id="sess-1")
    assert routed.scope_type == V2ScopeType.GLOBAL
def test_scope_domain_billing() -> None:
    """Naming the ``billing`` domain should give DOMAIN scope with ``billing`` as anchor and candidate."""
    query = "какие api есть в billing"
    routed = _router().route(query, rag_session_id="sess-1")
    anchors = routed.anchors
    assert routed.scope_type == V2ScopeType.DOMAIN
    assert anchors.process_domain == "billing"
    assert any(candidate.value == "billing" for candidate in anchors.candidate_domains)
def test_scope_subdomain_billing_invoices() -> None:
    """Domain plus subdomain words should give SUBDOMAIN scope anchored to billing / invoice."""
    query = "какие api есть в billing invoices"
    routed = _router().route(query, rag_session_id="sess-1")
    anchors = routed.anchors
    assert routed.scope_type == V2ScopeType.SUBDOMAIN
    assert anchors.process_domain == "billing"
    assert anchors.process_subdomain == "invoice"
def test_scope_entity_order_doc() -> None:
    """Asking for the ``Order`` docs should give ENTITY scope with that entity among the anchors."""
    query = "дай доку по Order"
    routed = _router().route(query, rag_session_id="sess-1")
    lowered_names = [name.lower() for name in routed.anchors.entity_names]
    assert routed.scope_type == V2ScopeType.ENTITY
    assert "order" in lowered_names
def test_scope_entity_endpoint_path() -> None:
    """A query quoting an endpoint path should give ENTITY scope and keep the path as an anchor."""
    query = "где описан POST /api/v1/invoices"
    routed = _router().route(query, rag_session_id="sess-1")
    assert routed.scope_type == V2ScopeType.ENTITY
    assert "/api/v1/invoices" in routed.anchors.endpoint_paths
def test_scope_vague_no_false_domain() -> None:
    """A gibberish query must stay UNKNOWN and must not get a process domain."""
    query = "что там с фывырапфыв"
    routed = _router().route(query, rag_session_id="sess-1")
    assert routed.scope_type == V2ScopeType.UNKNOWN
    assert routed.anchors.process_domain is None
def test_scope_russian_payments_phrase_matches_tag() -> None:
    """A Russian tag should ground the query to the billing domain or to a matching entity candidate."""
    scope_rows = _fixture_rows()
    scope_rows.append(
        {
            "layer": "D1_DOCUMENT_CATALOG",
            "path": "docs/billing/payments_ru.md",
            "title": "Платежи",
            "content": "",
            "metadata": {"domain": "billing", "tags": ["платежи"]},
        }
    )
    router = V2IntentRouter(llm=FakeLlm(_llm_ok()), scope_rows_provider=lambda _sid: scope_rows)

    routed = router.route("какие методы есть в платежи", rag_session_id="sess-1")

    accepted_scopes = {V2ScopeType.DOMAIN, V2ScopeType.ENTITY, V2ScopeType.SUBDOMAIN}
    assert routed.scope_type in accepted_scopes
    assert routed.anchors.process_domain == "billing" or any(
        "платеж" in candidate.value for candidate in routed.anchors.candidate_entities
    )
def test_router_without_session_skips_db_and_keeps_target_terms() -> None:
    """With no scope provider and no session id, scope stays UNKNOWN and target terms are still extracted."""
    router = V2IntentRouter(llm=FakeLlm(_llm_ok()))
    routed = router.route("Покажи где описан RuntimeHealth и /health")
    assert routed.scope_type == V2ScopeType.UNKNOWN
    assert "runtimehealth" in routed.target_terms
@@ -0,0 +1,83 @@
from __future__ import annotations
import hashlib
from app.core.api.application.filesystem_snapshot_resolver import FilesystemSnapshotResolver
def test_augment_adds_untracked_files_from_filesystem(tmp_path) -> None:
    """A docs file that exists only on disk is added next to the files sent in the request."""
    repo_root = tmp_path / "repo"
    repo_root.mkdir()
    api_docs = repo_root / "docs" / "api"
    api_docs.mkdir(parents=True)
    (api_docs / "control-actions-endpoint.md").write_text(
        "---\ndoc_type: api_method\n---\nGET|POST /actions/{action}\n",
        encoding="utf-8",
    )
    request_files = [
        {
            "path": "docs/api/health-endpoint.md",
            "content": "health",
            "content_hash": hashlib.sha256(b"health").hexdigest(),
        }
    ]

    augmented = FilesystemSnapshotResolver().augment(project_id=str(repo_root), files=request_files)

    seen_paths = {entry["path"] for entry in augmented}
    assert "docs/api/health-endpoint.md" in seen_paths
    assert "docs/api/control-actions-endpoint.md" in seen_paths
def test_augment_prefers_request_payload_for_existing_path(tmp_path) -> None:
    """When a path is both on disk and in the request, the request content is kept."""
    repo_root = tmp_path / "repo"
    repo_root.mkdir()
    api_docs = repo_root / "docs" / "api"
    api_docs.mkdir(parents=True)
    (api_docs / "health-endpoint.md").write_text("from-disk", encoding="utf-8")

    request_content = "from-request"
    request_files = [
        {
            "path": "docs/api/health-endpoint.md",
            "content": request_content,
            "content_hash": hashlib.sha256(request_content.encode("utf-8")).hexdigest(),
        }
    ]

    augmented = FilesystemSnapshotResolver().augment(project_id=str(repo_root), files=request_files)

    entries_by_path = {entry["path"]: entry for entry in augmented}
    assert entries_by_path["docs/api/health-endpoint.md"]["content"] == request_content
def test_augment_ignores_files_outside_root_docs(tmp_path) -> None:
    """Files under ``docs/`` are picked up from disk; files under ``src/`` are not."""
    repo_root = tmp_path / "repo"
    repo_root.mkdir()
    docs_dir = repo_root / "docs"
    docs_dir.mkdir()
    (docs_dir / "README.md").write_text("docs", encoding="utf-8")
    src_dir = repo_root / "src"
    src_dir.mkdir()
    (src_dir / "app.py").write_text("print('x')", encoding="utf-8")

    augmented = FilesystemSnapshotResolver().augment(project_id=str(repo_root), files=[])

    seen_paths = {entry["path"] for entry in augmented}
    assert "docs/README.md" in seen_paths
    assert "src/app.py" not in seen_paths
def test_augment_keeps_docs_when_request_uses_absolute_paths(tmp_path) -> None:
    """Absolute request paths under ``docs/`` come back repo-relative; absolute ``src/`` paths are dropped."""
    repo_root = tmp_path / "repo"
    repo_root.mkdir()
    api_docs = repo_root / "docs" / "api"
    api_docs.mkdir(parents=True)
    (api_docs / "health-endpoint.md").write_text("disk", encoding="utf-8")

    def request_file(absolute_path: str, content: bytes) -> dict:
        return {
            "path": absolute_path,
            "content": content.decode("utf-8"),
            "content_hash": hashlib.sha256(content).hexdigest(),
        }

    request_files = [
        request_file(str(api_docs / "control-actions-endpoint.md"), b"req-doc"),
        request_file(str(repo_root / "src" / "app.py"), b"req-src"),
    ]

    augmented = FilesystemSnapshotResolver().augment(project_id=str(repo_root), files=request_files)

    seen_paths = {entry["path"] for entry in augmented}
    assert "docs/api/control-actions-endpoint.md" in seen_paths
    assert "src/app.py" not in seen_paths
@@ -304,3 +304,34 @@ Read health
assert RagLayer.DOCS_INTEGRATION_INDEX in layers
assert "docs integration parse warning" in caplog.text
assert all(doc.source.path == "docs/api/health-endpoint.md" for doc in docs)
# Indexes one API doc through DocsIndexingPipeline and checks that the
# DOCS_DOCUMENT_CATALOG entry still carries the frontmatter "type" and "title".
# NOTE(review): the test name calls the frontmatter "broken", but the literal's
# original indentation is not visible in this view, so which part is malformed
# cannot be confirmed from here.
def test_docs_pipeline_tolerates_broken_frontmatter_and_keeps_api_type() -> None:
pipeline = DocsIndexingPipeline()
# Frontmatter declares both "type" and "doc_type" as api_method, plus a
# multi-method endpoint ("GET|POST").
content = """---
id: api.control_actions_endpoint
type: api_method
doc_type: api_method
title: HTTP API /actions/{action}
endpoint: GET|POST /actions/{action}
links:
called_by:
- ext.operator
tags:
- api
---
# HTTP API /actions/{action}
## Summary
Control actions endpoint.
"""
docs = pipeline.index_file(
repo_id="acme/proj",
commit_sha="abc123",
path="docs/api/control-actions-endpoint.md",
content=content,
)
# next() without a default raises StopIteration if no catalog-layer document
# was produced, which fails the test as well.
catalog = next(doc for doc in docs if doc.layer == RagLayer.DOCS_DOCUMENT_CATALOG)
assert catalog.metadata["type"] == "api_method"
assert catalog.metadata["title"] == "HTTP API /actions/{action}"