Роутер работает нормально в process v2
This commit is contained in:
@@ -0,0 +1,19 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import app.core.application as application_module
|
||||
|
||||
|
||||
def test_startup_keeps_backend_alive_when_database_bootstrap_fails(monkeypatch) -> None:
    """Startup must not propagate a bootstrap failure; health reports it instead.

    ``bootstrap_database`` on the application module is replaced with a stub
    that always raises, then the health payload is checked for the degraded
    status, the reason code and the original error text.
    """

    def fail_bootstrap(*_args, **_kwargs) -> None:
        raise RuntimeError("db down")

    monkeypatch.setattr(application_module, "bootstrap_database", fail_bootstrap)

    app = application_module.ModularApplication()

    # Must return normally even though the patched bootstrap raises.
    app.startup()

    health = app.health_payload()
    assert health["status"] == "degraded"
    assert health["reason"] == "database_unavailable"
    assert "db down" in health["details"]
|
||||
@@ -1,7 +1,7 @@
|
||||
import requests
|
||||
|
||||
from app.modules.shared.gigachat.client import GigaChatClient
|
||||
from app.modules.shared.gigachat.settings import GigaChatSettings
|
||||
from app.core.shared.gigachat.client import GigaChatClient
|
||||
from app.core.shared.gigachat.settings import GigaChatSettings
|
||||
|
||||
|
||||
class _FakeTokenProvider:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import logging
|
||||
|
||||
from app.modules.agent.llm.service import AgentLlmService
|
||||
from app.core.agent.llm.service import AgentLlmService
|
||||
|
||||
|
||||
class _FakeClient:
|
||||
@@ -19,7 +19,7 @@ class _FakePrompts:
|
||||
def test_llm_service_logs_input_and_output_for_graph_context(caplog) -> None:
|
||||
service = AgentLlmService(_FakeClient(), _FakePrompts())
|
||||
|
||||
with caplog.at_level(logging.WARNING, logger="app.modules.agent.llm.service"):
|
||||
with caplog.at_level(logging.WARNING, logger="app.core.agent.llm.service"):
|
||||
result = service.generate("general_answer", "User input", log_context="graph.default.answer")
|
||||
|
||||
assert result == "LLM output"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import logging
|
||||
|
||||
from app.core.logging_setup import ScrubbingFormatter
|
||||
from app.infra.logging_setup import ScrubbingFormatter
|
||||
|
||||
|
||||
def test_scrubbing_formatter_redacts_identifiers_and_adds_blank_line() -> None:
|
||||
|
||||
@@ -1,98 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.modules.rag.webhook_service import RepoWebhookService
|
||||
|
||||
|
||||
class FakeStoryWriter:
    """Test double that records every ``record_story_commit`` call."""

    def __init__(self) -> None:
        # One dict of keyword arguments per recorded call, in call order.
        self.calls: list[dict] = []

    def record_story_commit(self, **kwargs) -> None:
        """Store the keyword arguments of this call for later assertions."""
        self.calls.append(kwargs)
|
||||
|
||||
|
||||
class FakeCacheWriter:
    """Test double that records every ``record_repo_cache`` call."""

    def __init__(self) -> None:
        # One dict of keyword arguments per recorded call, in call order.
        self.calls: list[dict] = []

    def record_repo_cache(self, **kwargs) -> None:
        """Store the keyword arguments of this call for later assertions."""
        self.calls.append(kwargs)
|
||||
|
||||
|
||||
def test_gitea_webhook_binds_story() -> None:
    """A Gitea push whose commit message carries a story id is bound and cached."""
    writer = FakeStoryWriter()
    cache = FakeCacheWriter()
    service = RepoWebhookService(writer, cache)

    result = service.process(
        provider="gitea",
        payload={
            "repository": {"full_name": "acme/proj"},
            "ref": "refs/heads/feature/AAAA-1234",
            "pusher": {"username": "alice"},
            "commits": [
                {
                    "id": "abc123",
                    "message": "FEAT-1 update docs",
                    "added": ["docs/new.md"],
                    "modified": ["docs/api.md"],
                    "removed": [],
                }
            ],
        },
    )

    assert result["accepted"] is True
    assert result["story_bound"] is True
    # The expected id is the one in the commit message, not the one in the branch name.
    assert result["story_id"] == "FEAT-1"
    assert result["cache_recorded"] is True
    assert len(writer.calls) == 1
    assert len(cache.calls) == 1
    assert writer.calls[0]["project_id"] == "acme/proj"
|
||||
|
||||
|
||||
def test_webhook_without_story_id_is_non_fatal() -> None:
    """A Bitbucket push without a story id is accepted and cached but not bound."""
    writer = FakeStoryWriter()
    cache = FakeCacheWriter()
    service = RepoWebhookService(writer, cache)

    result = service.process(
        provider="bitbucket",
        payload={
            "repository": {"full_name": "acme/proj"},
            "push": {
                "changes": [
                    {
                        "new": {
                            "name": "feature/no-story",
                            "target": {"hash": "abc123", "message": "update docs"},
                        }
                    }
                ]
            },
        },
    )

    assert result["accepted"] is True
    assert result["story_bound"] is False
    assert result["cache_recorded"] is True
    assert len(cache.calls) == 1
    # No story id means the story writer must not be touched.
    assert writer.calls == []
|
||||
|
||||
|
||||
def test_provider_autodetect_by_headers() -> None:
    """Without an explicit provider, an ``X-Gitea-Event`` header is enough to process the push."""
    writer = FakeStoryWriter()
    # Only the story writer is passed here; no cache writer is supplied.
    service = RepoWebhookService(writer)

    result = service.process(
        headers={"X-Gitea-Event": "push"},
        payload={
            "repository": {"full_name": "acme/proj"},
            "ref": "refs/heads/feature/AAAA-1234",
            "commits": [{"id": "abc123", "message": "AAAA-1234 update"}],
        },
    )

    assert result["accepted"] is True
    assert result["story_bound"] is True
    assert result["story_id"] == "AAAA-1234"
|
||||
@@ -0,0 +1,26 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.infra.exceptions import AppError
|
||||
from app.core.api.application.session_service import SessionService
|
||||
from app.core.api.infrastructure.ids.session_id_factory import SessionIdFactory
|
||||
from app.core.api.infrastructure.stores.in_memory_session_store import InMemorySessionStore
|
||||
import pytest
|
||||
|
||||
|
||||
def test_create_session_allows_binding_rag_at_creation_time() -> None:
    """Creating a session with a RAG id yields a fresh session bound to that id."""
    service = SessionService(store=InMemorySessionStore(), ids=SessionIdFactory())

    session = service.create("rag_123")

    assert session.session_id.startswith("as_")
    assert session.active_rag_session_id == "rag_123"
    assert session.messages == []
    assert session.conversation_state.turn_index == 0
|
||||
|
||||
|
||||
def test_get_missing_session_raises_not_found() -> None:
    """Looking up an unknown session id raises ``AppError`` with code ``session_not_found``."""
    service = SessionService(store=InMemorySessionStore(), ids=SessionIdFactory())
    with pytest.raises(AppError) as exc:
        service.get("missing-session")

    # ``exc.value`` is only populated once the ``with`` block has exited.
    assert exc.value.code == "session_not_found"
|
||||
@@ -0,0 +1,80 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.core.agent.processes.v2.evidence.assembler import DocsEvidenceAssembler
|
||||
from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2RouteAnchors, V2RouteResult, V2Subintent
|
||||
|
||||
|
||||
def _route(*, hints: list[str], terms: list[str], subintent: str = V2Subintent.SUMMARY) -> V2RouteResult:
    """Build a DOCS / DOC_EXPLAIN route result for assembler tests.

    Args:
        hints: Values for ``anchors.target_doc_hints``.
        terms: Values for ``target_terms``; when ``"/health"`` is among them
            it is also set as the only endpoint path anchor.
        subintent: Subintent to put on the route.
    """
    return V2RouteResult(
        routing_domain=V2Domain.DOCS,
        intent=V2Intent.DOC_EXPLAIN,
        subintent=subintent,
        user_query="q",
        normalized_query="q",
        target_terms=terms,
        anchors=V2RouteAnchors(target_doc_hints=hints, endpoint_paths=["/health"] if "/health" in terms else []),
    )
|
||||
|
||||
|
||||
def test_target_doc_hint_is_hard_boosted_over_readme() -> None:
    """A hinted target document ranks above the README and carries a large boost."""
    rows = [
        {
            "path": "docs/README.md",
            "title": "README",
            "content": "",
            "layer": "D1_DOCUMENT_CATALOG",
            "metadata": {"summary_text": "index", "document_id": "docs.readme"},
        },
        {
            "path": "docs/api/health-endpoint.md",
            "title": "Health endpoint",
            "content": "",
            "layer": "D1_DOCUMENT_CATALOG",
            "metadata": {"summary_text": "health summary", "document_id": "api.health"},
        },
    ]
    route = _route(
        hints=["docs/api/health-endpoint.md"],
        terms=["/health", "health"],
    )

    docs = DocsEvidenceAssembler().assemble_summaries(rows, route)

    # The README comes first in the input, so first place must come from the boost.
    assert docs[0].path == "docs/api/health-endpoint.md"
    assert docs[0].score_breakdown["target_doc_boost"] >= 1000
|
||||
|
||||
|
||||
def test_find_files_prefers_exact_path_match() -> None:
    """For FIND_FILES the hinted document is listed first with a path or alias match reason."""
    rows = [
        {
            "path": "docs/architecture/telegram-notify-app-overview.md",
            "title": "Overview",
            "content": "overview",
            "layer": "D1_DOCUMENT_CATALOG",
            "metadata": {"document_id": "arch"},
        },
        {
            "path": "docs/domains/runtime-health-entity.md",
            "title": "Runtime health",
            "content": "runtime health",
            "layer": "D1_DOCUMENT_CATALOG",
            "metadata": {"document_id": "domain.runtime"},
        },
    ]
    route = V2RouteResult(
        routing_domain=V2Domain.DOCS,
        intent=V2Intent.DOC_EXPLAIN,
        subintent=V2Subintent.FIND_FILES,
        user_query="Где находится runtime health?",
        normalized_query="Где находится runtime health?",
        target_terms=["runtime", "health"],
        anchors=V2RouteAnchors(
            target_doc_hints=["docs/domains/runtime-health-entity.md"],
            matched_aliases=["runtime-health-entity"],
        ),
    )

    files = DocsEvidenceAssembler().assemble_files(rows, route)

    assert files[0].path == "docs/domains/runtime-health-entity.md"
    assert files[0].match_reason in {"exact_path", "alias_match"}
|
||||
@@ -0,0 +1,98 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from app.core.agent.processes.v2 import V2IntentRouter
|
||||
|
||||
|
||||
class FakeLlm:
    """LLM test double that returns a canned response or raises on demand."""

    def __init__(self, response: str, *, fail: bool = False) -> None:
        self.response = response
        self.fail = fail
        # (prompt_name, user_input) pairs, one per ``generate`` call.
        self.calls: list[tuple[str, str]] = []

    def generate(self, prompt_name: str, user_input: str, **_kwargs) -> str:
        """Record the call, then return the canned response.

        Raises:
            RuntimeError: When the instance was created with ``fail=True``.
                The call is still recorded before raising.
        """
        self.calls.append((prompt_name, user_input))
        if self.fail:
            raise RuntimeError("llm unavailable")
        return self.response
|
||||
|
||||
|
||||
def _llm_response(domain: str, intent: str, subintent: str, *, confidence: float = 0.9, reason: str = "ok") -> str:
|
||||
return json.dumps(
|
||||
{
|
||||
"routing_domain": domain,
|
||||
"intent": intent,
|
||||
"subintent": subintent,
|
||||
"confidence": confidence,
|
||||
"reason_short": reason,
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
|
||||
|
||||
def test_router_uses_llm_as_default_selector() -> None:
    """A valid LLM decision is taken as-is and the LLM is called exactly once."""
    llm = FakeLlm(_llm_response("DOCS", "DOC_EXPLAIN", "FIND_FILES", reason="file request"))

    result = V2IntentRouter(llm=llm).route("В каком файле описан RuntimeHealth?")

    assert result.subintent == "FIND_FILES"
    assert result.routing_mode == "llm_default"
    assert result.llm_router_used is True
    assert result.confidence > 0.8
    assert len(llm.calls) == 1
|
||||
|
||||
|
||||
def test_router_falls_back_when_llm_returns_invalid_enum() -> None:
    """An unknown subintent from the LLM switches the router to fallback mode."""
    llm = FakeLlm(_llm_response("DOCS", "DOC_EXPLAIN", "MADE_UP"))

    result = V2IntentRouter(llm=llm).route("В каком документе описан runtime health?")

    assert result.routing_domain == "DOCS"
    assert result.subintent == "FIND_FILES"
    assert result.routing_mode == "llm_fallback"
    # The LLM was consulted even though its answer was rejected.
    assert result.llm_router_used is True
|
||||
|
||||
|
||||
def test_router_falls_back_to_general_summary_when_llm_fails() -> None:
    """When the LLM call raises, the route falls back to GENERAL_QA / SUMMARY."""
    result = V2IntentRouter(llm=FakeLlm("{}", fail=True)).route("Расскажи что важно")

    assert result.subintent == "SUMMARY"
    assert result.routing_mode == "llm_fallback"
    assert result.llm_router_used is True
    assert result.intent == "GENERAL_QA"
|
||||
|
||||
|
||||
def test_router_keeps_endpoint_anchor_and_excludes_it_from_file_names() -> None:
    """An endpoint path in the query becomes an endpoint anchor, not a file name."""
    result = V2IntentRouter(llm=FakeLlm(_llm_response("DOCS", "DOC_EXPLAIN", "SUMMARY"))).route(
        "Что делает endpoint /send?"
    )

    assert result.anchors.endpoint_paths == ["/send"]
    assert "/send" not in result.anchors.file_names
|
||||
|
||||
|
||||
def test_router_cleans_target_terms_with_lowercase_rules() -> None:
    """Target terms drop the command and question words and keep lower-cased entities and endpoints."""
    result = V2IntentRouter(llm=FakeLlm(_llm_response("DOCS", "DOC_EXPLAIN", "FIND_FILES"))).route(
        "Покажи где описан RuntimeHealth и /health"
    )

    assert "покажи" not in result.target_terms
    assert "где" not in result.target_terms
    assert "runtimehealth" in result.target_terms
    assert "/health" in result.target_terms
|
||||
|
||||
|
||||
def test_router_detects_doc_like_file_names_only() -> None:
    """A documentation path is recognised as a file name; a bare identifier is not."""
    result = V2IntentRouter(llm=FakeLlm(_llm_response("DOCS", "DOC_EXPLAIN", "FIND_FILES"))).route(
        "Покажи document docs/architecture/overview.md и runtime_health"
    )

    assert "docs/architecture/overview.md" in result.anchors.file_names
    assert "runtime_health" not in result.anchors.file_names
|
||||
|
||||
|
||||
def test_router_reduces_confidence_for_short_vague_query() -> None:
    """For the short query "Что это?" the final confidence is below the LLM-reported 0.8."""
    result = V2IntentRouter(llm=FakeLlm(_llm_response("GENERAL", "GENERAL_QA", "SUMMARY", confidence=0.8))).route("Что это?")

    assert result.confidence < 0.8
|
||||
@@ -0,0 +1,67 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.core.agent.processes.v2.intent_router.modules.anchors import V2AnchorExtractor
|
||||
from app.core.agent.processes.v2.intent_router.modules.target_terms import V2TargetTermsExtractor
|
||||
|
||||
|
||||
def test_target_terms_keeps_only_endpoint_for_short_explain_query() -> None:
    """In a short "explain" query only the endpoint path survives as a target term."""
    analysis = V2TargetTermsExtractor().extract("Кратко объясни как работает /health")

    assert analysis.target_terms == ["/health"]
|
||||
|
||||
|
||||
def test_target_terms_keeps_domain_terms_without_question_words() -> None:
    """Question words are dropped; the domain words remain in query order."""
    analysis = V2TargetTermsExtractor().extract("Что делает runtime health")

    assert analysis.target_terms == ["runtime", "health"]
|
||||
|
||||
|
||||
def test_target_terms_keeps_only_endpoint_for_file_marker_query() -> None:
    """A "which file describes ..." query yields only the endpoint path as a target term."""
    analysis = V2TargetTermsExtractor().extract("В каком файле описан /health?")

    assert analysis.target_terms == ["/health"]
|
||||
|
||||
|
||||
def test_target_terms_drops_noisy_filler_words() -> None:
    """Colloquial filler words are removed, leaving only ``health``."""
    analysis = V2TargetTermsExtractor().extract("Где там дока про health, покажи плз")

    assert analysis.target_terms == ["health"]
|
||||
|
||||
|
||||
def test_target_terms_keeps_api_and_explicit_endpoint() -> None:
    """Both the endpoint path and ``api`` are kept, with the endpoint listed first."""
    analysis = V2TargetTermsExtractor().extract("Нужен краткий док-саммари по api /send")

    assert analysis.target_terms == ["/send", "api"]
|
||||
|
||||
|
||||
def test_target_terms_keeps_identifier_like_token() -> None:
    """A snake_case identifier is kept whole as a target term."""
    analysis = V2TargetTermsExtractor().extract("Что делает telegram_notify?")

    assert analysis.target_terms == ["telegram_notify"]
|
||||
|
||||
|
||||
def test_target_terms_rejects_broken_path_like_token() -> None:
    """A path with an unclosed ``{`` placeholder produces no target terms."""
    analysis = V2TargetTermsExtractor().extract("Опиши /actions/{action")

    assert analysis.target_terms == []
|
||||
|
||||
|
||||
def test_file_names_accepts_real_doc_path() -> None:
    """A documentation file path is reported as a file-name anchor."""
    terms = V2TargetTermsExtractor().extract("docs/api/health.md")
    anchors = V2AnchorExtractor().extract("docs/api/health.md", terms).anchors

    assert anchors.file_names == ["docs/api/health.md"]
|
||||
|
||||
|
||||
def test_file_names_rejects_endpoint_path() -> None:
    """An endpoint path such as ``/health`` is not reported as a file name."""
    terms = V2TargetTermsExtractor().extract("/health")
    anchors = V2AnchorExtractor().extract("/health", terms).anchors

    assert anchors.file_names == []
|
||||
|
||||
|
||||
def test_file_names_rejects_identifier_like_token() -> None:
    """A bare snake_case identifier is not reported as a file name."""
    terms = V2TargetTermsExtractor().extract("telegram_notify")
    anchors = V2AnchorExtractor().extract("telegram_notify", terms).anchors

    assert anchors.file_names == []
|
||||
@@ -0,0 +1,286 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from dataclasses import dataclass
|
||||
|
||||
from app.core.agent.processes.v2 import V2IntentRouter, V2Process
|
||||
from app.core.agent.processes.v2.retrieval.target_doc_seeding import normalize_doc_path
|
||||
from app.core.agent.processes.v2.evidence.assembler import DocsEvidenceAssembler
|
||||
from app.core.agent.processes.v2.evidence.gate import DocsEvidenceGate
|
||||
from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
|
||||
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
|
||||
from app.core.api.domain.models.agent_request import AgentRequest
|
||||
from app.core.api.domain.models.agent_session import AgentSession
|
||||
from app.schemas.orchestration import RequestExecutionStatus
|
||||
|
||||
|
||||
class FakePublisher:
    """Publisher test double whose async methods accept anything and do nothing."""

    async def publish_status(self, *_args, **_kwargs) -> None:
        """Ignore the status update."""
        return None

    async def publish_user(self, *_args, **_kwargs) -> None:
        """Ignore the user-facing message."""
        return None
|
||||
|
||||
|
||||
class FakeTrace:
    """Trace test double that collects logged events in memory."""

    def __init__(self) -> None:
        # Each event is ("module", title, payload).
        self.events: list[tuple[str, str, dict | None]] = []

    def module(self, _name: str) -> "FakeTrace":
        """Return this same object, so every module scope logs into one list."""
        return self

    def log(self, title, payload=None, **_kwargs) -> None:
        """Append an event; the title is stringified, extra keyword arguments are ignored."""
        self.events.append(("module", str(title), payload))
|
||||
|
||||
|
||||
class FakeLlm:
    """LLM test double that always returns a fixed answer and records its calls."""

    def __init__(self, answer: str) -> None:
        self.answer = answer
        # (prompt_name, user_input) pairs, one per ``generate`` call.
        self.calls: list[tuple[str, str]] = []

    def generate(self, prompt_name: str, user_input: str, **_kwargs) -> str:
        """Record the call and return the fixed answer; extra keyword arguments are ignored."""
        self.calls.append((prompt_name, user_input))
        return self.answer
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class FakeRagAdapter:
|
||||
"""Имитирует сырые строки из RagSessionRetriever для summary / find_files."""
|
||||
|
||||
summary_rows: list[dict]
|
||||
file_rows: list[dict]
|
||||
|
||||
async def fetch_rows(self, _rag_session_id: str, _query_text: str, plan) -> list[dict]:
|
||||
if "find_files" in str(plan.profile) or str(plan.profile) == "file_lookup":
|
||||
return list(self.file_rows)
|
||||
return list(self.summary_rows)
|
||||
|
||||
async def fetch_exact_paths(self, _rag_session_id: str, *, paths: list[str], layers: list[str] | None = None) -> list[dict]:
|
||||
pool = [*self.summary_rows, *self.file_rows]
|
||||
want = {normalize_doc_path(p) for p in paths}
|
||||
return [row for row in pool if normalize_doc_path(str(row.get("path") or "")) in want]
|
||||
|
||||
async def fetch_chunks_by_path_substrings(
|
||||
self,
|
||||
_rag_session_id: str,
|
||||
*,
|
||||
path_needles: list[str],
|
||||
layers: list[str] | None = None,
|
||||
limit: int = 200,
|
||||
) -> list[dict]:
|
||||
del layers, limit
|
||||
pool = [*self.summary_rows, *self.file_rows]
|
||||
return [row for row in pool if any(needle in str(row.get("path") or "") for needle in path_needles)]
|
||||
|
||||
|
||||
_SUMMARY_ROWS = [
|
||||
{
|
||||
"path": "docs/api/health.md",
|
||||
"title": "Health endpoint",
|
||||
"content": "",
|
||||
"layer": "D1_DOCUMENT_CATALOG",
|
||||
"metadata": {
|
||||
"summary_text": "Endpoint /health возвращает агрегированный статус runtime.",
|
||||
"document_id": "api.health",
|
||||
"title": "Health endpoint",
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
_FILE_ROWS = [
|
||||
{
|
||||
"path": "docs/domains/runtime-health.md",
|
||||
"title": "Runtime health",
|
||||
"layer": "D3_ENTITY_CATALOG",
|
||||
"content": "x",
|
||||
"metadata": {
|
||||
"entity_name": "RuntimeHealth",
|
||||
"document_id": "domain.runtime_health",
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
def _v2_process(llm: FakeLlm, adapter: FakeRagAdapter, *, workflow_llm_enabled: bool = True) -> V2Process:
    """Assemble a ``V2Process`` from the given fakes and default-constructed collaborators.

    The router is created without arguments, so the fake ``llm`` is passed
    only to the process itself.
    """
    return V2Process(
        llm=llm,
        policy_resolver=V2RetrievalPolicyResolver(),
        rag_adapter=adapter,
        evidence_assembler=DocsEvidenceAssembler(),
        evidence_gate=DocsEvidenceGate(),
        router=V2IntentRouter(),
        workflow_llm_enabled=workflow_llm_enabled,
    )
|
||||
|
||||
|
||||
def _context(message: str, *, rag_session_id: str | None = "rag-1") -> RuntimeExecutionContext:
    """Build a runtime context for a running v2 request carrying ``message``.

    Args:
        message: User message placed on the request.
        rag_session_id: RAG session bound to the agent session; pass ``None``
            to create a session without one.
    """
    request = AgentRequest(
        request_id="req-1",
        session_id="sess-1",
        message=message,
        process_version="v2",
        status=RequestExecutionStatus.RUNNING,
        # Borrow a ``created_at`` value from a throwaway request built by the factory.
        created_at=AgentRequest.create("req-x", "sess-x", "x", "v2").created_at,
    )
    session = AgentSession.create("sess-1", rag_session_id)
    return RuntimeExecutionContext(
        request=request,
        session=session,
        publisher=FakePublisher(),
        trace=FakeTrace(),
    )
|
||||
|
||||
|
||||
def test_v2_process_runs_summary_flow() -> None:
    """A docs explain query returns the LLM answer and passes the doc path to the LLM."""
    llm = FakeLlm("Краткое объяснение по документации.")
    adapter = FakeRagAdapter(summary_rows=_SUMMARY_ROWS, file_rows=[])
    process = _v2_process(llm, adapter)

    result = asyncio.run(process.run(_context("Объясни /health в документации")))

    assert result.answer == "Краткое объяснение по документации."
    assert llm.calls
    # The retrieved document path must appear in the first LLM user input.
    assert "docs/api/health.md" in llm.calls[0][1]
|
||||
|
||||
|
||||
def test_v2_process_runs_find_files_flow_without_llm() -> None:
    """A "which file" query is answered with the file path and no LLM call."""
    llm = FakeLlm("should not be used")
    adapter = FakeRagAdapter(summary_rows=[], file_rows=_FILE_ROWS)
    process = _v2_process(llm, adapter)

    result = asyncio.run(process.run(_context("В каком файле описан RuntimeHealth?")))

    assert "docs/domains/runtime-health.md" in result.answer
    assert llm.calls == []
|
||||
|
||||
|
||||
def test_v2_process_find_files_uses_deterministic_gate_mode() -> None:
    """For a find-files query the evidence gate event reports ``deterministic`` answer mode."""
    llm = FakeLlm("unused")
    adapter = FakeRagAdapter(summary_rows=[], file_rows=_FILE_ROWS)
    process = _v2_process(llm, adapter)
    runtime = _context("В каком документе описан runtime health?")

    asyncio.run(process.run(runtime))

    pipeline_events = [payload for _, title, payload in runtime.trace.events if title == "evidence_gate_checked"]
    assert pipeline_events
    assert pipeline_events[0]["answer_mode"] == "deterministic"
|
||||
|
||||
|
||||
def test_v2_process_runs_grounded_general_summary_with_rag() -> None:
    """A general question with RAG rows returns the LLM answer, with reference documents in the LLM input."""
    llm = FakeLlm("Grounded summary.")
    adapter = FakeRagAdapter(summary_rows=_SUMMARY_ROWS, file_rows=[])
    process = _v2_process(llm, adapter)

    result = asyncio.run(process.run(_context("Что это за сервис?")))

    assert result.answer == "Grounded summary."
    assert llm.calls
    assert "Опорные документы" in llm.calls[0][1]
|
||||
|
||||
|
||||
def test_v2_process_returns_insufficiency_for_general_without_rag() -> None:
    """Without a RAG session a general question gets a canned answer and no LLM call."""
    llm = FakeLlm("Общий ответ без обращения к документации.")
    adapter = FakeRagAdapter(summary_rows=[], file_rows=[])
    process = _v2_process(llm, adapter)

    result = asyncio.run(process.run(_context("Что это за сервис?", rag_session_id=None)))

    assert "grounded summary" in result.answer
    assert llm.calls == []
|
||||
|
||||
|
||||
def test_v2_process_requires_active_rag_session() -> None:
    """A docs query without a RAG session is answered with a message that a session is required."""
    process = _v2_process(FakeLlm("unused"), FakeRagAdapter([], []))

    result = asyncio.run(process.run(_context("Объясни /health в документации", rag_session_id=None)))

    assert "нужна активная RAG-сессия" in result.answer
|
||||
|
||||
|
||||
def test_v2_router_detects_find_files_subintent() -> None:
    """A router built without an LLM still detects FIND_FILES and extracts the entity."""
    result = V2IntentRouter().route("В каком файле описан RuntimeHealth?")

    assert result.subintent == "FIND_FILES"
    # Entity names keep the original casing; target terms are lower-cased.
    assert "RuntimeHealth" in result.anchors.entity_names
    assert "runtimehealth" in result.target_terms
|
||||
|
||||
|
||||
def test_v2_process_logs_retrieved_rag_rows_in_trace() -> None:
    """The ``rag_rows_fetched`` trace event lists the retrieved rows with path, layer and document id."""
    llm = FakeLlm("Краткое объяснение по документации.")
    adapter = FakeRagAdapter(summary_rows=_SUMMARY_ROWS, file_rows=[])
    process = _v2_process(llm, adapter)
    runtime = _context("Объясни /health в документации")

    asyncio.run(process.run(runtime))

    retrieval_events = [payload for _, title, payload in runtime.trace.events if title == "rag_rows_fetched"]
    assert retrieval_events
    # Guard against a None payload so a missing payload fails on ``assert rows`` below.
    payload = retrieval_events[0] or {}
    rows = payload.get("rows") or []
    assert rows
    assert rows[0]["path"] == "docs/api/health.md"
    assert rows[0]["layer"] == "D1_DOCUMENT_CATALOG"
    assert rows[0]["document_id"] == "api.health"
|
||||
|
||||
|
||||
def test_v2_process_logs_pipeline_steps() -> None:
    """A summary run logs one trace event for each expected pipeline stage."""
    llm = FakeLlm("Краткое объяснение по документации.")
    adapter = FakeRagAdapter(summary_rows=_SUMMARY_ROWS, file_rows=[])
    process = _v2_process(llm, adapter)
    runtime = _context("Что делает endpoint /health?")

    asyncio.run(process.run(runtime))

    pipeline_titles = [title for _, title, _ in runtime.trace.events]
    assert "router_resolved" in pipeline_titles
    assert "anchors_extracted" in pipeline_titles
    assert "retrieval_profile_selected" in pipeline_titles
    assert "retrieval_executed" in pipeline_titles
    assert "evidence_assembled" in pipeline_titles
    assert "evidence_gate_checked" in pipeline_titles
    assert "answer_generated" in pipeline_titles
|
||||
|
||||
|
||||
def test_v2_process_blocks_generic_docs_answer_without_target_doc() -> None:
    """When only the README is retrieved for a ``/send`` query, the LLM is not called."""
    llm = FakeLlm("галлюцинация")
    adapter = FakeRagAdapter(
        summary_rows=[
            {
                "path": "docs/README.md",
                "title": "README",
                "content": "",
                "layer": "D1_DOCUMENT_CATALOG",
                "metadata": {"summary_text": "Общий индекс документации.", "document_id": "docs.readme"},
            }
        ],
        file_rows=[],
    )
    process = _v2_process(llm, adapter)

    result = asyncio.run(process.run(_context("Что делает endpoint /send?")))

    assert "не найден целевой документ" in result.answer
    assert llm.calls == []
|
||||
|
||||
|
||||
def test_v2_process_can_disable_workflow_llm_for_docs_summary() -> None:
    """With the workflow LLM disabled, a docs answer contains the stored summary text and the LLM is not called."""
    llm = FakeLlm("should not be used")
    adapter = FakeRagAdapter(summary_rows=_SUMMARY_ROWS, file_rows=[])
    process = _v2_process(llm, adapter, workflow_llm_enabled=False)

    result = asyncio.run(process.run(_context("Объясни /health в документации")))

    assert "Endpoint /health возвращает агрегированный статус runtime." in result.answer
    assert llm.calls == []
|
||||
|
||||
|
||||
def test_v2_process_can_disable_workflow_llm_for_general_summary() -> None:
    """With the workflow LLM disabled, a general answer still contains the stored summary text and the LLM is not called."""
    llm = FakeLlm("should not be used")
    adapter = FakeRagAdapter(summary_rows=_SUMMARY_ROWS, file_rows=[])
    process = _v2_process(llm, adapter, workflow_llm_enabled=False)

    result = asyncio.run(process.run(_context("Что это за сервис?")))

    assert "агрегированный статус runtime" in result.answer
    assert llm.calls == []
|
||||
@@ -0,0 +1,49 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2RouteAnchors, V2RouteResult, V2Subintent
|
||||
from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
|
||||
|
||||
|
||||
def _route(*, hints: list[str], endpoint_paths: list[str] | None = None, subintent: str = "SUMMARY", intent: str = "DOC_EXPLAIN") -> V2RouteResult:
    """Build a route result for policy-resolver tests.

    The routing domain is DOCS when ``intent`` equals ``V2Intent.DOC_EXPLAIN``
    and GENERAL otherwise. A missing ``endpoint_paths`` becomes an empty list.
    """
    return V2RouteResult(
        routing_domain=V2Domain.DOCS if intent == V2Intent.DOC_EXPLAIN else V2Domain.GENERAL,
        intent=intent,
        subintent=subintent,
        user_query="q",
        normalized_query="q",
        anchors=V2RouteAnchors(target_doc_hints=hints, endpoint_paths=endpoint_paths or []),
    )
|
||||
|
||||
|
||||
def test_policy_prefers_api_docs_for_endpoint_queries() -> None:
    """A route with an endpoint anchor selects the API endpoint profile and prefers ``docs/api/``."""
    plan = V2RetrievalPolicyResolver().resolve(
        _route(hints=["docs/api/health-endpoint.md"], endpoint_paths=["/health"])
    )

    assert plan.profile == "docs_summary_api_endpoint"
    assert plan.filters["path_prefixes"] == ["docs/api/", "docs/architecture/", "docs/"]
    assert plan.filters["prefer_path_prefixes"][0] == "docs/api/"
|
||||
|
||||
|
||||
def test_policy_prefers_logic_docs_for_logic_queries() -> None:
    """A hint under ``docs/logic/`` selects the logic-flow profile and prefers that prefix."""
    plan = V2RetrievalPolicyResolver().resolve(_route(hints=["docs/logic/telegram-notification-loop.md"]))

    assert plan.profile == "docs_summary_logic_flow"
    assert plan.filters["prefer_path_prefixes"][0] == "docs/logic/"
|
||||
|
||||
|
||||
def test_policy_uses_deterministic_find_files_profile() -> None:
    """FIND_FILES resolves to ``file_lookup`` over the document and entity catalogs."""
    plan = V2RetrievalPolicyResolver().resolve(
        _route(hints=["docs/api/health-endpoint.md"], endpoint_paths=["/health"], subintent=V2Subintent.FIND_FILES)
    )

    assert plan.profile == "file_lookup"
    assert plan.layers == ["D1_DOCUMENT_CATALOG", "D3_ENTITY_CATALOG"]
    # The hinted file name must appear in the first preferred LIKE pattern.
    assert "health-endpoint.md" in plan.filters["prefer_like_patterns"][0]
|
||||
|
||||
|
||||
def test_policy_uses_grounded_general_profile() -> None:
    """GENERAL_QA resolves to the grounded-summary profile and prefers architecture docs."""
    plan = V2RetrievalPolicyResolver().resolve(_route(hints=[], intent=V2Intent.GENERAL_QA))

    assert plan.profile == "general_qa_grounded_summary"
    assert plan.filters["prefer_path_prefixes"][0] == "docs/architecture/"
|
||||
@@ -0,0 +1,43 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from app.main import create_app
|
||||
|
||||
|
||||
def _route_map(app: FastAPI) -> set[tuple[str, str]]:
|
||||
routes: set[tuple[str, str]] = set()
|
||||
for route in app.routes:
|
||||
methods = getattr(route, "methods", set()) or set()
|
||||
for method in methods:
|
||||
if method in {"HEAD", "OPTIONS"}:
|
||||
continue
|
||||
routes.add((method, route.path))
|
||||
return routes
|
||||
|
||||
|
||||
def test_route_map_exposes_current_api_and_hides_legacy_index_routes() -> None:
    """The app exposes the current agent and RAG job routes and none of the listed legacy ones."""
    app = create_app()
    route_map = _route_map(app)

    # Agent API routes that must be present.
    assert ("POST", "/api/agent/sessions") in route_map
    assert ("POST", "/api/agent/requests") in route_map
    assert ("GET", "/api/agent/requests/{request_id}") in route_map
    assert ("GET", "/api/agent/streams/{request_id}") in route_map

    # RAG job routes that must be present.
    assert ("GET", "/api/rag/sessions/{rag_session_id}/jobs/{index_job_id}") in route_map
    assert ("GET", "/api/rag/sessions/{rag_session_id}/jobs/{index_job_id}/events") in route_map

    # Routes that must no longer be registered.
    assert ("POST", "/api/index/snapshot") not in route_map
    assert ("POST", "/api/index/changes") not in route_map
    assert ("GET", "/api/index/jobs/{index_job_id}") not in route_map
    assert ("GET", "/api/index/jobs/{index_job_id}/events") not in route_map
    assert ("POST", "/internal/rag/index/snapshot") not in route_map
    assert ("POST", "/internal/rag/index/changes") not in route_map
    assert ("GET", "/internal/rag/index/jobs/{index_job_id}") not in route_map
    assert ("POST", "/internal/rag/retrieve") not in route_map
    assert ("POST", "/internal/rag-repo/webhook") not in route_map
    assert ("POST", "/api/agent/sessions/{session_id}/rag") not in route_map
    assert ("POST", "/api/agent/sessions/{session_id}/reset") not in route_map
    assert ("POST", "/api/rag/sessions") not in route_map
    assert ("POST", "/api/rag/sessions/{rag_session_id}/changes") not in route_map
|
||||
@@ -2,7 +2,7 @@ from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from app.modules.agent.intent_router_v2.models import IntentRouterResult
|
||||
from app.core.agent.intent_router.models import IntentRouterResult
|
||||
|
||||
|
||||
def assert_intent(out: IntentRouterResult, expected: str) -> None:
|
||||
@@ -57,7 +57,7 @@ def assert_domain_layer_prefixes(out: IntentRouterResult) -> None:
|
||||
prefixes = {layer.layer_id[0] for layer in out.retrieval_spec.layer_queries if layer.layer_id}
|
||||
if out.retrieval_spec.domains == ["CODE"]:
|
||||
assert prefixes <= {"C"}
|
||||
elif out.retrieval_spec.domains == ["DOCS"]:
|
||||
elif out.retrieval_spec.domains in (["DOCS"], ["GENERAL"]):
|
||||
assert prefixes <= {"D"}
|
||||
else:
|
||||
assert prefixes <= {"C", "D"}
|
||||
|
||||
@@ -2,26 +2,22 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from app.modules.rag.contracts.enums import RagLayer
|
||||
from app.modules.agent.intent_router_v2 import ConversationState, IntentRouterV2, RepoContext
|
||||
from app.core.rag.contracts.enums import RagLayer
|
||||
from app.core.agent.intent_router import ConversationState, IntentRouterV2, RepoContext
|
||||
|
||||
|
||||
def repo_context() -> RepoContext:
|
||||
return RepoContext(
|
||||
languages=["python"],
|
||||
available_domains=["CODE", "DOCS"],
|
||||
available_domains=["DOCS", "GENERAL"],
|
||||
available_layers=[
|
||||
RagLayer.CODE_ENTRYPOINTS,
|
||||
RagLayer.CODE_SYMBOL_CATALOG,
|
||||
RagLayer.CODE_DEPENDENCY_GRAPH,
|
||||
RagLayer.CODE_SEMANTIC_ROLES,
|
||||
RagLayer.CODE_SOURCE_CHUNKS,
|
||||
RagLayer.DOCS_DOC_CHUNKS,
|
||||
RagLayer.DOCS_DOCUMENT_CATALOG,
|
||||
RagLayer.DOCS_FACT_INDEX,
|
||||
RagLayer.DOCS_ENTITY_CATALOG,
|
||||
RagLayer.DOCS_WORKFLOW_INDEX,
|
||||
RagLayer.DOCS_RELATION_GRAPH,
|
||||
RagLayer.DOCS_INTEGRATION_INDEX,
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from app.modules.rag.contracts.enums import RagLayer
|
||||
from app.modules.rag.indexing.code.pipeline import CodeIndexingPipeline
|
||||
from app.core.rag.contracts.enums import RagLayer
|
||||
from app.core.rag.indexing.code.pipeline import CodeIndexingPipeline
|
||||
|
||||
|
||||
def test_code_pipeline_builds_source_symbols_edges_and_entrypoints() -> None:
|
||||
|
||||
@@ -1,15 +1,18 @@
|
||||
from app.modules.rag.contracts.enums import RagLayer
|
||||
from app.modules.rag.indexing.docs.pipeline import DocsIndexingPipeline
|
||||
from app.core.rag.contracts.enums import RagLayer
|
||||
from app.core.rag.indexing.docs.pipeline import DocsIndexingPipeline
|
||||
|
||||
|
||||
def test_docs_pipeline_builds_new_d0_to_d5_layers() -> None:
|
||||
def test_docs_pipeline_builds_docs_layers_from_modern_markdown_structure() -> None:
|
||||
pipeline = DocsIndexingPipeline()
|
||||
content = """---
|
||||
id: api.billing.create_invoice
|
||||
type: api_method
|
||||
doc_type: api_method
|
||||
name: create_invoice
|
||||
title: Create Invoice API
|
||||
module: billing
|
||||
domain: billing
|
||||
sub_domain: invoices
|
||||
layer: application
|
||||
status: draft
|
||||
updated_at: 2026-03-23
|
||||
@@ -17,21 +20,26 @@ tags: [billing, api]
|
||||
entities: [Invoice]
|
||||
parent: billing_api
|
||||
children: []
|
||||
related_docs: [api.billing.validate_invoice]
|
||||
links:
|
||||
- type: related_api
|
||||
target: api.billing.validate_invoice
|
||||
called_by:
|
||||
- ui.billing.invoice_form
|
||||
uses_logic:
|
||||
- logic.billing.invoice_validation
|
||||
---
|
||||
# Summary
|
||||
# Create Invoice API
|
||||
|
||||
## Summary
|
||||
|
||||
Creates an invoice in billing.
|
||||
|
||||
# Details
|
||||
## Details
|
||||
|
||||
## Описание
|
||||
### Описание
|
||||
|
||||
Создает счет на оплату.
|
||||
|
||||
## Сценарий
|
||||
### Сценарий
|
||||
|
||||
**Название:**
|
||||
Create invoice
|
||||
@@ -55,7 +63,12 @@ Create invoice
|
||||
**Постусловие:**
|
||||
- Invoice is created.
|
||||
|
||||
## Контракт
|
||||
### Контракт
|
||||
|
||||
#### Метаданные вызова
|
||||
- Method: POST
|
||||
- Auth: USER
|
||||
- Idempotency: false
|
||||
|
||||
### Входные параметры
|
||||
|
||||
@@ -69,7 +82,22 @@ Create invoice
|
||||
| --- | --- | --- |
|
||||
| invoice_id | string | yes |
|
||||
|
||||
## Ошибки
|
||||
### Интеграции
|
||||
|
||||
#### Billing DB
|
||||
- target: db.billing.invoices
|
||||
- target_type: db
|
||||
- direction: outbound
|
||||
- interaction: writes
|
||||
- via: invoice repository
|
||||
- purpose: persist created invoices
|
||||
- details:
|
||||
- transaction: required
|
||||
- tables:
|
||||
- invoices
|
||||
- invoice_items
|
||||
|
||||
### Ошибки
|
||||
|
||||
| status | error | client action |
|
||||
| --- | --- | --- |
|
||||
@@ -89,10 +117,14 @@ Create invoice
|
||||
assert RagLayer.DOCS_ENTITY_CATALOG in layers
|
||||
assert RagLayer.DOCS_WORKFLOW_INDEX in layers
|
||||
assert RagLayer.DOCS_RELATION_GRAPH in layers
|
||||
assert RagLayer.DOCS_INTEGRATION_INDEX in layers
|
||||
|
||||
catalog_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_DOCUMENT_CATALOG)
|
||||
assert catalog_doc.metadata["document_id"] == "api.billing.create_invoice"
|
||||
assert catalog_doc.metadata["module"] == "billing"
|
||||
assert catalog_doc.metadata["domain"] == "billing"
|
||||
assert catalog_doc.metadata["subdomain"] == "invoices"
|
||||
assert catalog_doc.metadata["summary_text"] == "Creates an invoice in billing."
|
||||
|
||||
fact_texts = [doc.text for doc in docs if doc.layer == RagLayer.DOCS_FACT_INDEX]
|
||||
assert any("has_field amount" in text for text in fact_texts)
|
||||
@@ -108,6 +140,16 @@ Create invoice
|
||||
relation_targets = [doc.metadata["target_id"] for doc in docs if doc.layer == RagLayer.DOCS_RELATION_GRAPH]
|
||||
assert "billing_api" in relation_targets
|
||||
assert "api.billing.validate_invoice" in relation_targets
|
||||
assert "logic.billing.invoice_validation" in relation_targets
|
||||
assert "Invoice" in relation_targets
|
||||
|
||||
chunk_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_DOC_CHUNKS)
|
||||
assert chunk_doc.metadata["section_path"]
|
||||
assert chunk_doc.metadata["artifact_type"] == "DOCS"
|
||||
assert chunk_doc.metadata["domain"] == "billing"
|
||||
assert chunk_doc.metadata["subdomain"] == "invoices"
|
||||
|
||||
integration_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_INTEGRATION_INDEX)
|
||||
assert integration_doc.metadata["target"] == "db.billing.invoices"
|
||||
assert integration_doc.metadata["target_type"] == "db"
|
||||
assert integration_doc.metadata["details"]["transaction"] == "required"
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.modules.agent.intent_router_v2 import IntentRouterV2
|
||||
from app.modules.agent.llm.prompt_loader import PromptLoader
|
||||
from app.modules.agent.runtime.docs_qa_pipeline import DocsQAPipelineRunner
|
||||
from app.modules.agent.runtime.docs_qa_pipeline.openapi_postprocessor import OpenAPIPostprocessor
|
||||
from app.modules.agent.runtime.docs_qa_pipeline.prompt_payload_builder import DocsPromptPayloadBuilder
|
||||
from app.modules.agent.runtime.steps.generation import RuntimePromptSelector
|
||||
from app.core.agent.intent_router import IntentRouterV2
|
||||
from app.core.agent.llm.prompt_loader import PromptLoader
|
||||
from app.core.agent.runtime.docs_qa_pipeline import DocsQAPipelineRunner
|
||||
from app.core.agent.runtime.docs_qa_pipeline.openapi_postprocessor import OpenAPIPostprocessor
|
||||
from app.core.agent.runtime.docs_qa_pipeline.prompt_payload_builder import DocsPromptPayloadBuilder
|
||||
from app.core.agent.orchestration.processes.v2.prompt_payload_builder import V2PromptPayloadBuilder
|
||||
from app.core.agent.runtime.steps.generation import RuntimePromptSelector
|
||||
from tests.docs_qa_eval.fixture_adapter import InMemoryDocsRetrievalAdapter
|
||||
from tests.unit_tests.rag.intent_router_testkit import repo_context
|
||||
|
||||
@@ -43,7 +44,7 @@ def test_prompt_selector_uses_docs_prompts_only() -> None:
|
||||
|
||||
def test_docs_prompt_payload_contains_required_contract() -> None:
|
||||
builder = DocsPromptPayloadBuilder()
|
||||
from app.modules.agent.runtime.docs_qa_pipeline.models import DocsEvidenceBundle, OpenAPIResult
|
||||
from app.core.agent.runtime.docs_qa_pipeline.models import DocsEvidenceBundle, OpenAPIResult
|
||||
|
||||
payload = builder.build(
|
||||
question="Объясни billing",
|
||||
@@ -52,6 +53,8 @@ def test_docs_prompt_payload_contains_required_contract() -> None:
|
||||
evidence_bundle=DocsEvidenceBundle(
|
||||
intent="DOCUMENTATION_EXPLAIN",
|
||||
sub_intent="COMPONENT_EXPLAIN",
|
||||
primary_documents=[{"title": "Billing"}],
|
||||
secondary_documents=[{"title": "Billing relation"}],
|
||||
documents=[{"title": "Billing"}],
|
||||
facts=[{"content": "Handles payments"}],
|
||||
relations=[{"title": "Billing -> Orders"}],
|
||||
@@ -62,12 +65,36 @@ def test_docs_prompt_payload_contains_required_contract() -> None:
|
||||
assert '"question": "Объясни billing"' in payload
|
||||
assert '"intent": "DOCUMENTATION_EXPLAIN"' in payload
|
||||
assert '"sub_intent": "COMPONENT_EXPLAIN"' in payload
|
||||
assert '"primary_documents"' in payload
|
||||
assert '"secondary_documents"' in payload
|
||||
assert '"documents"' in payload
|
||||
assert '"facts"' in payload
|
||||
assert '"relations"' in payload
|
||||
assert '"api_contract"' in payload
|
||||
|
||||
|
||||
def test_v2_prompt_payload_accepts_api_method_mode_fields() -> None:
    """Check that the V2 payload builder accepts the API-method-mode arguments.

    The builder is called with ``api_method_answer_mode``,
    ``target_endpoint_identity`` and ``direct_api_spec_found``; the test then
    verifies, by substring match, that each of them shows up in the payload.
    """
    from app.core.agent.runtime.docs_qa_pipeline.models import DocsEvidenceBundle

    payload_builder = V2PromptPayloadBuilder()
    bundle = DocsEvidenceBundle(intent="DOCUMENTATION_EXPLAIN", sub_intent="API_METHOD_EXPLAIN")
    endpoint_identity = {
        "anchor": "health",
        "normalized_path": "/health",
        "normalized_doc_id": "api.health_endpoint",
    }

    payload = payload_builder.build(
        question="Как работает метод health?",
        intent="DOCUMENTATION_EXPLAIN",
        sub_intent="API_METHOD_EXPLAIN",
        evidence_bundle=bundle,
        api_method_answer_mode="indirect",
        target_endpoint_identity=endpoint_identity,
        direct_api_spec_found=False,
    )

    # Fragments are checked in the same order as the original assertions.
    expected_fragments = (
        '"api_method_answer_mode": "indirect"',
        '"normalized_doc_id": "api.health_endpoint"',
        '"direct_api_spec_found": false',
    )
    for fragment in expected_fragments:
        assert fragment in payload
|
||||
|
||||
|
||||
def test_openapi_postprocessor_requires_paths_for_full_spec() -> None:
|
||||
validator = OpenAPIPostprocessor()
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.modules.agent.intent_router_v2 import IntentRouterV2
|
||||
from app.modules.agent.runtime.docs_qa_pipeline import DocsQAPipelineRunner
|
||||
from app.core.agent.intent_router import IntentRouterV2
|
||||
from app.core.agent.runtime.docs_qa_pipeline import DocsQAPipelineRunner, DocsTaskPlanner
|
||||
from tests.docs_qa_eval.fixture_adapter import InMemoryDocsRetrievalAdapter
|
||||
from tests.unit_tests.rag.intent_router_testkit import repo_context
|
||||
|
||||
@@ -135,6 +135,52 @@ def test_openapi_partial_contract_returns_partial_mode() -> None:
|
||||
assert "/orders" in result.answer
|
||||
|
||||
|
||||
def test_docs_pipeline_accepts_precomputed_task_plan_without_rerouting() -> None:
    """Run the docs pipeline with a ready-made task plan and a router that must not be used.

    The task plan is produced up front from a real ``IntentRouterV2`` result.
    The runner itself is given a router whose ``route`` raises, so the test
    fails if the runner tries to route again instead of using ``task_plan``.
    """
    question = "Объясни API метод /health"
    session_id = "docs-session"
    health_doc_path = "docs/api/health.md"

    catalog_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": health_doc_path,
        "title": "GET /health",
        "content": "/health returns runtime and component statuses.",
        "metadata": {"document_id": "api.health_endpoint", "type": "api_method", "endpoint": "/health"},
    }
    fact_row = {
        "layer": "D2_FACT_INDEX",
        "path": health_doc_path,
        "title": "api.health_endpoint:response",
        "content": "Returns health summary and component diagnostics.",
        "metadata": {"subject_id": "api.health_endpoint", "type": "api_method"},
    }
    rows = [catalog_row, fact_row]

    route_result = IntentRouterV2().route(question, repo_context=repo_context())
    task_plan = DocsTaskPlanner().plan(question, session_id, route_result=route_result)

    class FailingRouter:
        """Router stand-in whose ``route`` always raises."""

        def route(self, *_args, **_kwargs):
            raise AssertionError("runner should use the precomputed task plan")

    runner = DocsQAPipelineRunner(
        FailingRouter(),
        InMemoryDocsRetrievalAdapter(rows),
        repo_context=repo_context(),
    )

    result = runner.run(question, session_id, mode="pre_llm_only", task_plan=task_plan)

    routed = result.router_result
    assert routed.intent == "DOCUMENTATION_EXPLAIN"
    assert routed.query_plan.sub_intent == "API_METHOD_EXPLAIN"
    diagnostics = result.diagnostics
    assert diagnostics.selected_primary_documents == ["api.health_endpoint"]
    assert diagnostics.gate_decision == "allow_exact"
|
||||
|
||||
|
||||
def test_pre_llm_mode_returns_diagnostic_only_without_answer_generation() -> None:
|
||||
rows = [
|
||||
{
|
||||
@@ -172,7 +218,7 @@ def test_pre_llm_mode_detects_path_anchor_candidates() -> None:
|
||||
|
||||
assert "/health" in result.diagnostics.query_anchor_candidates
|
||||
assert "/health" in result.diagnostics.resolved_anchor_candidates
|
||||
assert result.diagnostics.planned_layers == ["D2_FACT_INDEX", "D4_WORKFLOW_INDEX", "D1_DOCUMENT_CATALOG", "D0_DOC_CHUNKS"]
|
||||
assert result.diagnostics.planned_layers == ["D1_DOCUMENT_CATALOG", "D2_FACT_INDEX", "D0_DOC_CHUNKS", "D4_WORKFLOW_INDEX"]
|
||||
assert set(result.diagnostics.executed_layers) == {"D1_DOCUMENT_CATALOG", "D2_FACT_INDEX", "D4_WORKFLOW_INDEX", "D0_DOC_CHUNKS"}
|
||||
|
||||
|
||||
@@ -318,8 +364,311 @@ def test_openapi_request_fragment_uses_fragment_aware_gate() -> None:
|
||||
assert result.answer_mode in {"ready", "ready_partial"}
|
||||
assert result.answer
|
||||
assert "type: object" in result.answer
|
||||
assert "message:" in result.answer
|
||||
assert "chat_id:" in result.answer
|
||||
|
||||
|
||||
def test_api_method_explain_prefers_api_method_primary_doc() -> None:
    """Ask about the ``health`` method and expect the api_method document as primary.

    The fixture holds a domain-entity document, an api_method document for
    ``/health`` and a relation row between them; the assertions pin the
    diagnostics and the ``exact`` answer mode reported by the runner.
    """
    domain_entity_row = dict(
        layer="D1_DOCUMENT_CATALOG",
        path="docs/domain/runtime-health.md",
        title="Сущность runtime health",
        content="Runtime health describes overall service health.",
        metadata={"document_id": "domain.runtime_health", "type": "domain_entity"},
    )
    api_method_row = dict(
        layer="D1_DOCUMENT_CATALOG",
        path="docs/api/health.md",
        title="GET /health",
        content="/health returns runtime and component statuses.",
        metadata={"document_id": "api.health_endpoint", "type": "api_method", "endpoint": "/health"},
    )
    relation_row = dict(
        layer="D5_RELATION_GRAPH",
        path="docs/domain/runtime-health.md",
        title="Runtime health links",
        content="runtime health used by health endpoint",
        metadata={"document_id": "domain.runtime_health", "target_doc_id": "api.health_endpoint"},
    )
    fixture_rows = [domain_entity_row, api_method_row, relation_row]

    router = IntentRouterV2()
    adapter = InMemoryDocsRetrievalAdapter(fixture_rows)
    runner = DocsQAPipelineRunner(router, adapter, repo_context=repo_context())

    result = runner.run("Что делает метод health?", "docs-session", mode="pre_llm_only")

    assert result.router_result.query_plan.sub_intent == "API_METHOD_EXPLAIN"
    diagnostics = result.diagnostics
    assert diagnostics.target_anchor in {"health", "/health"}
    assert diagnostics.api_method_match_found is True
    assert diagnostics.selected_primary_documents == ["api.health_endpoint"]
    assert "api.health_endpoint" in diagnostics.primary_doc_candidates
    assert diagnostics.evidence_gate_require_target_api_spec is True
    assert diagnostics.evidence_gate_target_api_spec_found is True
    assert result.answer_mode == "exact"
    assert diagnostics.gate_decision == "allow_exact"
|
||||
|
||||
|
||||
def test_api_method_explain_promotes_api_doc_via_links() -> None:
    """Expect the API document to be promoted through a ``links`` entry.

    The catalog only lists a domain-entity document whose metadata links to
    ``api.health_endpoint``; the API document itself is present only as a
    chunk row. The diagnostics must report the promotion and the selection.
    """
    linking_entity_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/domain/runtime-health.md",
        "title": "Сущность runtime health",
        "content": "Runtime health is the domain model for observability.",
        "metadata": {
            "document_id": "domain.runtime_health",
            "type": "domain_entity",
            "links": [{"target": "api.health_endpoint", "type": "used_by"}],
        },
    }
    api_chunk_row = {
        "layer": "D0_DOC_CHUNKS",
        "path": "docs/api/health.md",
        "title": "api.health_endpoint:Overview",
        "content": "Endpoint /health returns overall runtime status and component diagnostics.",
        "metadata": {"document_id": "api.health_endpoint", "type": "api_method", "endpoint": "/health"},
    }

    runner = DocsQAPipelineRunner(
        IntentRouterV2(),
        InMemoryDocsRetrievalAdapter([linking_entity_row, api_chunk_row]),
        repo_context=repo_context(),
    )

    result = runner.run("Как работает health endpoint?", "docs-session", mode="pre_llm_only")

    diagnostics = result.diagnostics
    assert diagnostics.promoted_via_links == ["api.health_endpoint"]
    assert diagnostics.selected_primary_documents == ["api.health_endpoint"]
    assert diagnostics.api_method_match_found is True
|
||||
|
||||
|
||||
def test_api_method_explain_rejects_cross_endpoint_primary_candidates() -> None:
    """Expect documents of another endpoint to be rejected for a ``health`` question.

    The fixture mixes catalog and chunk rows for ``/actions/{action}`` with a
    catalog row for ``/health``. Only ``api.health_endpoint`` may be selected;
    the actions endpoint must be listed among the rejected candidates.
    """
    target_doc_id = "api.health_endpoint"
    foreign_doc_id = "api.control_actions_endpoint"

    def actions_metadata() -> dict:
        # A fresh dict per row, as in the original literals (no shared object).
        return {
            "document_id": "api.control_actions_endpoint",
            "type": "api_method",
            "endpoint": "/actions/{action}",
        }

    rows = [
        {
            "layer": "D1_DOCUMENT_CATALOG",
            "path": "docs/documentation/api/control-actions-endpoint.md",
            "title": "HTTP API /actions/{action}",
            "content": "Endpoint for controlling actions.",
            "metadata": actions_metadata(),
        },
        {
            "layer": "D1_DOCUMENT_CATALOG",
            "path": "docs/documentation/api/health-endpoint.md",
            "title": "HTTP API /health",
            "content": "Health endpoint returns runtime health and component diagnostics.",
            "metadata": {
                "document_id": "api.health_endpoint",
                "type": "api_method",
                "endpoint": "/health",
            },
        },
        {
            "layer": "D0_DOC_CHUNKS",
            "path": "docs/documentation/api/actions-endpoint.md",
            "title": "api.control_actions_endpoint:Scenario",
            "content": "The /actions/{action} endpoint triggers runtime actions.",
            "metadata": actions_metadata(),
        },
    ]
    router = IntentRouterV2()
    adapter = InMemoryDocsRetrievalAdapter(rows)
    runner = DocsQAPipelineRunner(router, adapter, repo_context=repo_context())

    result = runner.run("Как работает метод health?", "docs-session", mode="pre_llm_only")

    assert result.router_result.query_plan.sub_intent == "API_METHOD_EXPLAIN"
    diagnostics = result.diagnostics
    assert diagnostics.target_endpoint_identity["normalized_doc_id"] == target_doc_id
    assert diagnostics.selected_primary_documents == [target_doc_id]
    assert diagnostics.primary_api_documents_after_filter == [target_doc_id]
    assert foreign_doc_id in diagnostics.rejected_endpoint_candidates
    assert diagnostics.cross_endpoint_leakage_detected is True
    assert diagnostics.evidence_gate_target_api_spec_found is True
    assert foreign_doc_id not in diagnostics.selected_doc_ids
|
||||
|
||||
|
||||
def test_api_method_explain_without_exact_target_returns_insufficiency() -> None:
    """Expect an ``insufficient`` answer when no document matches the asked endpoint.

    The fixture only contains api_method documents for ``/actions/{action}``
    and ``/send`` while the question is about ``health``. Both documents must
    be rejected, nothing may be selected, and the gate decision is ``reject``.
    """
    actions_row = dict(
        layer="D1_DOCUMENT_CATALOG",
        path="docs/documentation/api/control-actions-endpoint.md",
        title="HTTP API /actions/{action}",
        content="Endpoint for controlling actions.",
        metadata={
            "document_id": "api.control_actions_endpoint",
            "type": "api_method",
            "endpoint": "/actions/{action}",
        },
    )
    send_row = dict(
        layer="D1_DOCUMENT_CATALOG",
        path="docs/documentation/api/send-endpoint.md",
        title="HTTP API /send",
        content="Endpoint for sending messages.",
        metadata={
            "document_id": "api.send_message_endpoint",
            "type": "api_method",
            "endpoint": "/send",
        },
    )
    unrelated_rows = [actions_row, send_row]

    runner = DocsQAPipelineRunner(
        IntentRouterV2(),
        InMemoryDocsRetrievalAdapter(unrelated_rows),
        repo_context=repo_context(),
    )

    result = runner.run("Что делает метод health?", "docs-session", mode="pre_llm_only")

    assert result.router_result.query_plan.sub_intent == "API_METHOD_EXPLAIN"
    diagnostics = result.diagnostics
    assert diagnostics.target_endpoint_identity["normalized_doc_id"] == "api.health_endpoint"
    assert diagnostics.selected_primary_documents == []
    assert "api.control_actions_endpoint" in diagnostics.rejected_endpoint_candidates
    assert "api.send_message_endpoint" in diagnostics.rejected_endpoint_candidates
    assert diagnostics.target_api_spec_found_exact is False
    assert diagnostics.evidence_gate_target_api_spec_found is False
    assert diagnostics.gate_decision == "reject"
    assert result.answer_mode == "insufficient"
    assert "api.control_actions_endpoint" not in diagnostics.selected_doc_ids
    assert "api.send_message_endpoint" not in diagnostics.selected_doc_ids
|
||||
|
||||
|
||||
def test_api_method_explain_uses_indirect_mode_from_target_linked_docs() -> None:
    """Expect the ``indirect`` answer mode when only a linked document is available.

    The single fixture row is a domain-entity document that links to
    ``api.health_endpoint``. The adapter gets no materialized rows, and the
    assertions pin the diagnostics the runner reports for that situation.
    """
    linked_entity_metadata = {
        "document_id": "domain.runtime_health",
        "type": "domain_entity",
        "links": [{"target": "api.health_endpoint", "type": "used_by"}],
    }
    linked_entity_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/domain/runtime-health.md",
        "title": "Runtime health",
        "content": "Runtime health describes overall service state and component diagnostics.",
        "metadata": linked_entity_metadata,
    }

    router = IntentRouterV2()
    adapter = InMemoryDocsRetrievalAdapter([linked_entity_row])
    runner = DocsQAPipelineRunner(router, adapter, repo_context=repo_context())

    result = runner.run("Как работает метод health?", "docs-session", mode="pre_llm_only")

    assert result.answer_mode == "indirect"
    diagnostics = result.diagnostics
    assert diagnostics.gate_decision == "allow_indirect"
    assert diagnostics.raw_retrieval_non_empty is True
    assert diagnostics.target_primary_context_non_empty is False
    assert diagnostics.indirect_target_context_non_empty is True
    assert diagnostics.graph_promotion_attempted is True
    assert diagnostics.graph_promotion_hits == ["api.health_endpoint"]
    assert diagnostics.promoted_target_loaded is False
    assert diagnostics.materialization_failure_reason == "materialized_rows_empty"
    assert diagnostics.final_primary_document_ids == []
    assert "domain.runtime_health" in diagnostics.final_secondary_document_ids
|
||||
|
||||
|
||||
def test_api_method_explain_skips_llm_when_no_exact_or_indirect_context() -> None:
    """Expect the runner not to call the LLM when nothing relates to the asked endpoint.

    The only fixture row documents ``/send`` while the question is about
    ``health``. The run uses the default mode with a fake LLM attached, and
    the fake must end up with an empty ``calls`` list.
    """
    from tests.unit_tests.rag.test_docs_prompt_layer import FakeLlm

    send_endpoint_row = dict(
        layer="D1_DOCUMENT_CATALOG",
        path="docs/documentation/api/send-endpoint.md",
        title="HTTP API /send",
        content="Endpoint for sending messages.",
        metadata={
            "document_id": "api.send_message_endpoint",
            "type": "api_method",
            "endpoint": "/send",
        },
    )
    rows = [send_endpoint_row]
    llm = FakeLlm("should not be called")
    runner = DocsQAPipelineRunner(
        IntentRouterV2(),
        InMemoryDocsRetrievalAdapter(rows),
        repo_context=repo_context(),
        llm=llm,
    )

    result = runner.run("Что делает метод health?", "docs-session")

    assert llm.calls == []
    assert result.answer_mode == "insufficient"
    diagnostics = result.diagnostics
    assert diagnostics.llm_called is False
    assert diagnostics.llm_call_reason == "no_exact_or_indirect_target_context"
    assert diagnostics.gate_decision == "reject"
|
||||
|
||||
|
||||
def test_api_method_explain_materializes_promoted_target_into_primary_context() -> None:
    """Expect a link-promoted target to be loaded from materialized rows as primary.

    The retrievable rows hold only a domain-entity document linking to
    ``api.health_endpoint``; the adapter additionally receives
    ``materialized_rows`` with catalog, fact and chunk rows for that target.
    The assertions pin the ``exact`` mode and the materialization diagnostics.
    """
    health_doc_path = "docs/api/health.md"

    rows = [
        {
            "layer": "D1_DOCUMENT_CATALOG",
            "path": "docs/domain/runtime-health.md",
            "title": "Runtime health",
            "content": "Runtime health describes service state and component diagnostics.",
            "metadata": {
                "document_id": "domain.runtime_health",
                "type": "domain_entity",
                "links": [{"target": "api.health_endpoint", "type": "used_by"}],
            },
        }
    ]

    target_catalog_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": health_doc_path,
        "title": "GET /health",
        "content": "/health returns runtime and component statuses.",
        "metadata": {"document_id": "api.health_endpoint", "type": "api_method", "endpoint": "/health"},
    }
    target_fact_row = {
        "layer": "D2_FACT_INDEX",
        "path": health_doc_path,
        "title": "api.health_endpoint:response",
        "content": "Returns health summary and component diagnostics.",
        "metadata": {"subject_id": "api.health_endpoint", "type": "api_method"},
    }
    target_chunk_row = {
        "layer": "D0_DOC_CHUNKS",
        "path": health_doc_path,
        "title": "api.health_endpoint:Overview",
        "content": "Endpoint /health returns overall runtime health.",
        "metadata": {"document_id": "api.health_endpoint", "type": "api_method", "endpoint": "/health"},
    }
    materialized_rows = [target_catalog_row, target_fact_row, target_chunk_row]

    router = IntentRouterV2()
    adapter = InMemoryDocsRetrievalAdapter(rows, materialized_rows=materialized_rows)
    runner = DocsQAPipelineRunner(router, adapter, repo_context=repo_context())

    result = runner.run("Как работает метод health?", "docs-session", mode="pre_llm_only")

    assert result.answer_mode == "exact"
    diagnostics = result.diagnostics
    assert diagnostics.graph_promotion_hits == ["api.health_endpoint"]
    assert diagnostics.graph_promotion_materialized == ["api.health_endpoint"]
    assert diagnostics.promoted_target_loaded is True
    assert diagnostics.promoted_target_chunks_loaded == 1
    assert diagnostics.promoted_target_facts_loaded == 1
    assert diagnostics.pinned_document_ids == ["api.health_endpoint"]
    assert diagnostics.final_primary_document_ids == ["api.health_endpoint"]
    assert "domain.runtime_health" in diagnostics.final_secondary_document_ids
    assert diagnostics.materialized_target_primary_context_non_empty is True
    assert diagnostics.gate_decision == "allow_exact"
|
||||
|
||||
|
||||
def test_entity_question_does_not_prefer_api_method_primary_doc() -> None:
    """Expect an entity question not to go through API-method primary selection.

    Both a domain-entity and an api_method document are available, but the
    question asks what "runtime health" is. The sub-intent must be
    ``ENTITY_EXPLAIN``, with no primary documents selected and no answer text.
    """
    entity_row = dict(
        layer="D1_DOCUMENT_CATALOG",
        path="docs/domain/runtime-health.md",
        title="Сущность runtime health",
        content="Runtime health describes service state.",
        metadata={"document_id": "domain.runtime_health", "type": "domain_entity"},
    )
    api_row = dict(
        layer="D1_DOCUMENT_CATALOG",
        path="docs/api/health.md",
        title="GET /health",
        content="/health returns runtime status.",
        metadata={"document_id": "api.health_endpoint", "type": "api_method", "endpoint": "/health"},
    )

    runner = DocsQAPipelineRunner(
        IntentRouterV2(),
        InMemoryDocsRetrievalAdapter([entity_row, api_row]),
        repo_context=repo_context(),
    )

    result = runner.run("Что такое runtime health?", "docs-session", mode="pre_llm_only")

    assert result.router_result.query_plan.sub_intent == "ENTITY_EXPLAIN"
    diagnostics = result.diagnostics
    assert diagnostics.selected_primary_documents == []
    assert diagnostics.api_method_match_found is False
    assert result.answer == ""
|
||||
|
||||
|
||||
def test_openapi_method_with_only_path_is_rejected() -> None:
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
import pytest
|
||||
|
||||
from tests.unit_tests.rag.intent_router_testkit import run_sequence
|
||||
|
||||
pytestmark = pytest.mark.intent_router
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "query,plan_id,primary_doc_types,secondary_doc_types,expected_filter_key,expected_filter_value",
    [
        (
            "объясни /health",
            "docs_api_method_explain_v1",
            ["api_method"],
            ["logic_block", "domain_entity", "architecture_overview"],
            "endpoint_path",
            "/health",
        ),
        (
            "какие методы в notifications",
            "docs_list_api_methods_v1",
            ["api_method"],
            [],
            "domain_name",
            "notifications",
        ),
        (
            "найди документацию по telegram_delivery",
            "docs_find_documents_by_domain_v1",
            ["index_page", "architecture_overview", "api_method", "logic_block", "domain_entity"],
            [],
            "domain_name",
            "telegram_delivery",
        ),
        (
            "сгенерируй openapi по /send",
            "docs_generate_openapi_v1",
            ["api_method"],
            ["domain_entity", "logic_block"],
            "endpoint_path",
            "/send",
        ),
        (
            "как устроен сервис",
            "docs_general_docs_qa_v1",
            ["index_page", "architecture_overview"],
            ["logic_block", "domain_entity", "api_method"],
            "scope_level",
            "project",
        ),
    ],
)
def test_docs_retrieval_plan_contracts(
    query: str,
    plan_id: str,
    primary_doc_types: list[str],
    secondary_doc_types: list[str],
    expected_filter_key: str,
    expected_filter_value: str,
) -> None:
    """Check the retrieval plan produced for a single docs query.

    Each case routes one query through ``run_sequence`` and compares the
    resulting plan id, primary and secondary document types, and one filter
    entry against the expected values.
    """
    first_result = run_sequence([query])[0]

    assert first_result.retrieval_plan is not None
    plan = first_result.retrieval_plan
    assert plan.plan_id == plan_id
    assert plan.primary_doc_types == primary_doc_types
    assert plan.secondary_doc_types == secondary_doc_types
    assert plan.filters[expected_filter_key] == expected_filter_value
|
||||
@@ -1,4 +1,4 @@
|
||||
from app.modules.agent.runtime.steps.explain.intent_builder import ExplainIntentBuilder
|
||||
from app.core.agent.runtime.steps.explain.intent_builder import ExplainIntentBuilder
|
||||
|
||||
|
||||
def test_explain_intent_builder_extracts_route_symbol_and_file_hints() -> None:
|
||||
|
||||
@@ -2,13 +2,12 @@ import os
|
||||
|
||||
import pytest
|
||||
|
||||
from app.modules.agent.intent_router_v2.factory import GigaChatIntentRouterFactory
|
||||
from app.modules.shared.env_loader import load_workspace_env
|
||||
from app.core.agent.intent_router.factory import GigaChatIntentRouterFactory
|
||||
from app.core.shared.config import load_workspace_env
|
||||
from tests.unit_tests.rag.asserts_intent_router import (
|
||||
assert_domains,
|
||||
assert_file_only_scope,
|
||||
assert_intent,
|
||||
assert_test_policy,
|
||||
assert_path_scope,
|
||||
)
|
||||
from tests.unit_tests.rag.intent_router_testkit import run_sequence
|
||||
|
||||
@@ -29,7 +28,7 @@ def test_e2e_path_carryover_flow() -> None:
|
||||
]
|
||||
)
|
||||
|
||||
assert_file_only_scope(first, "app/core/config.py")
|
||||
assert_path_scope(first, "app/core/config.py", "app/core")
|
||||
assert "app/core/config.py" in second.retrieval_spec.filters.path_scope
|
||||
assert "app/core/config.py" in third.retrieval_spec.filters.path_scope
|
||||
second_file_anchors = [anchor.value for anchor in second.query_plan.anchors if anchor.type == "FILE_PATH" and anchor.source == "conversation_state"]
|
||||
@@ -39,7 +38,10 @@ def test_e2e_path_carryover_flow() -> None:
|
||||
assert any(anchor.type == "FILE_PATH" and anchor.source == "conversation_state" and anchor.span is None for anchor in third.query_plan.anchors)
|
||||
carried_symbols = [anchor.value for anchor in third.query_plan.anchors if anchor.type == "SYMBOL" and anchor.source == "conversation_state"]
|
||||
assert carried_symbols in ([], ["load_config"])
|
||||
assert third.query_plan.sub_intent == "EXPLAIN_LOCAL"
|
||||
assert_intent(first, "GENERAL_QA")
|
||||
assert_intent(second, "GENERAL_QA")
|
||||
assert_intent(third, "GENERAL_QA")
|
||||
assert third.query_plan.sub_intent == "GENERIC_QA"
|
||||
layer_ids = [item.layer_id for item in third.retrieval_spec.layer_queries]
|
||||
assert "C3_ENTRYPOINTS" not in layer_ids
|
||||
|
||||
@@ -52,9 +54,9 @@ def test_e2e_docs_switch_from_code_topic() -> None:
|
||||
]
|
||||
)
|
||||
|
||||
assert_intent(first, "CODE_QA")
|
||||
assert_intent(first, "DOCUMENTATION_EXPLAIN")
|
||||
assert_intent(second, "DOCUMENTATION_EXPLAIN")
|
||||
assert second.conversation_mode == "SWITCH"
|
||||
assert second.conversation_mode == "CONTINUE"
|
||||
assert_domains(second, ["DOCS"])
|
||||
carried = [
|
||||
anchor
|
||||
@@ -75,12 +77,10 @@ def test_e2e_tests_toggle_flow() -> None:
|
||||
]
|
||||
)
|
||||
|
||||
assert_intent(first, "CODE_QA")
|
||||
assert_intent(second, "CODE_QA")
|
||||
assert_test_policy(first, "INCLUDE")
|
||||
assert_test_policy(second, "EXCLUDE")
|
||||
assert first.query_plan.sub_intent == "FIND_TESTS"
|
||||
assert second.query_plan.sub_intent == "EXPLAIN"
|
||||
assert_intent(first, "GENERAL_QA")
|
||||
assert_intent(second, "GENERAL_QA")
|
||||
assert first.query_plan.sub_intent == "GENERIC_QA"
|
||||
assert second.query_plan.sub_intent == "GENERIC_QA"
|
||||
assert "tests" in second.query_plan.negations
|
||||
assert not second.query_plan.expansions
|
||||
assert second.evidence_policy.require_flow is False
|
||||
@@ -94,9 +94,9 @@ def test_e2e_open_file_then_generic_next_steps_is_lightweight() -> None:
|
||||
]
|
||||
)
|
||||
|
||||
assert_file_only_scope(first, "app/core/config.py")
|
||||
assert_file_only_scope(second, "app/core/config.py")
|
||||
assert second.query_plan.sub_intent in {"EXPLAIN_LOCAL", "NEXT_STEPS"}
|
||||
assert_path_scope(first, "app/core/config.py", "app/core")
|
||||
assert_path_scope(second, "app/core/config.py", "app/core")
|
||||
assert second.query_plan.sub_intent == "GENERIC_QA"
|
||||
layer_ids = [item.layer_id for item in second.retrieval_spec.layer_queries]
|
||||
assert "C3_ENTRYPOINTS" not in layer_ids
|
||||
assert second.evidence_policy.require_flow is False
|
||||
@@ -118,9 +118,9 @@ def test_intent_router_live_smoke_path_carryover() -> None:
|
||||
trace_label="intent-router-live",
|
||||
)
|
||||
|
||||
assert_file_only_scope(first, "app/core/config.py")
|
||||
assert_path_scope(first, "app/core/config.py", "app/core")
|
||||
assert "app/core/config.py" in second.retrieval_spec.filters.path_scope
|
||||
assert second.query_plan.sub_intent in {"EXPLAIN_LOCAL", "NEXT_STEPS"}
|
||||
assert second.query_plan.sub_intent == "GENERIC_QA"
|
||||
layer_ids = [item.layer_id for item in second.retrieval_spec.layer_queries]
|
||||
assert "C3_ENTRYPOINTS" not in layer_ids
|
||||
assert second.evidence_policy.require_flow is False
|
||||
|
||||
@@ -1,204 +1,121 @@
|
||||
import pytest
|
||||
|
||||
from tests.unit_tests.rag.asserts_intent_router import (
|
||||
assert_domain_layer_prefixes,
|
||||
assert_domains,
|
||||
assert_file_only_scope,
|
||||
assert_has_file_path,
|
||||
assert_intent,
|
||||
assert_no_symbol_keyword,
|
||||
assert_no_symbol_leakage_from_paths,
|
||||
assert_spans_valid,
|
||||
assert_sub_intent,
|
||||
assert_test_policy,
|
||||
)
|
||||
from tests.unit_tests.rag.intent_router_testkit import run_sequence
|
||||
|
||||
pytestmark = pytest.mark.intent_router
|
||||
|
||||
|
||||
def test_invariant_code_file_path_with_canonical_key_term() -> None:
|
||||
result = run_sequence(["Уточни по файлу app/core/config.py"])[0]
|
||||
|
||||
assert_intent(result, "CODE_QA")
|
||||
assert_has_file_path(result, "app/core/config.py")
|
||||
assert_file_only_scope(result, "app/core/config.py")
|
||||
key_terms = [anchor.value for anchor in result.query_plan.anchors if anchor.type == "KEY_TERM"]
|
||||
assert "файл" in key_terms
|
||||
assert "файлу" not in key_terms
|
||||
assert_spans_valid(result)
|
||||
assert_domain_layer_prefixes(result)
|
||||
def _docs_result(query: str):
|
||||
result = run_sequence([query])[0]
|
||||
assert result.docs_routing is not None
|
||||
assert result.retrieval_plan is not None
|
||||
return result
|
||||
|
||||
|
||||
def test_invariant_open_file_for_specified_file_phrase_uses_narrow_layers() -> None:
|
||||
result = run_sequence(["Уточни по файлу app/core/config.py"])[0]
|
||||
@pytest.mark.parametrize(
|
||||
("query", "endpoint"),
|
||||
[
|
||||
("как работает метод health", "/health"),
|
||||
("объясни /health", "/health"),
|
||||
("что делает endpoint /send", "/send"),
|
||||
],
|
||||
)
|
||||
def test_docs_mvp_api_method_explain_cases(query: str, endpoint: str) -> None:
|
||||
result = _docs_result(query)
|
||||
|
||||
assert_intent(result, "CODE_QA")
|
||||
assert_sub_intent(result, "OPEN_FILE")
|
||||
assert_file_only_scope(result, "app/core/config.py")
|
||||
layer_ids = [item.layer_id for item in result.retrieval_spec.layer_queries]
|
||||
assert layer_ids == ["C0_SOURCE_CHUNKS"]
|
||||
assert result.evidence_policy.require_flow is False
|
||||
assert result.docs_routing.sub_intent == "API_METHOD_EXPLAIN"
|
||||
assert result.docs_routing.intent == "DOCS_QA"
|
||||
assert result.docs_routing.scope.level == "method"
|
||||
assert result.docs_routing.anchors.endpoint_path == endpoint
|
||||
assert result.retrieval_plan.plan_id == "docs_api_method_explain_v1"
|
||||
assert result.retrieval_plan.filters["endpoint_path"] == endpoint
|
||||
|
||||
|
||||
def test_invariant_inline_code_span_routes_to_code_and_extracts_symbol() -> None:
|
||||
result = run_sequence(["Уточни по коду `def build(x): return x`"])[0]
|
||||
@pytest.mark.parametrize(
|
||||
("query", "scope_level", "domain_name"),
|
||||
[
|
||||
("какие есть методы в проекте", "project", None),
|
||||
("покажи все api", "project", None),
|
||||
("какие методы в notifications", "domain", "notifications"),
|
||||
],
|
||||
)
|
||||
def test_docs_mvp_list_api_methods_cases(query: str, scope_level: str, domain_name: str | None) -> None:
|
||||
result = _docs_result(query)
|
||||
|
||||
assert_intent(result, "CODE_QA")
|
||||
assert_spans_valid(result)
|
||||
assert_no_symbol_keyword(result)
|
||||
symbols = [anchor.value for anchor in result.query_plan.anchors if anchor.type == "SYMBOL"]
|
||||
key_terms = [anchor.value for anchor in result.query_plan.anchors if anchor.type == "KEY_TERM"]
|
||||
assert "build" in symbols
|
||||
assert "def" in key_terms
|
||||
assert result.docs_routing.sub_intent == "LIST_API_METHODS"
|
||||
assert result.docs_routing.intent == "DOCS_DISCOVERY"
|
||||
assert result.docs_routing.scope.level == scope_level
|
||||
assert result.retrieval_plan.plan_id == "docs_list_api_methods_v1"
|
||||
assert result.retrieval_plan.primary_doc_types == ["api_method"]
|
||||
if domain_name:
|
||||
assert result.retrieval_plan.filters["domain_name"] == domain_name
|
||||
|
||||
|
||||
def test_invariant_docs_cyrillic_path_with_quotes() -> None:
|
||||
result = run_sequence(["Что сказано в «docs/архитектура.md»?"])[0]
|
||||
@pytest.mark.parametrize(
|
||||
("query", "domain_name", "subdomain_name", "entity_name"),
|
||||
[
|
||||
("какие документы есть по notifications", "notifications", None, None),
|
||||
("найди документацию по telegram_delivery", "telegram_delivery", None, None),
|
||||
("какие документы связаны с health", None, None, "health"),
|
||||
],
|
||||
)
|
||||
def test_docs_mvp_find_documents_cases(
|
||||
query: str,
|
||||
domain_name: str | None,
|
||||
subdomain_name: str | None,
|
||||
entity_name: str | None,
|
||||
) -> None:
|
||||
result = _docs_result(query)
|
||||
|
||||
assert_intent(result, "DOCUMENTATION_EXPLAIN")
|
||||
assert_sub_intent(result, "COMPONENT_EXPLAIN")
|
||||
assert_domains(result, ["DOCS"])
|
||||
assert "docs/архитектура.md" in result.query_plan.normalized
|
||||
assert_has_file_path(result, "docs/архитектура.md")
|
||||
assert any(anchor.type == "DOC_REF" for anchor in result.query_plan.anchors)
|
||||
assert result.retrieval_spec.filters.doc_kinds == []
|
||||
assert_spans_valid(result)
|
||||
assert_domain_layer_prefixes(result)
|
||||
assert result.docs_routing.sub_intent == "FIND_DOCUMENTS_BY_DOMAIN"
|
||||
assert result.docs_routing.intent == "DOCS_DISCOVERY"
|
||||
assert result.retrieval_plan.plan_id == "docs_find_documents_by_domain_v1"
|
||||
if domain_name:
|
||||
assert result.retrieval_plan.filters["domain_name"] == domain_name
|
||||
if subdomain_name:
|
||||
assert result.retrieval_plan.filters["subdomain_name"] == subdomain_name
|
||||
if entity_name:
|
||||
assert result.retrieval_plan.filters["entity_name"] == entity_name
|
||||
|
||||
|
||||
def test_invariant_file_check_phrase_not_project_misc() -> None:
|
||||
result = run_sequence(["Проверь app/modules/rag/explain/intent_builder.py и объясни"])[0]
|
||||
@pytest.mark.parametrize(
|
||||
("query", "scope_level", "endpoint"),
|
||||
[
|
||||
("сгенерируй openapi по /health", "method", "/health"),
|
||||
("собери swagger по notifications", "domain", None),
|
||||
("сделай спецификацию api по всему проекту", "project", None),
|
||||
],
|
||||
)
|
||||
def test_docs_mvp_generate_openapi_cases(query: str, scope_level: str, endpoint: str | None) -> None:
|
||||
result = _docs_result(query)
|
||||
|
||||
assert_intent(result, "CODE_QA")
|
||||
assert_domains(result, ["CODE"])
|
||||
assert_no_symbol_leakage_from_paths(result)
|
||||
assert_domain_layer_prefixes(result)
|
||||
assert result.docs_routing.sub_intent == "GENERATE_OPENAPI"
|
||||
assert result.docs_routing.intent == "DOCS_GENERATION"
|
||||
assert result.docs_routing.scope.level == scope_level
|
||||
assert result.retrieval_plan.plan_id == "docs_generate_openapi_v1"
|
||||
if endpoint:
|
||||
assert result.retrieval_plan.filters["endpoint_path"] == endpoint
|
||||
|
||||
|
||||
def test_invariant_tests_include_routing() -> None:
|
||||
result = run_sequence(["Где тесты на ConfigManager?"])[0]
|
||||
@pytest.mark.parametrize(
|
||||
"query",
|
||||
[
|
||||
"что делает это приложение",
|
||||
"как устроен сервис",
|
||||
"как связаны worker и api",
|
||||
],
|
||||
)
|
||||
def test_docs_mvp_general_docs_qa_cases(query: str) -> None:
|
||||
result = _docs_result(query)
|
||||
|
||||
assert_intent(result, "CODE_QA")
|
||||
assert_test_policy(result, "INCLUDE")
|
||||
symbols = [anchor.value for anchor in result.query_plan.anchors if anchor.type == "SYMBOL"]
|
||||
key_terms = [anchor.value for anchor in result.query_plan.anchors if anchor.type == "KEY_TERM"]
|
||||
assert "ConfigManager" in symbols
|
||||
assert "тест" in key_terms
|
||||
assert result.docs_routing.sub_intent == "GENERAL_DOCS_QA"
|
||||
assert result.docs_routing.intent == "DOCS_FALLBACK"
|
||||
assert result.retrieval_plan.plan_id == "docs_general_docs_qa_v1"
|
||||
|
||||
|
||||
def test_invariant_keyword_hints_and_expansions_for_function_identifier() -> None:
|
||||
result = run_sequence(["Теперь объясни функцию load_config"])[0]
|
||||
def test_docs_mvp_retrieval_filters_are_merged_into_legacy_spec() -> None:
|
||||
result = _docs_result("какие методы в notifications")
|
||||
|
||||
assert_intent(result, "CODE_QA")
|
||||
assert "load_config" in result.query_plan.keyword_hints
|
||||
assert "функция" not in result.query_plan.keyword_hints
|
||||
assert "def" not in result.query_plan.expansions
|
||||
|
||||
|
||||
def test_invariant_open_file_sub_intent_uses_narrow_retrieval_profile() -> None:
|
||||
result = run_sequence(["Открой файл app/core/config.py"])[0]
|
||||
|
||||
assert_intent(result, "CODE_QA")
|
||||
assert_sub_intent(result, "OPEN_FILE")
|
||||
assert_file_only_scope(result, "app/core/config.py")
|
||||
layer_ids = [item.layer_id for item in result.retrieval_spec.layer_queries]
|
||||
assert "C0_SOURCE_CHUNKS" in layer_ids
|
||||
assert "C1_SYMBOL_CATALOG" not in layer_ids
|
||||
assert "C2_DEPENDENCY_GRAPH" not in layer_ids
|
||||
assert "C3_ENTRYPOINTS" not in layer_ids
|
||||
assert result.evidence_policy.require_flow is False
|
||||
|
||||
|
||||
def test_invariant_docs_question_routes_to_docs() -> None:
|
||||
result = run_sequence(["Что сказано в документации?"])[0]
|
||||
|
||||
assert_intent(result, "DOCUMENTATION_EXPLAIN")
|
||||
assert_domains(result, ["DOCS"])
|
||||
assert_domain_layer_prefixes(result)
|
||||
assert result.query_plan.keyword_hints
|
||||
assert any(item in result.query_plan.expansions for item in result.query_plan.keyword_hints)
|
||||
|
||||
|
||||
def test_invariant_docs_flow_sub_intent_uses_workflow_layers() -> None:
|
||||
result = run_sequence(["Как работает процесс создания заказа по документации?"])[0]
|
||||
|
||||
assert_intent(result, "DOCUMENTATION_EXPLAIN")
|
||||
assert_sub_intent(result, "SYSTEM_FLOW_EXPLAIN")
|
||||
layer_ids = [item.layer_id for item in result.retrieval_spec.layer_queries]
|
||||
assert layer_ids == ["D4_WORKFLOW_INDEX", "D5_RELATION_GRAPH", "D1_DOCUMENT_CATALOG", "D0_DOC_CHUNKS"]
|
||||
|
||||
|
||||
def test_invariant_docs_entity_sub_intent_uses_entity_layers() -> None:
|
||||
result = run_sequence(["Что такое сущность Order в документации?"])[0]
|
||||
|
||||
assert_intent(result, "DOCUMENTATION_EXPLAIN")
|
||||
assert_sub_intent(result, "ENTITY_EXPLAIN")
|
||||
layer_ids = [item.layer_id for item in result.retrieval_spec.layer_queries]
|
||||
assert layer_ids == ["D3_ENTITY_CATALOG", "D5_RELATION_GRAPH", "D1_DOCUMENT_CATALOG", "D0_DOC_CHUNKS"]
|
||||
|
||||
|
||||
def test_invariant_entity_like_camel_case_prefers_entity_explain() -> None:
|
||||
result = run_sequence(["Что такое WorkerHealth?"])[0]
|
||||
|
||||
assert_intent(result, "DOCUMENTATION_EXPLAIN")
|
||||
assert_sub_intent(result, "ENTITY_EXPLAIN")
|
||||
|
||||
|
||||
def test_invariant_related_docs_routes_to_docs_explain() -> None:
|
||||
result = run_sequence(["Найди документацию по billing"])[0]
|
||||
|
||||
assert_intent(result, "DOCUMENTATION_EXPLAIN")
|
||||
assert_sub_intent(result, "RELATED_DOCS_EXPLAIN")
|
||||
layer_ids = [item.layer_id for item in result.retrieval_spec.layer_queries]
|
||||
assert layer_ids == ["D5_RELATION_GRAPH", "D1_DOCUMENT_CATALOG", "D0_DOC_CHUNKS"]
|
||||
|
||||
|
||||
def test_invariant_docs_navigation_uses_related_docs_explain() -> None:
|
||||
result = run_sequence(["Что связано с checkout документацией?"])[0]
|
||||
|
||||
assert_intent(result, "DOCUMENTATION_EXPLAIN")
|
||||
assert_sub_intent(result, "RELATED_DOCS_EXPLAIN")
|
||||
layer_ids = [item.layer_id for item in result.retrieval_spec.layer_queries]
|
||||
assert layer_ids == ["D5_RELATION_GRAPH", "D1_DOCUMENT_CATALOG", "D0_DOC_CHUNKS"]
|
||||
|
||||
|
||||
def test_invariant_openapi_routes_to_docs_layers_with_api_filter() -> None:
|
||||
result = run_sequence(["Сгенерируй openapi yaml для создания заказа"])[0]
|
||||
|
||||
assert_intent(result, "OPENAPI_GENERATION")
|
||||
assert_sub_intent(result, "OPENAPI_METHOD_GENERATE")
|
||||
layer_ids = [item.layer_id for item in result.retrieval_spec.layer_queries]
|
||||
assert layer_ids == ["D1_DOCUMENT_CATALOG", "D2_FACT_INDEX", "D0_DOC_CHUNKS"]
|
||||
assert result.retrieval_spec.filters.doc_type == "api_method"
|
||||
|
||||
|
||||
def test_invariant_general_qa_routes_to_generic_docs_layers() -> None:
|
||||
result = run_sequence(["Помоги разобраться"])[0]
|
||||
|
||||
assert_intent(result, "GENERAL_QA")
|
||||
assert_sub_intent(result, "GENERIC_QA")
|
||||
assert_domains(result, ["DOCS"])
|
||||
layer_ids = [item.layer_id for item in result.retrieval_spec.layer_queries]
|
||||
assert layer_ids == ["D1_DOCUMENT_CATALOG", "D0_DOC_CHUNKS"]
|
||||
|
||||
|
||||
def test_invariant_component_like_manager_routes_to_component_explain() -> None:
|
||||
result = run_sequence(["Какую роль в системе играет RuntimeManager?"])[0]
|
||||
|
||||
assert_intent(result, "DOCUMENTATION_EXPLAIN")
|
||||
assert_sub_intent(result, "COMPONENT_EXPLAIN")
|
||||
|
||||
|
||||
def test_invariant_cycle_query_routes_to_system_flow_explain() -> None:
|
||||
result = run_sequence(["Объясни как работает цикл отправки уведомлений"])[0]
|
||||
|
||||
assert_intent(result, "DOCUMENTATION_EXPLAIN")
|
||||
assert_sub_intent(result, "SYSTEM_FLOW_EXPLAIN")
|
||||
|
||||
|
||||
def test_invariant_overview_question_routes_to_general_qa() -> None:
|
||||
result = run_sequence(["Что вообще описано в документации по этому сервису?"])[0]
|
||||
|
||||
assert_intent(result, "GENERAL_QA")
|
||||
assert_sub_intent(result, "GENERIC_QA")
|
||||
assert getattr(result.retrieval_spec.filters, "doc_type", None) == "api_method"
|
||||
assert getattr(result.retrieval_spec.filters, "domain_name", None) == "notifications"
|
||||
assert getattr(result.retrieval_spec.filters, "scope_level", None) == "domain"
|
||||
|
||||
@@ -2,9 +2,9 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from app.modules.agent.intent_router_v2 import ConversationState, IntentRouterV2
|
||||
from app.modules.agent.intent_router_v2.intent.classifier import IntentClassifierV2
|
||||
from app.modules.agent.intent_router_v2.intent.llm_disambiguator import DocsLlmDisambiguator
|
||||
from app.core.agent.intent_router import ConversationState, IntentRouterV2
|
||||
from app.core.agent.intent_router.docs_mvp.llm_classifier import DocsMvpLlmClassifier
|
||||
from app.core.agent.intent_router.intent.classifier import IntentClassifierV2
|
||||
from tests.unit_tests.rag.intent_router_testkit import repo_context
|
||||
|
||||
|
||||
@@ -21,75 +21,62 @@ class FakeLlm:
|
||||
return self.response
|
||||
|
||||
|
||||
def test_technical_query_keeps_deterministic_routing_without_llm_call() -> None:
|
||||
llm = FakeLlm('{"sub_intent":"GENERIC_QA","reason":"unused","confidence":"low"}')
|
||||
router = IntentRouterV2(
|
||||
def _router(llm: FakeLlm) -> IntentRouterV2:
|
||||
return IntentRouterV2(
|
||||
classifier=IntentClassifierV2(),
|
||||
llm_disambiguator=DocsLlmDisambiguator(llm),
|
||||
docs_llm_classifier=DocsMvpLlmClassifier(llm),
|
||||
enable_llm_disambiguation=True,
|
||||
)
|
||||
|
||||
result = router.route("Объясни endpoint /health", ConversationState(), repo_context())
|
||||
|
||||
assert result.query_plan.sub_intent == "API_METHOD_EXPLAIN"
|
||||
assert result.is_ambiguous is False
|
||||
assert result.routing_mode == "deterministic"
|
||||
def test_docs_technical_query_keeps_deterministic_routing_without_llm_call() -> None:
|
||||
llm = FakeLlm("{}")
|
||||
result = _router(llm).route("Объясни endpoint /health", ConversationState(), repo_context())
|
||||
|
||||
assert result.docs_routing is not None
|
||||
assert result.docs_routing.sub_intent == "API_METHOD_EXPLAIN"
|
||||
assert result.docs_routing.routing_mode == "deterministic"
|
||||
assert result.llm_router_used is False
|
||||
assert llm.calls == []
|
||||
|
||||
|
||||
def test_ambiguous_query_can_be_resolved_by_llm() -> None:
|
||||
llm = FakeLlm('{"sub_intent":"ENTITY_EXPLAIN","reason":"runtime health is a concept/entity here","confidence":"medium"}')
|
||||
router = IntentRouterV2(
|
||||
classifier=IntentClassifierV2(),
|
||||
llm_disambiguator=DocsLlmDisambiguator(llm),
|
||||
enable_llm_disambiguation=True,
|
||||
llm = FakeLlm(
|
||||
json.dumps(
|
||||
{
|
||||
"intent": "DOCS_DISCOVERY",
|
||||
"sub_intent": "FIND_DOCUMENTS_BY_DOMAIN",
|
||||
"confidence": 0.83,
|
||||
"anchors": {"entity_name": "health", "doc_query": "документация по health"},
|
||||
"scope": {"level": "domain"},
|
||||
"reason_short": "health here is a docs topic",
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
)
|
||||
result = _router(llm).route("документация по health", ConversationState(), repo_context())
|
||||
|
||||
result = router.route("Объясни runtime health", ConversationState(), repo_context())
|
||||
|
||||
assert result.is_ambiguous is True
|
||||
assert result.routing_mode == "llm_disambiguation"
|
||||
assert result.docs_routing is not None
|
||||
assert result.docs_routing.routing_mode == "llm_assisted"
|
||||
assert result.docs_routing.sub_intent == "FIND_DOCUMENTS_BY_DOMAIN"
|
||||
assert result.retrieval_plan is not None
|
||||
assert result.retrieval_plan.plan_id == "docs_find_documents_by_domain_v1"
|
||||
assert result.llm_router_used is True
|
||||
assert result.deterministic_selected_sub_intent
|
||||
assert result.llm_router_selected_sub_intent == "ENTITY_EXPLAIN"
|
||||
assert result.query_plan.sub_intent == "ENTITY_EXPLAIN"
|
||||
|
||||
|
||||
def test_ambiguous_query_falls_back_to_deterministic_when_llm_fails() -> None:
|
||||
def test_ambiguous_query_falls_back_to_general_docs_when_llm_fails() -> None:
|
||||
llm = FakeLlm("{}", fail=True)
|
||||
router = IntentRouterV2(
|
||||
classifier=IntentClassifierV2(),
|
||||
llm_disambiguator=DocsLlmDisambiguator(llm),
|
||||
enable_llm_disambiguation=True,
|
||||
)
|
||||
result = _router(llm).route("health документация", ConversationState(), repo_context())
|
||||
|
||||
result = router.route("Как работает health check runtime?", ConversationState(), repo_context())
|
||||
|
||||
assert result.is_ambiguous is True
|
||||
assert result.routing_mode == "deterministic_fallback"
|
||||
assert result.llm_router_used is False
|
||||
assert result.llm_router_error == "llm unavailable"
|
||||
assert result.query_plan.sub_intent == result.deterministic_selected_sub_intent
|
||||
assert result.docs_routing is not None
|
||||
assert result.docs_routing.routing_mode == "llm_fallback"
|
||||
assert result.docs_routing.sub_intent == "GENERAL_DOCS_QA"
|
||||
assert result.retrieval_plan is not None
|
||||
assert result.retrieval_plan.plan_id == "docs_general_docs_qa_v1"
|
||||
|
||||
|
||||
def test_overview_query_stays_in_generic_qa() -> None:
|
||||
llm = FakeLlm('{"sub_intent":"GENERIC_QA","reason":"overview query","confidence":"high"}')
|
||||
router = IntentRouterV2(
|
||||
classifier=IntentClassifierV2(),
|
||||
llm_disambiguator=DocsLlmDisambiguator(llm),
|
||||
enable_llm_disambiguation=True,
|
||||
)
|
||||
def test_llm_classifier_rejects_unknown_labels() -> None:
|
||||
llm = FakeLlm(json.dumps({"intent": "DOCS_QA", "sub_intent": "MADE_UP"}))
|
||||
classifier = DocsMvpLlmClassifier(llm)
|
||||
|
||||
result = router.route("Какая структура документации?", ConversationState(), repo_context())
|
||||
|
||||
assert result.is_ambiguous is False or result.query_plan.sub_intent == "GENERIC_QA"
|
||||
assert result.intent == "GENERAL_QA"
|
||||
assert result.query_plan.sub_intent == "GENERIC_QA"
|
||||
|
||||
|
||||
def test_llm_disambiguator_rejects_unknown_labels() -> None:
|
||||
llm = FakeLlm(json.dumps({"sub_intent": "MADE_UP", "reason": "bad", "confidence": "high"}))
|
||||
disambiguator = DocsLlmDisambiguator(llm)
|
||||
|
||||
assert disambiguator.choose({"query": "test"}) is None
|
||||
assert classifier.classify({"query": "test"}) is None
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from app.modules.agent.runtime.steps.explain.layered_gateway import LayeredRetrievalGateway
|
||||
from app.core.agent.runtime.steps.explain.layered_gateway import LayeredRetrievalGateway
|
||||
|
||||
|
||||
class _Embedder:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.modules.rag.indexing.common.path_filter import (
|
||||
from app.core.rag.indexing.common.path_filter import (
|
||||
count_indexable_change_upserts,
|
||||
filter_changes_for_indexing,
|
||||
filter_snapshot_files,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import pytest
|
||||
|
||||
from app.modules.agent.intent_router_v2.analysis.normalization import QueryNormalizer
|
||||
from app.core.agent.intent_router.analysis.normalization import QueryNormalizer
|
||||
|
||||
pytestmark = pytest.mark.intent_router
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from app.modules.rag.retrieval.query_terms import extract_query_terms
|
||||
from app.core.rag.retrieval.query_terms import extract_query_terms
|
||||
|
||||
|
||||
def test_extract_query_terms_from_code_question() -> None:
|
||||
|
||||
@@ -2,7 +2,8 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
from app.modules.rag.services.rag_service import RagService
|
||||
from app.core.rag.contracts.enums import RagLayer
|
||||
from app.core.rag.indexing.service import RagService
|
||||
|
||||
|
||||
class _FakeEmbedder:
|
||||
@@ -50,3 +51,40 @@ def test_rag_service_progress_uses_only_indexable_files() -> None:
|
||||
assert cache_hits == 0
|
||||
assert cache_misses == 1
|
||||
assert progress == [(1, 1, "src/main.py")]
|
||||
|
||||
|
||||
def test_rag_service_keeps_docs_artifact_type_metadata() -> None:
|
||||
repository = _FakeRepository()
|
||||
service = RagService(embedder=_FakeEmbedder(), repository=repository)
|
||||
files = [
|
||||
{
|
||||
"path": "docs/api/health.md",
|
||||
"content_hash": "docs-h1",
|
||||
"content": """---
|
||||
id: api.health
|
||||
type: api_method
|
||||
doc_type: api_method
|
||||
title: Health API
|
||||
domain: runtime
|
||||
sub_domain: health
|
||||
related_docs: []
|
||||
status: active
|
||||
---
|
||||
# Health API
|
||||
|
||||
## Summary
|
||||
- Purpose: check service health.
|
||||
|
||||
## Details
|
||||
### Описание
|
||||
Returns health payload.
|
||||
""",
|
||||
}
|
||||
]
|
||||
|
||||
asyncio.run(service.index_snapshot("project-1", files))
|
||||
|
||||
doc_chunk = next(doc for doc in repository.replaced_docs if doc.layer == RagLayer.DOCS_DOC_CHUNKS)
|
||||
assert doc_chunk.metadata["artifact_type"] == "DOCS"
|
||||
assert doc_chunk.metadata["domain"] == "runtime"
|
||||
assert doc_chunk.metadata["subdomain"] == "health"
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
"""Smoke-тест стандартного retrieval API: один embed и вызов repository."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from app.core.rag.embedding.gigachat_embedder import GigaChatEmbedder
|
||||
from app.core.rag.retrieval.session_retriever import RagSessionRetriever, RetrievalPlan
|
||||
|
||||
|
||||
def test_rag_session_retriever_calls_repository() -> None:
|
||||
embedder = MagicMock(spec=GigaChatEmbedder)
|
||||
embedder.embed = MagicMock(return_value=[[0.1, 0.2]])
|
||||
repo = MagicMock()
|
||||
repo.retrieve = MagicMock(return_value=[{"path": "a.md", "layer": "D0_DOC_CHUNKS"}])
|
||||
retriever = RagSessionRetriever(repository=repo, embedder=embedder)
|
||||
plan = RetrievalPlan(profile="test", layers=["D0_DOC_CHUNKS", "D1_DOCUMENT_CATALOG"], limit=5)
|
||||
rows = asyncio.run(retriever.retrieve("sid-1", "hello", plan))
|
||||
assert len(rows) == 1
|
||||
assert embedder.embed.called
|
||||
assert repo.retrieve.called
|
||||
call_kw = repo.retrieve.call_args
|
||||
assert call_kw[0][0] == "sid-1"
|
||||
assert call_kw[1]["layers"] == plan.layers
|
||||
assert call_kw[1]["limit"] == 5
|
||||
@@ -1,5 +1,5 @@
|
||||
from app.modules.rag.persistence.retrieval_statement_builder import RetrievalStatementBuilder
|
||||
from app.modules.rag.retrieval.test_filter import build_test_filters, is_test_path
|
||||
from app.core.rag.persistence.retrieval_statement_builder import RetrievalStatementBuilder
|
||||
from app.core.rag.retrieval.test_filter import build_test_filters, is_test_path
|
||||
|
||||
|
||||
def test_retrieve_builder_adds_test_exclusion_filters() -> None:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from app.modules.agent.runtime.steps.explain import CodeExplainRetrieverV2, LayeredRetrievalGateway
|
||||
from app.core.agent.runtime.steps.explain import CodeExplainRetrieverV2, LayeredRetrievalGateway
|
||||
|
||||
|
||||
class _ExplodingEmbedder:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from app.modules.agent.runtime.steps.explain.models import CodeLocation, LayeredRetrievalItem
|
||||
from app.modules.agent.runtime.steps.explain.retriever_v2 import CodeExplainRetrieverV2
|
||||
from app.core.agent.runtime.steps.explain.models import CodeLocation, LayeredRetrievalItem
|
||||
from app.core.agent.runtime.steps.explain.retriever_v2 import CodeExplainRetrieverV2
|
||||
|
||||
|
||||
class _FakeGateway:
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from types import SimpleNamespace
|
||||
|
||||
from app.modules.agent.runtime.steps.explain.models import CodeLocation, LayeredRetrievalItem
|
||||
from app.modules.agent.runtime.steps.explain.retriever_v2 import CodeExplainRetrieverV2
|
||||
from app.core.agent.runtime.steps.explain.models import CodeLocation, LayeredRetrievalItem
|
||||
from app.core.agent.runtime.steps.explain.retriever_v2 import CodeExplainRetrieverV2
|
||||
|
||||
|
||||
class _ProductionFirstGateway:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from app.modules.agent.runtime.steps.explain.models import CodeLocation, LayeredRetrievalItem
|
||||
from app.modules.agent.runtime.steps.explain.trace_builder import TraceBuilder
|
||||
from app.core.agent.runtime.steps.explain.models import CodeLocation, LayeredRetrievalItem
|
||||
from app.core.agent.runtime.steps.explain.trace_builder import TraceBuilder
|
||||
|
||||
|
||||
class _FakeGraphRepository:
|
||||
|
||||
Reference in New Issue
Block a user