Роутер работает нормально в process v2

This commit is contained in:
2026-04-07 14:09:51 +03:00
parent 5d77ab1a88
commit 6b74d410cd
1748 changed files with 216679 additions and 14208 deletions
@@ -2,7 +2,7 @@ from __future__ import annotations
import re
from app.modules.agent.intent_router_v2.models import IntentRouterResult
from app.core.agent.intent_router.models import IntentRouterResult
def assert_intent(out: IntentRouterResult, expected: str) -> None:
@@ -57,7 +57,7 @@ def assert_domain_layer_prefixes(out: IntentRouterResult) -> None:
prefixes = {layer.layer_id[0] for layer in out.retrieval_spec.layer_queries if layer.layer_id}
if out.retrieval_spec.domains == ["CODE"]:
assert prefixes <= {"C"}
elif out.retrieval_spec.domains == ["DOCS"]:
elif out.retrieval_spec.domains in (["DOCS"], ["GENERAL"]):
assert prefixes <= {"D"}
else:
assert prefixes <= {"C", "D"}
@@ -2,26 +2,22 @@ from __future__ import annotations
import json
from app.modules.rag.contracts.enums import RagLayer
from app.modules.agent.intent_router_v2 import ConversationState, IntentRouterV2, RepoContext
from app.core.rag.contracts.enums import RagLayer
from app.core.agent.intent_router import ConversationState, IntentRouterV2, RepoContext
def repo_context() -> RepoContext:
return RepoContext(
languages=["python"],
available_domains=["CODE", "DOCS"],
available_domains=["DOCS", "GENERAL"],
available_layers=[
RagLayer.CODE_ENTRYPOINTS,
RagLayer.CODE_SYMBOL_CATALOG,
RagLayer.CODE_DEPENDENCY_GRAPH,
RagLayer.CODE_SEMANTIC_ROLES,
RagLayer.CODE_SOURCE_CHUNKS,
RagLayer.DOCS_DOC_CHUNKS,
RagLayer.DOCS_DOCUMENT_CATALOG,
RagLayer.DOCS_FACT_INDEX,
RagLayer.DOCS_ENTITY_CATALOG,
RagLayer.DOCS_WORKFLOW_INDEX,
RagLayer.DOCS_RELATION_GRAPH,
RagLayer.DOCS_INTEGRATION_INDEX,
],
)
@@ -1,5 +1,5 @@
from app.modules.rag.contracts.enums import RagLayer
from app.modules.rag.indexing.code.pipeline import CodeIndexingPipeline
from app.core.rag.contracts.enums import RagLayer
from app.core.rag.indexing.code.pipeline import CodeIndexingPipeline
def test_code_pipeline_builds_source_symbols_edges_and_entrypoints() -> None:
@@ -1,15 +1,18 @@
from app.modules.rag.contracts.enums import RagLayer
from app.modules.rag.indexing.docs.pipeline import DocsIndexingPipeline
from app.core.rag.contracts.enums import RagLayer
from app.core.rag.indexing.docs.pipeline import DocsIndexingPipeline
def test_docs_pipeline_builds_new_d0_to_d5_layers() -> None:
def test_docs_pipeline_builds_docs_layers_from_modern_markdown_structure() -> None:
pipeline = DocsIndexingPipeline()
content = """---
id: api.billing.create_invoice
type: api_method
doc_type: api_method
name: create_invoice
title: Create Invoice API
module: billing
domain: billing
sub_domain: invoices
layer: application
status: draft
updated_at: 2026-03-23
@@ -17,21 +20,26 @@ tags: [billing, api]
entities: [Invoice]
parent: billing_api
children: []
related_docs: [api.billing.validate_invoice]
links:
- type: related_api
target: api.billing.validate_invoice
called_by:
- ui.billing.invoice_form
uses_logic:
- logic.billing.invoice_validation
---
# Summary
# Create Invoice API
## Summary
Creates an invoice in billing.
# Details
## Details
## Описание
### Описание
Создает счет на оплату.
## Сценарий
### Сценарий
**Название:**
Create invoice
@@ -55,7 +63,12 @@ Create invoice
**Постусловие:**
- Invoice is created.
## Контракт
### Контракт
#### Метаданные вызова
- Method: POST
- Auth: USER
- Idempotency: false
### Входные параметры
@@ -69,7 +82,22 @@ Create invoice
| --- | --- | --- |
| invoice_id | string | yes |
## Ошибки
### Интеграции
#### Billing DB
- target: db.billing.invoices
- target_type: db
- direction: outbound
- interaction: writes
- via: invoice repository
- purpose: persist created invoices
- details:
- transaction: required
- tables:
- invoices
- invoice_items
### Ошибки
| status | error | client action |
| --- | --- | --- |
@@ -89,10 +117,14 @@ Create invoice
assert RagLayer.DOCS_ENTITY_CATALOG in layers
assert RagLayer.DOCS_WORKFLOW_INDEX in layers
assert RagLayer.DOCS_RELATION_GRAPH in layers
assert RagLayer.DOCS_INTEGRATION_INDEX in layers
catalog_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_DOCUMENT_CATALOG)
assert catalog_doc.metadata["document_id"] == "api.billing.create_invoice"
assert catalog_doc.metadata["module"] == "billing"
assert catalog_doc.metadata["domain"] == "billing"
assert catalog_doc.metadata["subdomain"] == "invoices"
assert catalog_doc.metadata["summary_text"] == "Creates an invoice in billing."
fact_texts = [doc.text for doc in docs if doc.layer == RagLayer.DOCS_FACT_INDEX]
assert any("has_field amount" in text for text in fact_texts)
@@ -108,6 +140,16 @@ Create invoice
relation_targets = [doc.metadata["target_id"] for doc in docs if doc.layer == RagLayer.DOCS_RELATION_GRAPH]
assert "billing_api" in relation_targets
assert "api.billing.validate_invoice" in relation_targets
assert "logic.billing.invoice_validation" in relation_targets
assert "Invoice" in relation_targets
chunk_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_DOC_CHUNKS)
assert chunk_doc.metadata["section_path"]
assert chunk_doc.metadata["artifact_type"] == "DOCS"
assert chunk_doc.metadata["domain"] == "billing"
assert chunk_doc.metadata["subdomain"] == "invoices"
integration_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_INTEGRATION_INDEX)
assert integration_doc.metadata["target"] == "db.billing.invoices"
assert integration_doc.metadata["target_type"] == "db"
assert integration_doc.metadata["details"]["transaction"] == "required"
+34 -7
View File
@@ -1,11 +1,12 @@
from __future__ import annotations
from app.modules.agent.intent_router_v2 import IntentRouterV2
from app.modules.agent.llm.prompt_loader import PromptLoader
from app.modules.agent.runtime.docs_qa_pipeline import DocsQAPipelineRunner
from app.modules.agent.runtime.docs_qa_pipeline.openapi_postprocessor import OpenAPIPostprocessor
from app.modules.agent.runtime.docs_qa_pipeline.prompt_payload_builder import DocsPromptPayloadBuilder
from app.modules.agent.runtime.steps.generation import RuntimePromptSelector
from app.core.agent.intent_router import IntentRouterV2
from app.core.agent.llm.prompt_loader import PromptLoader
from app.core.agent.runtime.docs_qa_pipeline import DocsQAPipelineRunner
from app.core.agent.runtime.docs_qa_pipeline.openapi_postprocessor import OpenAPIPostprocessor
from app.core.agent.runtime.docs_qa_pipeline.prompt_payload_builder import DocsPromptPayloadBuilder
from app.core.agent.orchestration.processes.v2.prompt_payload_builder import V2PromptPayloadBuilder
from app.core.agent.runtime.steps.generation import RuntimePromptSelector
from tests.docs_qa_eval.fixture_adapter import InMemoryDocsRetrievalAdapter
from tests.unit_tests.rag.intent_router_testkit import repo_context
@@ -43,7 +44,7 @@ def test_prompt_selector_uses_docs_prompts_only() -> None:
def test_docs_prompt_payload_contains_required_contract() -> None:
builder = DocsPromptPayloadBuilder()
from app.modules.agent.runtime.docs_qa_pipeline.models import DocsEvidenceBundle, OpenAPIResult
from app.core.agent.runtime.docs_qa_pipeline.models import DocsEvidenceBundle, OpenAPIResult
payload = builder.build(
question="Объясни billing",
@@ -52,6 +53,8 @@ def test_docs_prompt_payload_contains_required_contract() -> None:
evidence_bundle=DocsEvidenceBundle(
intent="DOCUMENTATION_EXPLAIN",
sub_intent="COMPONENT_EXPLAIN",
primary_documents=[{"title": "Billing"}],
secondary_documents=[{"title": "Billing relation"}],
documents=[{"title": "Billing"}],
facts=[{"content": "Handles payments"}],
relations=[{"title": "Billing -> Orders"}],
@@ -62,12 +65,36 @@ def test_docs_prompt_payload_contains_required_contract() -> None:
assert '"question": "Объясни billing"' in payload
assert '"intent": "DOCUMENTATION_EXPLAIN"' in payload
assert '"sub_intent": "COMPONENT_EXPLAIN"' in payload
assert '"primary_documents"' in payload
assert '"secondary_documents"' in payload
assert '"documents"' in payload
assert '"facts"' in payload
assert '"relations"' in payload
assert '"api_contract"' in payload
def test_v2_prompt_payload_accepts_api_method_mode_fields() -> None:
    """Build a V2 prompt payload with API-method mode fields and check they appear in it."""
    from app.core.agent.runtime.docs_qa_pipeline.models import DocsEvidenceBundle

    builder = V2PromptPayloadBuilder()
    bundle = DocsEvidenceBundle(intent="DOCUMENTATION_EXPLAIN", sub_intent="API_METHOD_EXPLAIN")
    endpoint_identity = {
        "anchor": "health",
        "normalized_path": "/health",
        "normalized_doc_id": "api.health_endpoint",
    }

    payload = builder.build(
        question="Как работает метод health?",
        intent="DOCUMENTATION_EXPLAIN",
        sub_intent="API_METHOD_EXPLAIN",
        evidence_bundle=bundle,
        api_method_answer_mode="indirect",
        target_endpoint_identity=endpoint_identity,
        direct_api_spec_found=False,
    )

    # Each fragment must be present verbatim in the rendered payload.
    assert '"api_method_answer_mode": "indirect"' in payload
    assert '"normalized_doc_id": "api.health_endpoint"' in payload
    assert '"direct_api_spec_found": false' in payload
def test_openapi_postprocessor_requires_paths_for_full_spec() -> None:
validator = OpenAPIPostprocessor()
+354 -5
View File
@@ -1,7 +1,7 @@
from __future__ import annotations
from app.modules.agent.intent_router_v2 import IntentRouterV2
from app.modules.agent.runtime.docs_qa_pipeline import DocsQAPipelineRunner
from app.core.agent.intent_router import IntentRouterV2
from app.core.agent.runtime.docs_qa_pipeline import DocsQAPipelineRunner, DocsTaskPlanner
from tests.docs_qa_eval.fixture_adapter import InMemoryDocsRetrievalAdapter
from tests.unit_tests.rag.intent_router_testkit import repo_context
@@ -135,6 +135,52 @@ def test_openapi_partial_contract_returns_partial_mode() -> None:
assert "/orders" in result.answer
def test_docs_pipeline_accepts_precomputed_task_plan_without_rerouting() -> None:
    """Run the pipeline with a precomputed task plan and a router that raises if it is called."""
    catalog_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/api/health.md",
        "title": "GET /health",
        "content": "/health returns runtime and component statuses.",
        "metadata": {"document_id": "api.health_endpoint", "type": "api_method", "endpoint": "/health"},
    }
    fact_row = {
        "layer": "D2_FACT_INDEX",
        "path": "docs/api/health.md",
        "title": "api.health_endpoint:response",
        "content": "Returns health summary and component diagnostics.",
        "metadata": {"subject_id": "api.health_endpoint", "type": "api_method"},
    }
    rows = [catalog_row, fact_row]

    # The plan is produced up front with a real router.
    route_result = IntentRouterV2().route(
        "Объясни API метод /health",
        repo_context=repo_context(),
    )
    task_plan = DocsTaskPlanner().plan(
        "Объясни API метод /health",
        "docs-session",
        route_result=route_result,
    )

    class FailingRouter:
        # Any routing attempt by the runner fails the test immediately.
        def route(self, *_args, **_kwargs):
            raise AssertionError("runner should use the precomputed task plan")

    adapter = InMemoryDocsRetrievalAdapter(rows)
    runner = DocsQAPipelineRunner(FailingRouter(), adapter, repo_context=repo_context())
    result = runner.run(
        "Объясни API метод /health",
        "docs-session",
        mode="pre_llm_only",
        task_plan=task_plan,
    )

    router_result = result.router_result
    assert router_result.intent == "DOCUMENTATION_EXPLAIN"
    assert router_result.query_plan.sub_intent == "API_METHOD_EXPLAIN"
    assert result.diagnostics.selected_primary_documents == ["api.health_endpoint"]
    assert result.diagnostics.gate_decision == "allow_exact"
def test_pre_llm_mode_returns_diagnostic_only_without_answer_generation() -> None:
rows = [
{
@@ -172,7 +218,7 @@ def test_pre_llm_mode_detects_path_anchor_candidates() -> None:
assert "/health" in result.diagnostics.query_anchor_candidates
assert "/health" in result.diagnostics.resolved_anchor_candidates
assert result.diagnostics.planned_layers == ["D2_FACT_INDEX", "D4_WORKFLOW_INDEX", "D1_DOCUMENT_CATALOG", "D0_DOC_CHUNKS"]
assert result.diagnostics.planned_layers == ["D1_DOCUMENT_CATALOG", "D2_FACT_INDEX", "D0_DOC_CHUNKS", "D4_WORKFLOW_INDEX"]
assert set(result.diagnostics.executed_layers) == {"D1_DOCUMENT_CATALOG", "D2_FACT_INDEX", "D4_WORKFLOW_INDEX", "D0_DOC_CHUNKS"}
@@ -318,8 +364,311 @@ def test_openapi_request_fragment_uses_fragment_aware_gate() -> None:
assert result.answer_mode in {"ready", "ready_partial"}
assert result.answer
assert "type: object" in result.answer
assert "message:" in result.answer
assert "chat_id:" in result.answer
def test_api_method_explain_prefers_api_method_primary_doc() -> None:
    """Expect the api_method document to be the only primary doc for a question about the health method."""
    domain_entity_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/domain/runtime-health.md",
        "title": "Сущность runtime health",
        "content": "Runtime health describes overall service health.",
        "metadata": {"document_id": "domain.runtime_health", "type": "domain_entity"},
    }
    api_method_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/api/health.md",
        "title": "GET /health",
        "content": "/health returns runtime and component statuses.",
        "metadata": {"document_id": "api.health_endpoint", "type": "api_method", "endpoint": "/health"},
    }
    relation_row = {
        "layer": "D5_RELATION_GRAPH",
        "path": "docs/domain/runtime-health.md",
        "title": "Runtime health links",
        "content": "runtime health used by health endpoint",
        "metadata": {"document_id": "domain.runtime_health", "target_doc_id": "api.health_endpoint"},
    }
    rows = [domain_entity_row, api_method_row, relation_row]

    runner = DocsQAPipelineRunner(IntentRouterV2(), InMemoryDocsRetrievalAdapter(rows), repo_context=repo_context())
    result = runner.run("Что делает метод health?", "docs-session", mode="pre_llm_only")

    assert result.router_result.query_plan.sub_intent == "API_METHOD_EXPLAIN"
    # The anchor may be reported with or without the leading slash.
    assert result.diagnostics.target_anchor in {"health", "/health"}
    assert result.diagnostics.api_method_match_found is True
    assert result.diagnostics.selected_primary_documents == ["api.health_endpoint"]
    assert "api.health_endpoint" in result.diagnostics.primary_doc_candidates
    assert result.diagnostics.evidence_gate_require_target_api_spec is True
    assert result.diagnostics.evidence_gate_target_api_spec_found is True
    assert result.answer_mode == "exact"
    assert result.diagnostics.gate_decision == "allow_exact"
def test_api_method_explain_promotes_api_doc_via_links() -> None:
    """Expect the API doc referenced in a domain doc's links to be promoted and selected as primary."""
    linking_domain_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/domain/runtime-health.md",
        "title": "Сущность runtime health",
        "content": "Runtime health is the domain model for observability.",
        "metadata": {
            "document_id": "domain.runtime_health",
            "type": "domain_entity",
            "links": [{"target": "api.health_endpoint", "type": "used_by"}],
        },
    }
    api_chunk_row = {
        "layer": "D0_DOC_CHUNKS",
        "path": "docs/api/health.md",
        "title": "api.health_endpoint:Overview",
        "content": "Endpoint /health returns overall runtime status and component diagnostics.",
        "metadata": {"document_id": "api.health_endpoint", "type": "api_method", "endpoint": "/health"},
    }
    rows = [linking_domain_row, api_chunk_row]

    runner = DocsQAPipelineRunner(IntentRouterV2(), InMemoryDocsRetrievalAdapter(rows), repo_context=repo_context())
    result = runner.run("Как работает health endpoint?", "docs-session", mode="pre_llm_only")

    assert result.diagnostics.promoted_via_links == ["api.health_endpoint"]
    assert result.diagnostics.selected_primary_documents == ["api.health_endpoint"]
    assert result.diagnostics.api_method_match_found is True
def test_api_method_explain_rejects_cross_endpoint_primary_candidates() -> None:
    """Expect documents of a different endpoint to be rejected when the question targets health."""
    actions_catalog_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/documentation/api/control-actions-endpoint.md",
        "title": "HTTP API /actions/{action}",
        "content": "Endpoint for controlling actions.",
        "metadata": {
            "document_id": "api.control_actions_endpoint",
            "type": "api_method",
            "endpoint": "/actions/{action}",
        },
    }
    health_catalog_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/documentation/api/health-endpoint.md",
        "title": "HTTP API /health",
        "content": "Health endpoint returns runtime health and component diagnostics.",
        "metadata": {
            "document_id": "api.health_endpoint",
            "type": "api_method",
            "endpoint": "/health",
        },
    }
    actions_chunk_row = {
        "layer": "D0_DOC_CHUNKS",
        "path": "docs/documentation/api/actions-endpoint.md",
        "title": "api.control_actions_endpoint:Scenario",
        "content": "The /actions/{action} endpoint triggers runtime actions.",
        "metadata": {
            "document_id": "api.control_actions_endpoint",
            "type": "api_method",
            "endpoint": "/actions/{action}",
        },
    }
    rows = [actions_catalog_row, health_catalog_row, actions_chunk_row]

    runner = DocsQAPipelineRunner(IntentRouterV2(), InMemoryDocsRetrievalAdapter(rows), repo_context=repo_context())
    result = runner.run("Как работает метод health?", "docs-session", mode="pre_llm_only")

    assert result.router_result.query_plan.sub_intent == "API_METHOD_EXPLAIN"
    assert result.diagnostics.target_endpoint_identity["normalized_doc_id"] == "api.health_endpoint"
    assert result.diagnostics.selected_primary_documents == ["api.health_endpoint"]
    assert result.diagnostics.primary_api_documents_after_filter == ["api.health_endpoint"]
    # The foreign endpoint must be reported as rejected and kept out of the selection.
    assert "api.control_actions_endpoint" in result.diagnostics.rejected_endpoint_candidates
    assert result.diagnostics.cross_endpoint_leakage_detected is True
    assert result.diagnostics.evidence_gate_target_api_spec_found is True
    assert "api.control_actions_endpoint" not in result.diagnostics.selected_doc_ids
def test_api_method_explain_without_exact_target_returns_insufficiency() -> None:
    """Expect a rejected gate and the 'insufficient' answer mode when no row describes the health endpoint."""
    actions_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/documentation/api/control-actions-endpoint.md",
        "title": "HTTP API /actions/{action}",
        "content": "Endpoint for controlling actions.",
        "metadata": {
            "document_id": "api.control_actions_endpoint",
            "type": "api_method",
            "endpoint": "/actions/{action}",
        },
    }
    send_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/documentation/api/send-endpoint.md",
        "title": "HTTP API /send",
        "content": "Endpoint for sending messages.",
        "metadata": {
            "document_id": "api.send_message_endpoint",
            "type": "api_method",
            "endpoint": "/send",
        },
    }
    rows = [actions_row, send_row]

    runner = DocsQAPipelineRunner(IntentRouterV2(), InMemoryDocsRetrievalAdapter(rows), repo_context=repo_context())
    result = runner.run("Что делает метод health?", "docs-session", mode="pre_llm_only")

    assert result.router_result.query_plan.sub_intent == "API_METHOD_EXPLAIN"
    assert result.diagnostics.target_endpoint_identity["normalized_doc_id"] == "api.health_endpoint"
    assert result.diagnostics.selected_primary_documents == []
    assert "api.control_actions_endpoint" in result.diagnostics.rejected_endpoint_candidates
    assert "api.send_message_endpoint" in result.diagnostics.rejected_endpoint_candidates
    assert result.diagnostics.target_api_spec_found_exact is False
    assert result.diagnostics.evidence_gate_target_api_spec_found is False
    assert result.diagnostics.gate_decision == "reject"
    assert result.answer_mode == "insufficient"
    # Neither unrelated endpoint may end up among the selected documents.
    assert "api.control_actions_endpoint" not in result.diagnostics.selected_doc_ids
    assert "api.send_message_endpoint" not in result.diagnostics.selected_doc_ids
def test_api_method_explain_uses_indirect_mode_from_target_linked_docs() -> None:
    """Expect the 'indirect' answer mode when the only retrieved doc merely links to the target endpoint."""
    linking_domain_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/domain/runtime-health.md",
        "title": "Runtime health",
        "content": "Runtime health describes overall service state and component diagnostics.",
        "metadata": {
            "document_id": "domain.runtime_health",
            "type": "domain_entity",
            "links": [{"target": "api.health_endpoint", "type": "used_by"}],
        },
    }
    rows = [linking_domain_row]

    runner = DocsQAPipelineRunner(IntentRouterV2(), InMemoryDocsRetrievalAdapter(rows), repo_context=repo_context())
    result = runner.run("Как работает метод health?", "docs-session", mode="pre_llm_only")

    assert result.answer_mode == "indirect"
    assert result.diagnostics.gate_decision == "allow_indirect"
    assert result.diagnostics.raw_retrieval_non_empty is True
    assert result.diagnostics.target_primary_context_non_empty is False
    assert result.diagnostics.indirect_target_context_non_empty is True
    # Promotion finds the linked target, but the adapter has no rows to materialize for it.
    assert result.diagnostics.graph_promotion_attempted is True
    assert result.diagnostics.graph_promotion_hits == ["api.health_endpoint"]
    assert result.diagnostics.promoted_target_loaded is False
    assert result.diagnostics.materialization_failure_reason == "materialized_rows_empty"
    assert result.diagnostics.final_primary_document_ids == []
    assert "domain.runtime_health" in result.diagnostics.final_secondary_document_ids
def test_api_method_explain_skips_llm_when_no_exact_or_indirect_context() -> None:
    """Expect no LLM call when retrieval yields only an unrelated endpoint document."""
    from tests.unit_tests.rag.test_docs_prompt_layer import FakeLlm

    unrelated_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/documentation/api/send-endpoint.md",
        "title": "HTTP API /send",
        "content": "Endpoint for sending messages.",
        "metadata": {
            "document_id": "api.send_message_endpoint",
            "type": "api_method",
            "endpoint": "/send",
        },
    }
    rows = [unrelated_row]

    llm = FakeLlm("should not be called")
    runner = DocsQAPipelineRunner(
        IntentRouterV2(),
        InMemoryDocsRetrievalAdapter(rows),
        repo_context=repo_context(),
        llm=llm,
    )
    result = runner.run("Что делает метод health?", "docs-session")

    # The fake records its calls; an empty list means generation was skipped.
    assert llm.calls == []
    assert result.answer_mode == "insufficient"
    assert result.diagnostics.llm_called is False
    assert result.diagnostics.llm_call_reason == "no_exact_or_indirect_target_context"
    assert result.diagnostics.gate_decision == "reject"
def test_api_method_explain_materializes_promoted_target_into_primary_context() -> None:
    """Expect the link-promoted target to be loaded from materialized rows and become the primary doc."""
    linking_domain_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/domain/runtime-health.md",
        "title": "Runtime health",
        "content": "Runtime health describes service state and component diagnostics.",
        "metadata": {
            "document_id": "domain.runtime_health",
            "type": "domain_entity",
            "links": [{"target": "api.health_endpoint", "type": "used_by"}],
        },
    }
    rows = [linking_domain_row]

    # Rows handed to the adapter separately via its materialized_rows argument.
    target_catalog_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/api/health.md",
        "title": "GET /health",
        "content": "/health returns runtime and component statuses.",
        "metadata": {"document_id": "api.health_endpoint", "type": "api_method", "endpoint": "/health"},
    }
    target_fact_row = {
        "layer": "D2_FACT_INDEX",
        "path": "docs/api/health.md",
        "title": "api.health_endpoint:response",
        "content": "Returns health summary and component diagnostics.",
        "metadata": {"subject_id": "api.health_endpoint", "type": "api_method"},
    }
    target_chunk_row = {
        "layer": "D0_DOC_CHUNKS",
        "path": "docs/api/health.md",
        "title": "api.health_endpoint:Overview",
        "content": "Endpoint /health returns overall runtime health.",
        "metadata": {"document_id": "api.health_endpoint", "type": "api_method", "endpoint": "/health"},
    }
    materialized_rows = [target_catalog_row, target_fact_row, target_chunk_row]

    runner = DocsQAPipelineRunner(
        IntentRouterV2(),
        InMemoryDocsRetrievalAdapter(rows, materialized_rows=materialized_rows),
        repo_context=repo_context(),
    )
    result = runner.run("Как работает метод health?", "docs-session", mode="pre_llm_only")

    assert result.answer_mode == "exact"
    assert result.diagnostics.graph_promotion_hits == ["api.health_endpoint"]
    assert result.diagnostics.graph_promotion_materialized == ["api.health_endpoint"]
    assert result.diagnostics.promoted_target_loaded is True
    assert result.diagnostics.promoted_target_chunks_loaded == 1
    assert result.diagnostics.promoted_target_facts_loaded == 1
    assert result.diagnostics.pinned_document_ids == ["api.health_endpoint"]
    assert result.diagnostics.final_primary_document_ids == ["api.health_endpoint"]
    assert "domain.runtime_health" in result.diagnostics.final_secondary_document_ids
    assert result.diagnostics.materialized_target_primary_context_non_empty is True
    assert result.diagnostics.gate_decision == "allow_exact"
def test_entity_question_does_not_prefer_api_method_primary_doc() -> None:
    """Expect an entity question to select no primary API document even when an api_method row exists."""
    domain_entity_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/domain/runtime-health.md",
        "title": "Сущность runtime health",
        "content": "Runtime health describes service state.",
        "metadata": {"document_id": "domain.runtime_health", "type": "domain_entity"},
    }
    api_method_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/api/health.md",
        "title": "GET /health",
        "content": "/health returns runtime status.",
        "metadata": {"document_id": "api.health_endpoint", "type": "api_method", "endpoint": "/health"},
    }
    rows = [domain_entity_row, api_method_row]

    runner = DocsQAPipelineRunner(IntentRouterV2(), InMemoryDocsRetrievalAdapter(rows), repo_context=repo_context())
    result = runner.run("Что такое runtime health?", "docs-session", mode="pre_llm_only")

    assert result.router_result.query_plan.sub_intent == "ENTITY_EXPLAIN"
    assert result.diagnostics.selected_primary_documents == []
    assert result.diagnostics.api_method_match_found is False
    # In pre_llm_only mode the answer text stays empty.
    assert result.answer == ""
def test_openapi_method_with_only_path_is_rejected() -> None:
@@ -0,0 +1,32 @@
import pytest
from tests.unit_tests.rag.intent_router_testkit import run_sequence
pytestmark = pytest.mark.intent_router
@pytest.mark.parametrize(
    ("query", "plan_id", "primary_doc_types", "secondary_doc_types", "expected_filter_key", "expected_filter_value"),
    [
        (
            "объясни /health",
            "docs_api_method_explain_v1",
            ["api_method"],
            ["logic_block", "domain_entity", "architecture_overview"],
            "endpoint_path",
            "/health",
        ),
        (
            "какие методы в notifications",
            "docs_list_api_methods_v1",
            ["api_method"],
            [],
            "domain_name",
            "notifications",
        ),
        (
            "найди документацию по telegram_delivery",
            "docs_find_documents_by_domain_v1",
            ["index_page", "architecture_overview", "api_method", "logic_block", "domain_entity"],
            [],
            "domain_name",
            "telegram_delivery",
        ),
        (
            "сгенерируй openapi по /send",
            "docs_generate_openapi_v1",
            ["api_method"],
            ["domain_entity", "logic_block"],
            "endpoint_path",
            "/send",
        ),
        (
            "как устроен сервис",
            "docs_general_docs_qa_v1",
            ["index_page", "architecture_overview"],
            ["logic_block", "domain_entity", "api_method"],
            "scope_level",
            "project",
        ),
    ],
)
def test_docs_retrieval_plan_contracts(
    query: str,
    plan_id: str,
    primary_doc_types: list[str],
    secondary_doc_types: list[str],
    expected_filter_key: str,
    expected_filter_value: str,
) -> None:
    """Route a single query and compare its retrieval plan with the expected id, doc types and filter."""
    routed = run_sequence([query])[0]
    plan = routed.retrieval_plan

    assert plan is not None
    assert plan.plan_id == plan_id
    assert plan.primary_doc_types == primary_doc_types
    assert plan.secondary_doc_types == secondary_doc_types
    assert plan.filters[expected_filter_key] == expected_filter_value
@@ -1,4 +1,4 @@
from app.modules.agent.runtime.steps.explain.intent_builder import ExplainIntentBuilder
from app.core.agent.runtime.steps.explain.intent_builder import ExplainIntentBuilder
def test_explain_intent_builder_extracts_route_symbol_and_file_hints() -> None:
@@ -2,13 +2,12 @@ import os
import pytest
from app.modules.agent.intent_router_v2.factory import GigaChatIntentRouterFactory
from app.modules.shared.env_loader import load_workspace_env
from app.core.agent.intent_router.factory import GigaChatIntentRouterFactory
from app.core.shared.config import load_workspace_env
from tests.unit_tests.rag.asserts_intent_router import (
assert_domains,
assert_file_only_scope,
assert_intent,
assert_test_policy,
assert_path_scope,
)
from tests.unit_tests.rag.intent_router_testkit import run_sequence
@@ -29,7 +28,7 @@ def test_e2e_path_carryover_flow() -> None:
]
)
assert_file_only_scope(first, "app/core/config.py")
assert_path_scope(first, "app/core/config.py", "app/core")
assert "app/core/config.py" in second.retrieval_spec.filters.path_scope
assert "app/core/config.py" in third.retrieval_spec.filters.path_scope
second_file_anchors = [anchor.value for anchor in second.query_plan.anchors if anchor.type == "FILE_PATH" and anchor.source == "conversation_state"]
@@ -39,7 +38,10 @@ def test_e2e_path_carryover_flow() -> None:
assert any(anchor.type == "FILE_PATH" and anchor.source == "conversation_state" and anchor.span is None for anchor in third.query_plan.anchors)
carried_symbols = [anchor.value for anchor in third.query_plan.anchors if anchor.type == "SYMBOL" and anchor.source == "conversation_state"]
assert carried_symbols in ([], ["load_config"])
assert third.query_plan.sub_intent == "EXPLAIN_LOCAL"
assert_intent(first, "GENERAL_QA")
assert_intent(second, "GENERAL_QA")
assert_intent(third, "GENERAL_QA")
assert third.query_plan.sub_intent == "GENERIC_QA"
layer_ids = [item.layer_id for item in third.retrieval_spec.layer_queries]
assert "C3_ENTRYPOINTS" not in layer_ids
@@ -52,9 +54,9 @@ def test_e2e_docs_switch_from_code_topic() -> None:
]
)
assert_intent(first, "CODE_QA")
assert_intent(first, "DOCUMENTATION_EXPLAIN")
assert_intent(second, "DOCUMENTATION_EXPLAIN")
assert second.conversation_mode == "SWITCH"
assert second.conversation_mode == "CONTINUE"
assert_domains(second, ["DOCS"])
carried = [
anchor
@@ -75,12 +77,10 @@ def test_e2e_tests_toggle_flow() -> None:
]
)
assert_intent(first, "CODE_QA")
assert_intent(second, "CODE_QA")
assert_test_policy(first, "INCLUDE")
assert_test_policy(second, "EXCLUDE")
assert first.query_plan.sub_intent == "FIND_TESTS"
assert second.query_plan.sub_intent == "EXPLAIN"
assert_intent(first, "GENERAL_QA")
assert_intent(second, "GENERAL_QA")
assert first.query_plan.sub_intent == "GENERIC_QA"
assert second.query_plan.sub_intent == "GENERIC_QA"
assert "tests" in second.query_plan.negations
assert not second.query_plan.expansions
assert second.evidence_policy.require_flow is False
@@ -94,9 +94,9 @@ def test_e2e_open_file_then_generic_next_steps_is_lightweight() -> None:
]
)
assert_file_only_scope(first, "app/core/config.py")
assert_file_only_scope(second, "app/core/config.py")
assert second.query_plan.sub_intent in {"EXPLAIN_LOCAL", "NEXT_STEPS"}
assert_path_scope(first, "app/core/config.py", "app/core")
assert_path_scope(second, "app/core/config.py", "app/core")
assert second.query_plan.sub_intent == "GENERIC_QA"
layer_ids = [item.layer_id for item in second.retrieval_spec.layer_queries]
assert "C3_ENTRYPOINTS" not in layer_ids
assert second.evidence_policy.require_flow is False
@@ -118,9 +118,9 @@ def test_intent_router_live_smoke_path_carryover() -> None:
trace_label="intent-router-live",
)
assert_file_only_scope(first, "app/core/config.py")
assert_path_scope(first, "app/core/config.py", "app/core")
assert "app/core/config.py" in second.retrieval_spec.filters.path_scope
assert second.query_plan.sub_intent in {"EXPLAIN_LOCAL", "NEXT_STEPS"}
assert second.query_plan.sub_intent == "GENERIC_QA"
layer_ids = [item.layer_id for item in second.retrieval_spec.layer_queries]
assert "C3_ENTRYPOINTS" not in layer_ids
assert second.evidence_policy.require_flow is False
@@ -1,204 +1,121 @@
import pytest
from tests.unit_tests.rag.asserts_intent_router import (
assert_domain_layer_prefixes,
assert_domains,
assert_file_only_scope,
assert_has_file_path,
assert_intent,
assert_no_symbol_keyword,
assert_no_symbol_leakage_from_paths,
assert_spans_valid,
assert_sub_intent,
assert_test_policy,
)
from tests.unit_tests.rag.intent_router_testkit import run_sequence
pytestmark = pytest.mark.intent_router
def test_invariant_code_file_path_with_canonical_key_term() -> None:
result = run_sequence(["Уточни по файлу app/core/config.py"])[0]
assert_intent(result, "CODE_QA")
assert_has_file_path(result, "app/core/config.py")
assert_file_only_scope(result, "app/core/config.py")
key_terms = [anchor.value for anchor in result.query_plan.anchors if anchor.type == "KEY_TERM"]
assert "файл" in key_terms
assert "файлу" not in key_terms
assert_spans_valid(result)
assert_domain_layer_prefixes(result)
def _docs_result(query: str):
    """Route one query and return its result after checking docs routing and a retrieval plan are set."""
    outputs = run_sequence([query])
    routed = outputs[0]
    assert routed.docs_routing is not None
    assert routed.retrieval_plan is not None
    return routed
def test_invariant_open_file_for_specified_file_phrase_uses_narrow_layers() -> None:
result = run_sequence(["Уточни по файлу app/core/config.py"])[0]
@pytest.mark.parametrize(
("query", "endpoint"),
[
("как работает метод health", "/health"),
("объясни /health", "/health"),
("что делает endpoint /send", "/send"),
],
)
def test_docs_mvp_api_method_explain_cases(query: str, endpoint: str) -> None:
result = _docs_result(query)
assert_intent(result, "CODE_QA")
assert_sub_intent(result, "OPEN_FILE")
assert_file_only_scope(result, "app/core/config.py")
layer_ids = [item.layer_id for item in result.retrieval_spec.layer_queries]
assert layer_ids == ["C0_SOURCE_CHUNKS"]
assert result.evidence_policy.require_flow is False
assert result.docs_routing.sub_intent == "API_METHOD_EXPLAIN"
assert result.docs_routing.intent == "DOCS_QA"
assert result.docs_routing.scope.level == "method"
assert result.docs_routing.anchors.endpoint_path == endpoint
assert result.retrieval_plan.plan_id == "docs_api_method_explain_v1"
assert result.retrieval_plan.filters["endpoint_path"] == endpoint
def test_invariant_inline_code_span_routes_to_code_and_extracts_symbol() -> None:
result = run_sequence(["Уточни по коду `def build(x): return x`"])[0]
@pytest.mark.parametrize(
("query", "scope_level", "domain_name"),
[
("какие есть методы в проекте", "project", None),
("покажи все api", "project", None),
("какие методы в notifications", "domain", "notifications"),
],
)
def test_docs_mvp_list_api_methods_cases(query: str, scope_level: str, domain_name: str | None) -> None:
result = _docs_result(query)
assert_intent(result, "CODE_QA")
assert_spans_valid(result)
assert_no_symbol_keyword(result)
symbols = [anchor.value for anchor in result.query_plan.anchors if anchor.type == "SYMBOL"]
key_terms = [anchor.value for anchor in result.query_plan.anchors if anchor.type == "KEY_TERM"]
assert "build" in symbols
assert "def" in key_terms
assert result.docs_routing.sub_intent == "LIST_API_METHODS"
assert result.docs_routing.intent == "DOCS_DISCOVERY"
assert result.docs_routing.scope.level == scope_level
assert result.retrieval_plan.plan_id == "docs_list_api_methods_v1"
assert result.retrieval_plan.primary_doc_types == ["api_method"]
if domain_name:
assert result.retrieval_plan.filters["domain_name"] == domain_name
def test_invariant_docs_cyrillic_path_with_quotes() -> None:
result = run_sequence(["Что сказано в «docs/архитектура.md»?"])[0]
@pytest.mark.parametrize(
("query", "domain_name", "subdomain_name", "entity_name"),
[
("какие документы есть по notifications", "notifications", None, None),
("найди документацию по telegram_delivery", "telegram_delivery", None, None),
("какие документы связаны с health", None, None, "health"),
],
)
def test_docs_mvp_find_documents_cases(
query: str,
domain_name: str | None,
subdomain_name: str | None,
entity_name: str | None,
) -> None:
result = _docs_result(query)
assert_intent(result, "DOCUMENTATION_EXPLAIN")
assert_sub_intent(result, "COMPONENT_EXPLAIN")
assert_domains(result, ["DOCS"])
assert "docs/архитектура.md" in result.query_plan.normalized
assert_has_file_path(result, "docs/архитектура.md")
assert any(anchor.type == "DOC_REF" for anchor in result.query_plan.anchors)
assert result.retrieval_spec.filters.doc_kinds == []
assert_spans_valid(result)
assert_domain_layer_prefixes(result)
assert result.docs_routing.sub_intent == "FIND_DOCUMENTS_BY_DOMAIN"
assert result.docs_routing.intent == "DOCS_DISCOVERY"
assert result.retrieval_plan.plan_id == "docs_find_documents_by_domain_v1"
if domain_name:
assert result.retrieval_plan.filters["domain_name"] == domain_name
if subdomain_name:
assert result.retrieval_plan.filters["subdomain_name"] == subdomain_name
if entity_name:
assert result.retrieval_plan.filters["entity_name"] == entity_name
def test_invariant_file_check_phrase_not_project_misc() -> None:
result = run_sequence(["Проверь app/modules/rag/explain/intent_builder.py и объясни"])[0]
@pytest.mark.parametrize(
("query", "scope_level", "endpoint"),
[
("сгенерируй openapi по /health", "method", "/health"),
("собери swagger по notifications", "domain", None),
("сделай спецификацию api по всему проекту", "project", None),
],
)
def test_docs_mvp_generate_openapi_cases(query: str, scope_level: str, endpoint: str | None) -> None:
result = _docs_result(query)
assert_intent(result, "CODE_QA")
assert_domains(result, ["CODE"])
assert_no_symbol_leakage_from_paths(result)
assert_domain_layer_prefixes(result)
assert result.docs_routing.sub_intent == "GENERATE_OPENAPI"
assert result.docs_routing.intent == "DOCS_GENERATION"
assert result.docs_routing.scope.level == scope_level
assert result.retrieval_plan.plan_id == "docs_generate_openapi_v1"
if endpoint:
assert result.retrieval_plan.filters["endpoint_path"] == endpoint
def test_invariant_tests_include_routing() -> None:
result = run_sequence(["Где тесты на ConfigManager?"])[0]
@pytest.mark.parametrize(
"query",
[
"что делает это приложение",
"как устроен сервис",
"как связаны worker и api",
],
)
def test_docs_mvp_general_docs_qa_cases(query: str) -> None:
result = _docs_result(query)
assert_intent(result, "CODE_QA")
assert_test_policy(result, "INCLUDE")
symbols = [anchor.value for anchor in result.query_plan.anchors if anchor.type == "SYMBOL"]
key_terms = [anchor.value for anchor in result.query_plan.anchors if anchor.type == "KEY_TERM"]
assert "ConfigManager" in symbols
assert "тест" in key_terms
assert result.docs_routing.sub_intent == "GENERAL_DOCS_QA"
assert result.docs_routing.intent == "DOCS_FALLBACK"
assert result.retrieval_plan.plan_id == "docs_general_docs_qa_v1"
def test_invariant_keyword_hints_and_expansions_for_function_identifier() -> None:
result = run_sequence(["Теперь объясни функцию load_config"])[0]
def test_docs_mvp_retrieval_filters_are_merged_into_legacy_spec() -> None:
result = _docs_result("какие методы в notifications")
assert_intent(result, "CODE_QA")
assert "load_config" in result.query_plan.keyword_hints
assert "функция" not in result.query_plan.keyword_hints
assert "def" not in result.query_plan.expansions
def test_invariant_open_file_sub_intent_uses_narrow_retrieval_profile() -> None:
    """Check the retrieval profile produced for an explicit "open file" query.

    The query must be routed as CODE_QA / OPEN_FILE with a file-only scope,
    must query the C0 source-chunk layer, must not query the C1, C2 or C3
    layers, and must not require flow evidence.
    """
    target_path = "app/core/config.py"
    routed = run_sequence(["Открой файл app/core/config.py"])[0]

    assert_intent(routed, "CODE_QA")
    assert_sub_intent(routed, "OPEN_FILE")
    assert_file_only_scope(routed, target_path)

    selected_layers = [query.layer_id for query in routed.retrieval_spec.layer_queries]
    assert "C0_SOURCE_CHUNKS" in selected_layers
    # Broader code layers must stay out of the narrow open-file profile.
    for excluded_layer in ("C1_SYMBOL_CATALOG", "C2_DEPENDENCY_GRAPH", "C3_ENTRYPOINTS"):
        assert excluded_layer not in selected_layers

    assert routed.evidence_policy.require_flow is False
def test_invariant_docs_question_routes_to_docs() -> None:
    """Check that a question about the documentation is routed to DOCS.

    Besides the intent, domain and layer-prefix checks, the query plan must
    carry keyword hints, and at least one hint must also appear among the
    plan's expansions.
    """
    routed = run_sequence(["Что сказано в документации?"])[0]

    assert_intent(routed, "DOCUMENTATION_EXPLAIN")
    assert_domains(routed, ["DOCS"])
    assert_domain_layer_prefixes(routed)

    assert routed.query_plan.keyword_hints
    hint_is_expanded = any(
        hint in routed.query_plan.expansions
        for hint in routed.query_plan.keyword_hints
    )
    assert hint_is_expanded
def test_invariant_docs_flow_sub_intent_uses_workflow_layers() -> None:
    """Check the exact layer order produced for a docs process-flow question."""
    expected_layers = [
        "D4_WORKFLOW_INDEX",
        "D5_RELATION_GRAPH",
        "D1_DOCUMENT_CATALOG",
        "D0_DOC_CHUNKS",
    ]
    routed = run_sequence(["Как работает процесс создания заказа по документации?"])[0]

    assert_intent(routed, "DOCUMENTATION_EXPLAIN")
    assert_sub_intent(routed, "SYSTEM_FLOW_EXPLAIN")

    actual_layers = [query.layer_id for query in routed.retrieval_spec.layer_queries]
    assert actual_layers == expected_layers
def test_invariant_docs_entity_sub_intent_uses_entity_layers() -> None:
    """Check the exact layer order produced for a docs entity question."""
    expected_layers = [
        "D3_ENTITY_CATALOG",
        "D5_RELATION_GRAPH",
        "D1_DOCUMENT_CATALOG",
        "D0_DOC_CHUNKS",
    ]
    routed = run_sequence(["Что такое сущность Order в документации?"])[0]

    assert_intent(routed, "DOCUMENTATION_EXPLAIN")
    assert_sub_intent(routed, "ENTITY_EXPLAIN")

    actual_layers = [query.layer_id for query in routed.retrieval_spec.layer_queries]
    assert actual_layers == expected_layers
def test_invariant_entity_like_camel_case_prefers_entity_explain() -> None:
    """Check that a "what is <CamelCaseName>" query resolves to ENTITY_EXPLAIN."""
    outcomes = run_sequence(["Что такое WorkerHealth?"])
    routed = outcomes[0]

    assert_intent(routed, "DOCUMENTATION_EXPLAIN")
    assert_sub_intent(routed, "ENTITY_EXPLAIN")
def test_invariant_related_docs_routes_to_docs_explain() -> None:
    """Check routing and exact layer order for a "find documentation on X" query."""
    expected_layers = ["D5_RELATION_GRAPH", "D1_DOCUMENT_CATALOG", "D0_DOC_CHUNKS"]
    routed = run_sequence(["Найди документацию по billing"])[0]

    assert_intent(routed, "DOCUMENTATION_EXPLAIN")
    assert_sub_intent(routed, "RELATED_DOCS_EXPLAIN")

    actual_layers = [query.layer_id for query in routed.retrieval_spec.layer_queries]
    assert actual_layers == expected_layers
def test_invariant_docs_navigation_uses_related_docs_explain() -> None:
    """Check routing and exact layer order for a "what is related to X docs" query."""
    expected_layers = ["D5_RELATION_GRAPH", "D1_DOCUMENT_CATALOG", "D0_DOC_CHUNKS"]
    routed = run_sequence(["Что связано с checkout документацией?"])[0]

    assert_intent(routed, "DOCUMENTATION_EXPLAIN")
    assert_sub_intent(routed, "RELATED_DOCS_EXPLAIN")

    actual_layers = [query.layer_id for query in routed.retrieval_spec.layer_queries]
    assert actual_layers == expected_layers
def test_invariant_openapi_routes_to_docs_layers_with_api_filter() -> None:
    """Check routing of an OpenAPI generation request.

    The request must resolve to OPENAPI_GENERATION / OPENAPI_METHOD_GENERATE,
    query the expected D-layers in order, and set the ``doc_type`` retrieval
    filter to ``"api_method"``.
    """
    expected_layers = ["D1_DOCUMENT_CATALOG", "D2_FACT_INDEX", "D0_DOC_CHUNKS"]
    routed = run_sequence(["Сгенерируй openapi yaml для создания заказа"])[0]

    assert_intent(routed, "OPENAPI_GENERATION")
    assert_sub_intent(routed, "OPENAPI_METHOD_GENERATE")

    spec = routed.retrieval_spec
    actual_layers = [query.layer_id for query in spec.layer_queries]
    assert actual_layers == expected_layers
    assert spec.filters.doc_type == "api_method"
def test_invariant_general_qa_routes_to_generic_docs_layers() -> None:
    """Check that a vague help request becomes GENERAL_QA over the DOCS domain.

    The layer list must be exactly the document catalog followed by the
    doc chunks.
    """
    expected_layers = ["D1_DOCUMENT_CATALOG", "D0_DOC_CHUNKS"]
    routed = run_sequence(["Помоги разобраться"])[0]

    assert_intent(routed, "GENERAL_QA")
    assert_sub_intent(routed, "GENERIC_QA")
    assert_domains(routed, ["DOCS"])

    actual_layers = [query.layer_id for query in routed.retrieval_spec.layer_queries]
    assert actual_layers == expected_layers
def test_invariant_component_like_manager_routes_to_component_explain() -> None:
    """Check that a question about RuntimeManager's role resolves to COMPONENT_EXPLAIN."""
    outcomes = run_sequence(["Какую роль в системе играет RuntimeManager?"])
    routed = outcomes[0]

    assert_intent(routed, "DOCUMENTATION_EXPLAIN")
    assert_sub_intent(routed, "COMPONENT_EXPLAIN")
def test_invariant_cycle_query_routes_to_system_flow_explain() -> None:
    """Check that a "how does the ... cycle work" query resolves to SYSTEM_FLOW_EXPLAIN."""
    outcomes = run_sequence(["Объясни как работает цикл отправки уведомлений"])
    routed = outcomes[0]

    assert_intent(routed, "DOCUMENTATION_EXPLAIN")
    assert_sub_intent(routed, "SYSTEM_FLOW_EXPLAIN")
def test_invariant_overview_question_routes_to_general_qa() -> None:
    """Check that a broad "what is documented" question resolves to GENERAL_QA / GENERIC_QA."""
    outcomes = run_sequence(["Что вообще описано в документации по этому сервису?"])
    routed = outcomes[0]

    assert_intent(routed, "GENERAL_QA")
    assert_sub_intent(routed, "GENERIC_QA")
assert getattr(result.retrieval_spec.filters, "doc_type", None) == "api_method"
assert getattr(result.retrieval_spec.filters, "domain_name", None) == "notifications"
assert getattr(result.retrieval_spec.filters, "scope_level", None) == "domain"
@@ -2,9 +2,9 @@ from __future__ import annotations
import json
from app.modules.agent.intent_router_v2 import ConversationState, IntentRouterV2
from app.modules.agent.intent_router_v2.intent.classifier import IntentClassifierV2
from app.modules.agent.intent_router_v2.intent.llm_disambiguator import DocsLlmDisambiguator
from app.core.agent.intent_router import ConversationState, IntentRouterV2
from app.core.agent.intent_router.docs_mvp.llm_classifier import DocsMvpLlmClassifier
from app.core.agent.intent_router.intent.classifier import IntentClassifierV2
from tests.unit_tests.rag.intent_router_testkit import repo_context
@@ -21,75 +21,62 @@ class FakeLlm:
return self.response
def test_technical_query_keeps_deterministic_routing_without_llm_call() -> None:
llm = FakeLlm('{"sub_intent":"GENERIC_QA","reason":"unused","confidence":"low"}')
router = IntentRouterV2(
def _router(llm: FakeLlm) -> IntentRouterV2:
return IntentRouterV2(
classifier=IntentClassifierV2(),
llm_disambiguator=DocsLlmDisambiguator(llm),
docs_llm_classifier=DocsMvpLlmClassifier(llm),
enable_llm_disambiguation=True,
)
result = router.route("Объясни endpoint /health", ConversationState(), repo_context())
assert result.query_plan.sub_intent == "API_METHOD_EXPLAIN"
assert result.is_ambiguous is False
assert result.routing_mode == "deterministic"
def test_docs_technical_query_keeps_deterministic_routing_without_llm_call() -> None:
llm = FakeLlm("{}")
result = _router(llm).route("Объясни endpoint /health", ConversationState(), repo_context())
assert result.docs_routing is not None
assert result.docs_routing.sub_intent == "API_METHOD_EXPLAIN"
assert result.docs_routing.routing_mode == "deterministic"
assert result.llm_router_used is False
assert llm.calls == []
def test_ambiguous_query_can_be_resolved_by_llm() -> None:
llm = FakeLlm('{"sub_intent":"ENTITY_EXPLAIN","reason":"runtime health is a concept/entity here","confidence":"medium"}')
router = IntentRouterV2(
classifier=IntentClassifierV2(),
llm_disambiguator=DocsLlmDisambiguator(llm),
enable_llm_disambiguation=True,
llm = FakeLlm(
json.dumps(
{
"intent": "DOCS_DISCOVERY",
"sub_intent": "FIND_DOCUMENTS_BY_DOMAIN",
"confidence": 0.83,
"anchors": {"entity_name": "health", "doc_query": "документация по health"},
"scope": {"level": "domain"},
"reason_short": "health here is a docs topic",
},
ensure_ascii=False,
)
)
result = _router(llm).route("документация по health", ConversationState(), repo_context())
result = router.route("Объясни runtime health", ConversationState(), repo_context())
assert result.is_ambiguous is True
assert result.routing_mode == "llm_disambiguation"
assert result.docs_routing is not None
assert result.docs_routing.routing_mode == "llm_assisted"
assert result.docs_routing.sub_intent == "FIND_DOCUMENTS_BY_DOMAIN"
assert result.retrieval_plan is not None
assert result.retrieval_plan.plan_id == "docs_find_documents_by_domain_v1"
assert result.llm_router_used is True
assert result.deterministic_selected_sub_intent
assert result.llm_router_selected_sub_intent == "ENTITY_EXPLAIN"
assert result.query_plan.sub_intent == "ENTITY_EXPLAIN"
def test_ambiguous_query_falls_back_to_deterministic_when_llm_fails() -> None:
def test_ambiguous_query_falls_back_to_general_docs_when_llm_fails() -> None:
llm = FakeLlm("{}", fail=True)
router = IntentRouterV2(
classifier=IntentClassifierV2(),
llm_disambiguator=DocsLlmDisambiguator(llm),
enable_llm_disambiguation=True,
)
result = _router(llm).route("health документация", ConversationState(), repo_context())
result = router.route("Как работает health check runtime?", ConversationState(), repo_context())
assert result.is_ambiguous is True
assert result.routing_mode == "deterministic_fallback"
assert result.llm_router_used is False
assert result.llm_router_error == "llm unavailable"
assert result.query_plan.sub_intent == result.deterministic_selected_sub_intent
assert result.docs_routing is not None
assert result.docs_routing.routing_mode == "llm_fallback"
assert result.docs_routing.sub_intent == "GENERAL_DOCS_QA"
assert result.retrieval_plan is not None
assert result.retrieval_plan.plan_id == "docs_general_docs_qa_v1"
def test_overview_query_stays_in_generic_qa() -> None:
llm = FakeLlm('{"sub_intent":"GENERIC_QA","reason":"overview query","confidence":"high"}')
router = IntentRouterV2(
classifier=IntentClassifierV2(),
llm_disambiguator=DocsLlmDisambiguator(llm),
enable_llm_disambiguation=True,
)
def test_llm_classifier_rejects_unknown_labels() -> None:
llm = FakeLlm(json.dumps({"intent": "DOCS_QA", "sub_intent": "MADE_UP"}))
classifier = DocsMvpLlmClassifier(llm)
result = router.route("Какая структура документации?", ConversationState(), repo_context())
assert result.is_ambiguous is False or result.query_plan.sub_intent == "GENERIC_QA"
assert result.intent == "GENERAL_QA"
assert result.query_plan.sub_intent == "GENERIC_QA"
def test_llm_disambiguator_rejects_unknown_labels() -> None:
llm = FakeLlm(json.dumps({"sub_intent": "MADE_UP", "reason": "bad", "confidence": "high"}))
disambiguator = DocsLlmDisambiguator(llm)
assert disambiguator.choose({"query": "test"}) is None
assert classifier.classify({"query": "test"}) is None
+1 -1
View File
@@ -1,4 +1,4 @@
from app.modules.agent.runtime.steps.explain.layered_gateway import LayeredRetrievalGateway
from app.core.agent.runtime.steps.explain.layered_gateway import LayeredRetrievalGateway
class _Embedder:
+1 -1
View File
@@ -1,6 +1,6 @@
from __future__ import annotations
from app.modules.rag.indexing.common.path_filter import (
from app.core.rag.indexing.common.path_filter import (
count_indexable_change_upserts,
filter_changes_for_indexing,
filter_snapshot_files,
@@ -1,6 +1,6 @@
import pytest
from app.modules.agent.intent_router_v2.analysis.normalization import QueryNormalizer
from app.core.agent.intent_router.analysis.normalization import QueryNormalizer
pytestmark = pytest.mark.intent_router
+1 -1
View File
@@ -1,4 +1,4 @@
from app.modules.rag.retrieval.query_terms import extract_query_terms
from app.core.rag.retrieval.query_terms import extract_query_terms
def test_extract_query_terms_from_code_question() -> None:
@@ -2,7 +2,8 @@ from __future__ import annotations
import asyncio
from app.modules.rag.services.rag_service import RagService
from app.core.rag.contracts.enums import RagLayer
from app.core.rag.indexing.service import RagService
class _FakeEmbedder:
@@ -50,3 +51,40 @@ def test_rag_service_progress_uses_only_indexable_files() -> None:
assert cache_hits == 0
assert cache_misses == 1
assert progress == [(1, 1, "src/main.py")]
def test_rag_service_keeps_docs_artifact_type_metadata() -> None:
    """Index one markdown doc with front matter and check the doc-chunk metadata.

    After ``index_snapshot`` runs, the first replaced document on the
    ``DOCS_DOC_CHUNKS`` layer must carry ``artifact_type``, ``domain`` and
    ``subdomain`` metadata matching the expected values below.
    """
    health_doc = """---
id: api.health
type: api_method
doc_type: api_method
title: Health API
domain: runtime
sub_domain: health
related_docs: []
status: active
---
# Health API
## Summary
- Purpose: check service health.
## Details
### Описание
Returns health payload.
"""
    snapshot = [
        {
            "path": "docs/api/health.md",
            "content_hash": "docs-h1",
            "content": health_doc,
        }
    ]
    repository = _FakeRepository()
    service = RagService(embedder=_FakeEmbedder(), repository=repository)

    asyncio.run(service.index_snapshot("project-1", snapshot))

    doc_chunk = next(
        doc for doc in repository.replaced_docs if doc.layer == RagLayer.DOCS_DOC_CHUNKS
    )
    expected_metadata = {
        "artifact_type": "DOCS",
        "domain": "runtime",
        "subdomain": "health",
    }
    for key, expected_value in expected_metadata.items():
        assert doc_chunk.metadata[key] == expected_value
@@ -0,0 +1,26 @@
"""Smoke-тест стандартного retrieval API: один embed и вызов repository."""
from __future__ import annotations
import asyncio
from unittest.mock import MagicMock
from app.core.rag.embedding.gigachat_embedder import GigaChatEmbedder
from app.core.rag.retrieval.session_retriever import RagSessionRetriever, RetrievalPlan
def test_rag_session_retriever_calls_repository() -> None:
    """Smoke-check that retrieval embeds the query and delegates to the repository.

    Both collaborators are mocks. The test verifies that one row comes back,
    that ``embed`` and ``retrieve`` were each invoked, and that ``retrieve``
    received the session id positionally plus the plan's layers and limit as
    keyword arguments.
    """
    fake_embedder = MagicMock(spec=GigaChatEmbedder)
    fake_embedder.embed = MagicMock(return_value=[[0.1, 0.2]])
    fake_repo = MagicMock()
    fake_repo.retrieve = MagicMock(return_value=[{"path": "a.md", "layer": "D0_DOC_CHUNKS"}])
    plan = RetrievalPlan(profile="test", layers=["D0_DOC_CHUNKS", "D1_DOCUMENT_CATALOG"], limit=5)
    retriever = RagSessionRetriever(repository=fake_repo, embedder=fake_embedder)

    rows = asyncio.run(retriever.retrieve("sid-1", "hello", plan))

    assert len(rows) == 1
    assert fake_embedder.embed.called
    assert fake_repo.retrieve.called

    # call_args is an (args, kwargs) pair describing the most recent call.
    positional, keyword = fake_repo.retrieve.call_args
    assert positional[0] == "sid-1"
    assert keyword["layers"] == plan.layers
    assert keyword["limit"] == 5
@@ -1,5 +1,5 @@
from app.modules.rag.persistence.retrieval_statement_builder import RetrievalStatementBuilder
from app.modules.rag.retrieval.test_filter import build_test_filters, is_test_path
from app.core.rag.persistence.retrieval_statement_builder import RetrievalStatementBuilder
from app.core.rag.retrieval.test_filter import build_test_filters, is_test_path
def test_retrieve_builder_adds_test_exclusion_filters() -> None:
@@ -1,4 +1,4 @@
from app.modules.agent.runtime.steps.explain import CodeExplainRetrieverV2, LayeredRetrievalGateway
from app.core.agent.runtime.steps.explain import CodeExplainRetrieverV2, LayeredRetrievalGateway
class _ExplodingEmbedder:
@@ -1,5 +1,5 @@
from app.modules.agent.runtime.steps.explain.models import CodeLocation, LayeredRetrievalItem
from app.modules.agent.runtime.steps.explain.retriever_v2 import CodeExplainRetrieverV2
from app.core.agent.runtime.steps.explain.models import CodeLocation, LayeredRetrievalItem
from app.core.agent.runtime.steps.explain.retriever_v2 import CodeExplainRetrieverV2
class _FakeGateway:
@@ -1,7 +1,7 @@
from types import SimpleNamespace
from app.modules.agent.runtime.steps.explain.models import CodeLocation, LayeredRetrievalItem
from app.modules.agent.runtime.steps.explain.retriever_v2 import CodeExplainRetrieverV2
from app.core.agent.runtime.steps.explain.models import CodeLocation, LayeredRetrievalItem
from app.core.agent.runtime.steps.explain.retriever_v2 import CodeExplainRetrieverV2
class _ProductionFirstGateway:
+2 -2
View File
@@ -1,5 +1,5 @@
from app.modules.agent.runtime.steps.explain.models import CodeLocation, LayeredRetrievalItem
from app.modules.agent.runtime.steps.explain.trace_builder import TraceBuilder
from app.core.agent.runtime.steps.explain.models import CodeLocation, LayeredRetrievalItem
from app.core.agent.runtime.steps.explain.trace_builder import TraceBuilder
class _FakeGraphRepository: