Роутер работает нормально в process v2
This commit is contained in:
@@ -2,7 +2,7 @@ from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from app.modules.agent.intent_router_v2.models import IntentRouterResult
|
||||
from app.core.agent.intent_router.models import IntentRouterResult
|
||||
|
||||
|
||||
def assert_intent(out: IntentRouterResult, expected: str) -> None:
|
||||
@@ -57,7 +57,7 @@ def assert_domain_layer_prefixes(out: IntentRouterResult) -> None:
|
||||
prefixes = {layer.layer_id[0] for layer in out.retrieval_spec.layer_queries if layer.layer_id}
|
||||
if out.retrieval_spec.domains == ["CODE"]:
|
||||
assert prefixes <= {"C"}
|
||||
elif out.retrieval_spec.domains == ["DOCS"]:
|
||||
elif out.retrieval_spec.domains in (["DOCS"], ["GENERAL"]):
|
||||
assert prefixes <= {"D"}
|
||||
else:
|
||||
assert prefixes <= {"C", "D"}
|
||||
|
||||
@@ -2,26 +2,22 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from app.modules.rag.contracts.enums import RagLayer
|
||||
from app.modules.agent.intent_router_v2 import ConversationState, IntentRouterV2, RepoContext
|
||||
from app.core.rag.contracts.enums import RagLayer
|
||||
from app.core.agent.intent_router import ConversationState, IntentRouterV2, RepoContext
|
||||
|
||||
|
||||
def repo_context() -> RepoContext:
|
||||
return RepoContext(
|
||||
languages=["python"],
|
||||
available_domains=["CODE", "DOCS"],
|
||||
available_domains=["DOCS", "GENERAL"],
|
||||
available_layers=[
|
||||
RagLayer.CODE_ENTRYPOINTS,
|
||||
RagLayer.CODE_SYMBOL_CATALOG,
|
||||
RagLayer.CODE_DEPENDENCY_GRAPH,
|
||||
RagLayer.CODE_SEMANTIC_ROLES,
|
||||
RagLayer.CODE_SOURCE_CHUNKS,
|
||||
RagLayer.DOCS_DOC_CHUNKS,
|
||||
RagLayer.DOCS_DOCUMENT_CATALOG,
|
||||
RagLayer.DOCS_FACT_INDEX,
|
||||
RagLayer.DOCS_ENTITY_CATALOG,
|
||||
RagLayer.DOCS_WORKFLOW_INDEX,
|
||||
RagLayer.DOCS_RELATION_GRAPH,
|
||||
RagLayer.DOCS_INTEGRATION_INDEX,
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from app.modules.rag.contracts.enums import RagLayer
|
||||
from app.modules.rag.indexing.code.pipeline import CodeIndexingPipeline
|
||||
from app.core.rag.contracts.enums import RagLayer
|
||||
from app.core.rag.indexing.code.pipeline import CodeIndexingPipeline
|
||||
|
||||
|
||||
def test_code_pipeline_builds_source_symbols_edges_and_entrypoints() -> None:
|
||||
|
||||
@@ -1,15 +1,18 @@
|
||||
from app.modules.rag.contracts.enums import RagLayer
|
||||
from app.modules.rag.indexing.docs.pipeline import DocsIndexingPipeline
|
||||
from app.core.rag.contracts.enums import RagLayer
|
||||
from app.core.rag.indexing.docs.pipeline import DocsIndexingPipeline
|
||||
|
||||
|
||||
def test_docs_pipeline_builds_new_d0_to_d5_layers() -> None:
|
||||
def test_docs_pipeline_builds_docs_layers_from_modern_markdown_structure() -> None:
|
||||
pipeline = DocsIndexingPipeline()
|
||||
content = """---
|
||||
id: api.billing.create_invoice
|
||||
type: api_method
|
||||
doc_type: api_method
|
||||
name: create_invoice
|
||||
title: Create Invoice API
|
||||
module: billing
|
||||
domain: billing
|
||||
sub_domain: invoices
|
||||
layer: application
|
||||
status: draft
|
||||
updated_at: 2026-03-23
|
||||
@@ -17,21 +20,26 @@ tags: [billing, api]
|
||||
entities: [Invoice]
|
||||
parent: billing_api
|
||||
children: []
|
||||
related_docs: [api.billing.validate_invoice]
|
||||
links:
|
||||
- type: related_api
|
||||
target: api.billing.validate_invoice
|
||||
called_by:
|
||||
- ui.billing.invoice_form
|
||||
uses_logic:
|
||||
- logic.billing.invoice_validation
|
||||
---
|
||||
# Summary
|
||||
# Create Invoice API
|
||||
|
||||
## Summary
|
||||
|
||||
Creates an invoice in billing.
|
||||
|
||||
# Details
|
||||
## Details
|
||||
|
||||
## Описание
|
||||
### Описание
|
||||
|
||||
Создает счет на оплату.
|
||||
|
||||
## Сценарий
|
||||
### Сценарий
|
||||
|
||||
**Название:**
|
||||
Create invoice
|
||||
@@ -55,7 +63,12 @@ Create invoice
|
||||
**Постусловие:**
|
||||
- Invoice is created.
|
||||
|
||||
## Контракт
|
||||
### Контракт
|
||||
|
||||
#### Метаданные вызова
|
||||
- Method: POST
|
||||
- Auth: USER
|
||||
- Idempotency: false
|
||||
|
||||
### Входные параметры
|
||||
|
||||
@@ -69,7 +82,22 @@ Create invoice
|
||||
| --- | --- | --- |
|
||||
| invoice_id | string | yes |
|
||||
|
||||
## Ошибки
|
||||
### Интеграции
|
||||
|
||||
#### Billing DB
|
||||
- target: db.billing.invoices
|
||||
- target_type: db
|
||||
- direction: outbound
|
||||
- interaction: writes
|
||||
- via: invoice repository
|
||||
- purpose: persist created invoices
|
||||
- details:
|
||||
- transaction: required
|
||||
- tables:
|
||||
- invoices
|
||||
- invoice_items
|
||||
|
||||
### Ошибки
|
||||
|
||||
| status | error | client action |
|
||||
| --- | --- | --- |
|
||||
@@ -89,10 +117,14 @@ Create invoice
|
||||
assert RagLayer.DOCS_ENTITY_CATALOG in layers
|
||||
assert RagLayer.DOCS_WORKFLOW_INDEX in layers
|
||||
assert RagLayer.DOCS_RELATION_GRAPH in layers
|
||||
assert RagLayer.DOCS_INTEGRATION_INDEX in layers
|
||||
|
||||
catalog_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_DOCUMENT_CATALOG)
|
||||
assert catalog_doc.metadata["document_id"] == "api.billing.create_invoice"
|
||||
assert catalog_doc.metadata["module"] == "billing"
|
||||
assert catalog_doc.metadata["domain"] == "billing"
|
||||
assert catalog_doc.metadata["subdomain"] == "invoices"
|
||||
assert catalog_doc.metadata["summary_text"] == "Creates an invoice in billing."
|
||||
|
||||
fact_texts = [doc.text for doc in docs if doc.layer == RagLayer.DOCS_FACT_INDEX]
|
||||
assert any("has_field amount" in text for text in fact_texts)
|
||||
@@ -108,6 +140,16 @@ Create invoice
|
||||
relation_targets = [doc.metadata["target_id"] for doc in docs if doc.layer == RagLayer.DOCS_RELATION_GRAPH]
|
||||
assert "billing_api" in relation_targets
|
||||
assert "api.billing.validate_invoice" in relation_targets
|
||||
assert "logic.billing.invoice_validation" in relation_targets
|
||||
assert "Invoice" in relation_targets
|
||||
|
||||
chunk_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_DOC_CHUNKS)
|
||||
assert chunk_doc.metadata["section_path"]
|
||||
assert chunk_doc.metadata["artifact_type"] == "DOCS"
|
||||
assert chunk_doc.metadata["domain"] == "billing"
|
||||
assert chunk_doc.metadata["subdomain"] == "invoices"
|
||||
|
||||
integration_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_INTEGRATION_INDEX)
|
||||
assert integration_doc.metadata["target"] == "db.billing.invoices"
|
||||
assert integration_doc.metadata["target_type"] == "db"
|
||||
assert integration_doc.metadata["details"]["transaction"] == "required"
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.modules.agent.intent_router_v2 import IntentRouterV2
|
||||
from app.modules.agent.llm.prompt_loader import PromptLoader
|
||||
from app.modules.agent.runtime.docs_qa_pipeline import DocsQAPipelineRunner
|
||||
from app.modules.agent.runtime.docs_qa_pipeline.openapi_postprocessor import OpenAPIPostprocessor
|
||||
from app.modules.agent.runtime.docs_qa_pipeline.prompt_payload_builder import DocsPromptPayloadBuilder
|
||||
from app.modules.agent.runtime.steps.generation import RuntimePromptSelector
|
||||
from app.core.agent.intent_router import IntentRouterV2
|
||||
from app.core.agent.llm.prompt_loader import PromptLoader
|
||||
from app.core.agent.runtime.docs_qa_pipeline import DocsQAPipelineRunner
|
||||
from app.core.agent.runtime.docs_qa_pipeline.openapi_postprocessor import OpenAPIPostprocessor
|
||||
from app.core.agent.runtime.docs_qa_pipeline.prompt_payload_builder import DocsPromptPayloadBuilder
|
||||
from app.core.agent.orchestration.processes.v2.prompt_payload_builder import V2PromptPayloadBuilder
|
||||
from app.core.agent.runtime.steps.generation import RuntimePromptSelector
|
||||
from tests.docs_qa_eval.fixture_adapter import InMemoryDocsRetrievalAdapter
|
||||
from tests.unit_tests.rag.intent_router_testkit import repo_context
|
||||
|
||||
@@ -43,7 +44,7 @@ def test_prompt_selector_uses_docs_prompts_only() -> None:
|
||||
|
||||
def test_docs_prompt_payload_contains_required_contract() -> None:
|
||||
builder = DocsPromptPayloadBuilder()
|
||||
from app.modules.agent.runtime.docs_qa_pipeline.models import DocsEvidenceBundle, OpenAPIResult
|
||||
from app.core.agent.runtime.docs_qa_pipeline.models import DocsEvidenceBundle, OpenAPIResult
|
||||
|
||||
payload = builder.build(
|
||||
question="Объясни billing",
|
||||
@@ -52,6 +53,8 @@ def test_docs_prompt_payload_contains_required_contract() -> None:
|
||||
evidence_bundle=DocsEvidenceBundle(
|
||||
intent="DOCUMENTATION_EXPLAIN",
|
||||
sub_intent="COMPONENT_EXPLAIN",
|
||||
primary_documents=[{"title": "Billing"}],
|
||||
secondary_documents=[{"title": "Billing relation"}],
|
||||
documents=[{"title": "Billing"}],
|
||||
facts=[{"content": "Handles payments"}],
|
||||
relations=[{"title": "Billing -> Orders"}],
|
||||
@@ -62,12 +65,36 @@ def test_docs_prompt_payload_contains_required_contract() -> None:
|
||||
assert '"question": "Объясни billing"' in payload
|
||||
assert '"intent": "DOCUMENTATION_EXPLAIN"' in payload
|
||||
assert '"sub_intent": "COMPONENT_EXPLAIN"' in payload
|
||||
assert '"primary_documents"' in payload
|
||||
assert '"secondary_documents"' in payload
|
||||
assert '"documents"' in payload
|
||||
assert '"facts"' in payload
|
||||
assert '"relations"' in payload
|
||||
assert '"api_contract"' in payload
|
||||
|
||||
|
||||
def test_v2_prompt_payload_accepts_api_method_mode_fields() -> None:
    """V2 payload builder serializes the API-method answer-mode fields.

    Builds a payload for an API_METHOD_EXPLAIN question with an "indirect"
    answer mode, a target endpoint identity and ``direct_api_spec_found=False``,
    then checks that each of those values appears in the rendered payload text.
    """
    from app.core.agent.runtime.docs_qa_pipeline.models import DocsEvidenceBundle

    builder = V2PromptPayloadBuilder()
    bundle = DocsEvidenceBundle(intent="DOCUMENTATION_EXPLAIN", sub_intent="API_METHOD_EXPLAIN")
    endpoint_identity = {
        "anchor": "health",
        "normalized_path": "/health",
        "normalized_doc_id": "api.health_endpoint",
    }

    payload = builder.build(
        question="Как работает метод health?",
        intent="DOCUMENTATION_EXPLAIN",
        sub_intent="API_METHOD_EXPLAIN",
        evidence_bundle=bundle,
        api_method_answer_mode="indirect",
        target_endpoint_identity=endpoint_identity,
        direct_api_spec_found=False,
    )

    # Each fragment is checked in the same order as before.
    expected_fragments = (
        '"api_method_answer_mode": "indirect"',
        '"normalized_doc_id": "api.health_endpoint"',
        '"direct_api_spec_found": false',
    )
    for fragment in expected_fragments:
        assert fragment in payload
|
||||
|
||||
|
||||
def test_openapi_postprocessor_requires_paths_for_full_spec() -> None:
|
||||
validator = OpenAPIPostprocessor()
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.modules.agent.intent_router_v2 import IntentRouterV2
|
||||
from app.modules.agent.runtime.docs_qa_pipeline import DocsQAPipelineRunner
|
||||
from app.core.agent.intent_router import IntentRouterV2
|
||||
from app.core.agent.runtime.docs_qa_pipeline import DocsQAPipelineRunner, DocsTaskPlanner
|
||||
from tests.docs_qa_eval.fixture_adapter import InMemoryDocsRetrievalAdapter
|
||||
from tests.unit_tests.rag.intent_router_testkit import repo_context
|
||||
|
||||
@@ -135,6 +135,52 @@ def test_openapi_partial_contract_returns_partial_mode() -> None:
|
||||
assert "/orders" in result.answer
|
||||
|
||||
|
||||
def test_docs_pipeline_accepts_precomputed_task_plan_without_rerouting() -> None:
    """A precomputed task plan must be used as-is, without calling the router.

    The plan is produced up front with a real router; the runner itself gets a
    router whose ``route`` raises, so any re-routing attempt fails the test.
    """
    question = "Объясни API метод /health"
    session_id = "docs-session"
    catalog_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/api/health.md",
        "title": "GET /health",
        "content": "/health returns runtime and component statuses.",
        "metadata": {"document_id": "api.health_endpoint", "type": "api_method", "endpoint": "/health"},
    }
    fact_row = {
        "layer": "D2_FACT_INDEX",
        "path": "docs/api/health.md",
        "title": "api.health_endpoint:response",
        "content": "Returns health summary and component diagnostics.",
        "metadata": {"subject_id": "api.health_endpoint", "type": "api_method"},
    }
    rows = [catalog_row, fact_row]

    route_result = IntentRouterV2().route(question, repo_context=repo_context())
    task_plan = DocsTaskPlanner().plan(question, session_id, route_result=route_result)

    class FailingRouter:
        def route(self, *_args, **_kwargs):
            raise AssertionError("runner should use the precomputed task plan")

    runner = DocsQAPipelineRunner(
        FailingRouter(),
        InMemoryDocsRetrievalAdapter(rows),
        repo_context=repo_context(),
    )

    result = runner.run(question, session_id, mode="pre_llm_only", task_plan=task_plan)

    routed = result.router_result
    assert routed.intent == "DOCUMENTATION_EXPLAIN"
    assert routed.query_plan.sub_intent == "API_METHOD_EXPLAIN"
    diagnostics = result.diagnostics
    assert diagnostics.selected_primary_documents == ["api.health_endpoint"]
    assert diagnostics.gate_decision == "allow_exact"
|
||||
|
||||
|
||||
def test_pre_llm_mode_returns_diagnostic_only_without_answer_generation() -> None:
|
||||
rows = [
|
||||
{
|
||||
@@ -172,7 +218,7 @@ def test_pre_llm_mode_detects_path_anchor_candidates() -> None:
|
||||
|
||||
assert "/health" in result.diagnostics.query_anchor_candidates
|
||||
assert "/health" in result.diagnostics.resolved_anchor_candidates
|
||||
assert result.diagnostics.planned_layers == ["D2_FACT_INDEX", "D4_WORKFLOW_INDEX", "D1_DOCUMENT_CATALOG", "D0_DOC_CHUNKS"]
|
||||
assert result.diagnostics.planned_layers == ["D1_DOCUMENT_CATALOG", "D2_FACT_INDEX", "D0_DOC_CHUNKS", "D4_WORKFLOW_INDEX"]
|
||||
assert set(result.diagnostics.executed_layers) == {"D1_DOCUMENT_CATALOG", "D2_FACT_INDEX", "D4_WORKFLOW_INDEX", "D0_DOC_CHUNKS"}
|
||||
|
||||
|
||||
@@ -318,8 +364,311 @@ def test_openapi_request_fragment_uses_fragment_aware_gate() -> None:
|
||||
assert result.answer_mode in {"ready", "ready_partial"}
|
||||
assert result.answer
|
||||
assert "type: object" in result.answer
|
||||
assert "message:" in result.answer
|
||||
assert "chat_id:" in result.answer
|
||||
|
||||
|
||||
def test_api_method_explain_prefers_api_method_primary_doc() -> None:
    """An API-method question selects the api_method document as primary.

    The fixture holds a domain-entity catalog row, an api_method catalog row
    for ``/health`` and a relation row linking the two. The test expects the
    api_method document to be the only primary document and the gate to allow
    an exact answer.
    """
    domain_catalog_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/domain/runtime-health.md",
        "title": "Сущность runtime health",
        "content": "Runtime health describes overall service health.",
        "metadata": {"document_id": "domain.runtime_health", "type": "domain_entity"},
    }
    api_catalog_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/api/health.md",
        "title": "GET /health",
        "content": "/health returns runtime and component statuses.",
        "metadata": {"document_id": "api.health_endpoint", "type": "api_method", "endpoint": "/health"},
    }
    relation_row = {
        "layer": "D5_RELATION_GRAPH",
        "path": "docs/domain/runtime-health.md",
        "title": "Runtime health links",
        "content": "runtime health used by health endpoint",
        "metadata": {"document_id": "domain.runtime_health", "target_doc_id": "api.health_endpoint"},
    }
    rows = [domain_catalog_row, api_catalog_row, relation_row]
    runner = DocsQAPipelineRunner(IntentRouterV2(), InMemoryDocsRetrievalAdapter(rows), repo_context=repo_context())

    result = runner.run("Что делает метод health?", "docs-session", mode="pre_llm_only")

    assert result.router_result.query_plan.sub_intent == "API_METHOD_EXPLAIN"
    diagnostics = result.diagnostics
    assert diagnostics.target_anchor in {"health", "/health"}
    assert diagnostics.api_method_match_found is True
    assert diagnostics.selected_primary_documents == ["api.health_endpoint"]
    assert "api.health_endpoint" in diagnostics.primary_doc_candidates
    assert diagnostics.evidence_gate_require_target_api_spec is True
    assert diagnostics.evidence_gate_target_api_spec_found is True
    assert result.answer_mode == "exact"
    assert diagnostics.gate_decision == "allow_exact"
|
||||
|
||||
|
||||
def test_api_method_explain_promotes_api_doc_via_links() -> None:
    """The API document is promoted through the ``links`` metadata of another doc.

    Only the domain-entity document is present in the catalog layer; it links
    to ``api.health_endpoint``, which itself appears only as a doc chunk. The
    test expects that target to be reported as promoted and chosen as primary.
    """
    linking_domain_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/domain/runtime-health.md",
        "title": "Сущность runtime health",
        "content": "Runtime health is the domain model for observability.",
        "metadata": {
            "document_id": "domain.runtime_health",
            "type": "domain_entity",
            "links": [{"target": "api.health_endpoint", "type": "used_by"}],
        },
    }
    api_chunk_row = {
        "layer": "D0_DOC_CHUNKS",
        "path": "docs/api/health.md",
        "title": "api.health_endpoint:Overview",
        "content": "Endpoint /health returns overall runtime status and component diagnostics.",
        "metadata": {"document_id": "api.health_endpoint", "type": "api_method", "endpoint": "/health"},
    }
    rows = [linking_domain_row, api_chunk_row]
    runner = DocsQAPipelineRunner(IntentRouterV2(), InMemoryDocsRetrievalAdapter(rows), repo_context=repo_context())

    result = runner.run("Как работает health endpoint?", "docs-session", mode="pre_llm_only")

    diagnostics = result.diagnostics
    assert diagnostics.promoted_via_links == ["api.health_endpoint"]
    assert diagnostics.selected_primary_documents == ["api.health_endpoint"]
    assert diagnostics.api_method_match_found is True
|
||||
|
||||
|
||||
def test_api_method_explain_rejects_cross_endpoint_primary_candidates() -> None:
    """Documents of a different endpoint must not leak into the selection.

    The fixture mixes catalog and chunk rows for ``/actions/{action}`` with a
    catalog row for ``/health``. For a question about the health method the
    test expects only ``api.health_endpoint`` to be selected and the actions
    endpoint to be listed among the rejected candidates.
    """
    actions_metadata_fields = {
        "document_id": "api.control_actions_endpoint",
        "type": "api_method",
        "endpoint": "/actions/{action}",
    }
    actions_catalog_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/documentation/api/control-actions-endpoint.md",
        "title": "HTTP API /actions/{action}",
        "content": "Endpoint for controlling actions.",
        "metadata": dict(actions_metadata_fields),
    }
    health_catalog_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/documentation/api/health-endpoint.md",
        "title": "HTTP API /health",
        "content": "Health endpoint returns runtime health and component diagnostics.",
        "metadata": {
            "document_id": "api.health_endpoint",
            "type": "api_method",
            "endpoint": "/health",
        },
    }
    actions_chunk_row = {
        "layer": "D0_DOC_CHUNKS",
        "path": "docs/documentation/api/actions-endpoint.md",
        "title": "api.control_actions_endpoint:Scenario",
        "content": "The /actions/{action} endpoint triggers runtime actions.",
        # Separate dict per row, as in the original literals.
        "metadata": dict(actions_metadata_fields),
    }
    rows = [actions_catalog_row, health_catalog_row, actions_chunk_row]
    runner = DocsQAPipelineRunner(IntentRouterV2(), InMemoryDocsRetrievalAdapter(rows), repo_context=repo_context())

    result = runner.run("Как работает метод health?", "docs-session", mode="pre_llm_only")

    assert result.router_result.query_plan.sub_intent == "API_METHOD_EXPLAIN"
    diagnostics = result.diagnostics
    assert diagnostics.target_endpoint_identity["normalized_doc_id"] == "api.health_endpoint"
    assert diagnostics.selected_primary_documents == ["api.health_endpoint"]
    assert diagnostics.primary_api_documents_after_filter == ["api.health_endpoint"]
    assert "api.control_actions_endpoint" in diagnostics.rejected_endpoint_candidates
    assert diagnostics.cross_endpoint_leakage_detected is True
    assert diagnostics.evidence_gate_target_api_spec_found is True
    assert "api.control_actions_endpoint" not in diagnostics.selected_doc_ids
|
||||
|
||||
|
||||
def test_api_method_explain_without_exact_target_returns_insufficiency() -> None:
    """With no document for the asked endpoint the pipeline reports insufficiency.

    The fixture only documents ``/actions/{action}`` and ``/send``. A question
    about the health method is expected to reject both, select nothing, and end
    with a "reject" gate decision and an "insufficient" answer mode.
    """
    actions_catalog_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/documentation/api/control-actions-endpoint.md",
        "title": "HTTP API /actions/{action}",
        "content": "Endpoint for controlling actions.",
        "metadata": {
            "document_id": "api.control_actions_endpoint",
            "type": "api_method",
            "endpoint": "/actions/{action}",
        },
    }
    send_catalog_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/documentation/api/send-endpoint.md",
        "title": "HTTP API /send",
        "content": "Endpoint for sending messages.",
        "metadata": {
            "document_id": "api.send_message_endpoint",
            "type": "api_method",
            "endpoint": "/send",
        },
    }
    rows = [actions_catalog_row, send_catalog_row]
    runner = DocsQAPipelineRunner(IntentRouterV2(), InMemoryDocsRetrievalAdapter(rows), repo_context=repo_context())

    result = runner.run("Что делает метод health?", "docs-session", mode="pre_llm_only")

    assert result.router_result.query_plan.sub_intent == "API_METHOD_EXPLAIN"
    diagnostics = result.diagnostics
    assert diagnostics.target_endpoint_identity["normalized_doc_id"] == "api.health_endpoint"
    assert diagnostics.selected_primary_documents == []
    assert "api.control_actions_endpoint" in diagnostics.rejected_endpoint_candidates
    assert "api.send_message_endpoint" in diagnostics.rejected_endpoint_candidates
    assert diagnostics.target_api_spec_found_exact is False
    assert diagnostics.evidence_gate_target_api_spec_found is False
    assert diagnostics.gate_decision == "reject"
    assert result.answer_mode == "insufficient"
    assert "api.control_actions_endpoint" not in diagnostics.selected_doc_ids
    assert "api.send_message_endpoint" not in diagnostics.selected_doc_ids
|
||||
|
||||
|
||||
def test_api_method_explain_uses_indirect_mode_from_target_linked_docs() -> None:
    """A doc that only links to the target yields an "indirect" answer mode.

    The single fixture row is a domain-entity document linking to
    ``api.health_endpoint``; the target itself is not retrievable. The test
    expects a graph-promotion hit that is not loaded, no primary documents and
    the linking document among the secondary ones.
    """
    linking_domain_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/domain/runtime-health.md",
        "title": "Runtime health",
        "content": "Runtime health describes overall service state and component diagnostics.",
        "metadata": {
            "document_id": "domain.runtime_health",
            "type": "domain_entity",
            "links": [{"target": "api.health_endpoint", "type": "used_by"}],
        },
    }
    rows = [linking_domain_row]
    runner = DocsQAPipelineRunner(IntentRouterV2(), InMemoryDocsRetrievalAdapter(rows), repo_context=repo_context())

    result = runner.run("Как работает метод health?", "docs-session", mode="pre_llm_only")

    assert result.answer_mode == "indirect"
    diagnostics = result.diagnostics
    assert diagnostics.gate_decision == "allow_indirect"
    assert diagnostics.raw_retrieval_non_empty is True
    assert diagnostics.target_primary_context_non_empty is False
    assert diagnostics.indirect_target_context_non_empty is True
    assert diagnostics.graph_promotion_attempted is True
    assert diagnostics.graph_promotion_hits == ["api.health_endpoint"]
    assert diagnostics.promoted_target_loaded is False
    assert diagnostics.materialization_failure_reason == "materialized_rows_empty"
    assert diagnostics.final_primary_document_ids == []
    assert "domain.runtime_health" in diagnostics.final_secondary_document_ids
|
||||
|
||||
|
||||
def test_api_method_explain_skips_llm_when_no_exact_or_indirect_context() -> None:
    """The LLM is not called when there is neither exact nor indirect context.

    Only an unrelated ``/send`` endpoint is documented. Running the full
    pipeline (no ``mode`` override) for a health-method question is expected to
    leave the fake LLM without calls and to report an "insufficient" answer.
    """
    from tests.unit_tests.rag.test_docs_prompt_layer import FakeLlm

    unrelated_send_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/documentation/api/send-endpoint.md",
        "title": "HTTP API /send",
        "content": "Endpoint for sending messages.",
        "metadata": {
            "document_id": "api.send_message_endpoint",
            "type": "api_method",
            "endpoint": "/send",
        },
    }
    rows = [unrelated_send_row]
    llm = FakeLlm("should not be called")
    runner = DocsQAPipelineRunner(
        IntentRouterV2(),
        InMemoryDocsRetrievalAdapter(rows),
        repo_context=repo_context(),
        llm=llm,
    )

    result = runner.run("Что делает метод health?", "docs-session")

    assert llm.calls == []
    assert result.answer_mode == "insufficient"
    diagnostics = result.diagnostics
    assert diagnostics.llm_called is False
    assert diagnostics.llm_call_reason == "no_exact_or_indirect_target_context"
    assert diagnostics.gate_decision == "reject"
|
||||
|
||||
|
||||
def test_api_method_explain_materializes_promoted_target_into_primary_context() -> None:
    """A promoted link target is materialized and becomes the primary context.

    Regular retrieval sees only a domain-entity document linking to
    ``api.health_endpoint``; the adapter additionally receives catalog, fact
    and chunk rows for that target via ``materialized_rows``. The test expects
    the target to be loaded, pinned and used as the only primary document with
    an exact answer mode.
    """
    health_api_metadata = {"document_id": "api.health_endpoint", "type": "api_method", "endpoint": "/health"}
    rows = [
        {
            "layer": "D1_DOCUMENT_CATALOG",
            "path": "docs/domain/runtime-health.md",
            "title": "Runtime health",
            "content": "Runtime health describes service state and component diagnostics.",
            "metadata": {
                "document_id": "domain.runtime_health",
                "type": "domain_entity",
                "links": [{"target": "api.health_endpoint", "type": "used_by"}],
            },
        }
    ]
    materialized_rows = [
        {
            "layer": "D1_DOCUMENT_CATALOG",
            "path": "docs/api/health.md",
            "title": "GET /health",
            "content": "/health returns runtime and component statuses.",
            "metadata": dict(health_api_metadata),
        },
        {
            "layer": "D2_FACT_INDEX",
            "path": "docs/api/health.md",
            "title": "api.health_endpoint:response",
            "content": "Returns health summary and component diagnostics.",
            "metadata": {"subject_id": "api.health_endpoint", "type": "api_method"},
        },
        {
            "layer": "D0_DOC_CHUNKS",
            "path": "docs/api/health.md",
            "title": "api.health_endpoint:Overview",
            "content": "Endpoint /health returns overall runtime health.",
            # Separate dict per row, as in the original literals.
            "metadata": dict(health_api_metadata),
        },
    ]
    adapter = InMemoryDocsRetrievalAdapter(rows, materialized_rows=materialized_rows)
    runner = DocsQAPipelineRunner(IntentRouterV2(), adapter, repo_context=repo_context())

    result = runner.run("Как работает метод health?", "docs-session", mode="pre_llm_only")

    assert result.answer_mode == "exact"
    diagnostics = result.diagnostics
    assert diagnostics.graph_promotion_hits == ["api.health_endpoint"]
    assert diagnostics.graph_promotion_materialized == ["api.health_endpoint"]
    assert diagnostics.promoted_target_loaded is True
    assert diagnostics.promoted_target_chunks_loaded == 1
    assert diagnostics.promoted_target_facts_loaded == 1
    assert diagnostics.pinned_document_ids == ["api.health_endpoint"]
    assert diagnostics.final_primary_document_ids == ["api.health_endpoint"]
    assert "domain.runtime_health" in diagnostics.final_secondary_document_ids
    assert diagnostics.materialized_target_primary_context_non_empty is True
    assert diagnostics.gate_decision == "allow_exact"
|
||||
|
||||
|
||||
def test_entity_question_does_not_prefer_api_method_primary_doc() -> None:
    """An entity question must not trigger the API-method primary selection.

    Both a domain-entity and an api_method document are available. For the
    question "Что такое runtime health?" the test expects the ENTITY_EXPLAIN
    sub-intent, no selected primary documents, no API-method match and an
    empty answer in ``pre_llm_only`` mode.
    """
    domain_catalog_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/domain/runtime-health.md",
        "title": "Сущность runtime health",
        "content": "Runtime health describes service state.",
        "metadata": {"document_id": "domain.runtime_health", "type": "domain_entity"},
    }
    api_catalog_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/api/health.md",
        "title": "GET /health",
        "content": "/health returns runtime status.",
        "metadata": {"document_id": "api.health_endpoint", "type": "api_method", "endpoint": "/health"},
    }
    rows = [domain_catalog_row, api_catalog_row]
    runner = DocsQAPipelineRunner(IntentRouterV2(), InMemoryDocsRetrievalAdapter(rows), repo_context=repo_context())

    result = runner.run("Что такое runtime health?", "docs-session", mode="pre_llm_only")

    assert result.router_result.query_plan.sub_intent == "ENTITY_EXPLAIN"
    diagnostics = result.diagnostics
    assert diagnostics.selected_primary_documents == []
    assert diagnostics.api_method_match_found is False
    assert result.answer == ""
|
||||
|
||||
|
||||
def test_openapi_method_with_only_path_is_rejected() -> None:
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
import pytest
|
||||
|
||||
from tests.unit_tests.rag.intent_router_testkit import run_sequence
|
||||
|
||||
pytestmark = pytest.mark.intent_router
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("query", "plan_id", "primary_doc_types", "secondary_doc_types", "expected_filter_key", "expected_filter_value"),
    [
        (
            "объясни /health",
            "docs_api_method_explain_v1",
            ["api_method"],
            ["logic_block", "domain_entity", "architecture_overview"],
            "endpoint_path",
            "/health",
        ),
        (
            "какие методы в notifications",
            "docs_list_api_methods_v1",
            ["api_method"],
            [],
            "domain_name",
            "notifications",
        ),
        (
            "найди документацию по telegram_delivery",
            "docs_find_documents_by_domain_v1",
            ["index_page", "architecture_overview", "api_method", "logic_block", "domain_entity"],
            [],
            "domain_name",
            "telegram_delivery",
        ),
        (
            "сгенерируй openapi по /send",
            "docs_generate_openapi_v1",
            ["api_method"],
            ["domain_entity", "logic_block"],
            "endpoint_path",
            "/send",
        ),
        (
            "как устроен сервис",
            "docs_general_docs_qa_v1",
            ["index_page", "architecture_overview"],
            ["logic_block", "domain_entity", "api_method"],
            "scope_level",
            "project",
        ),
    ],
)
def test_docs_retrieval_plan_contracts(
    query: str,
    plan_id: str,
    primary_doc_types: list[str],
    secondary_doc_types: list[str],
    expected_filter_key: str,
    expected_filter_value: str,
) -> None:
    """Each docs query produces the expected retrieval-plan contract.

    For every parametrized query the first routing result must carry a
    retrieval plan with the given plan id, primary and secondary document
    types, and the given value under the expected filter key.
    """
    routed = run_sequence([query])
    result = routed[0]

    plan = result.retrieval_plan
    assert plan is not None
    assert plan.plan_id == plan_id
    assert plan.primary_doc_types == primary_doc_types
    assert plan.secondary_doc_types == secondary_doc_types
    assert plan.filters[expected_filter_key] == expected_filter_value
|
||||
@@ -1,4 +1,4 @@
|
||||
from app.modules.agent.runtime.steps.explain.intent_builder import ExplainIntentBuilder
|
||||
from app.core.agent.runtime.steps.explain.intent_builder import ExplainIntentBuilder
|
||||
|
||||
|
||||
def test_explain_intent_builder_extracts_route_symbol_and_file_hints() -> None:
|
||||
|
||||
@@ -2,13 +2,12 @@ import os
|
||||
|
||||
import pytest
|
||||
|
||||
from app.modules.agent.intent_router_v2.factory import GigaChatIntentRouterFactory
|
||||
from app.modules.shared.env_loader import load_workspace_env
|
||||
from app.core.agent.intent_router.factory import GigaChatIntentRouterFactory
|
||||
from app.core.shared.config import load_workspace_env
|
||||
from tests.unit_tests.rag.asserts_intent_router import (
|
||||
assert_domains,
|
||||
assert_file_only_scope,
|
||||
assert_intent,
|
||||
assert_test_policy,
|
||||
assert_path_scope,
|
||||
)
|
||||
from tests.unit_tests.rag.intent_router_testkit import run_sequence
|
||||
|
||||
@@ -29,7 +28,7 @@ def test_e2e_path_carryover_flow() -> None:
|
||||
]
|
||||
)
|
||||
|
||||
assert_file_only_scope(first, "app/core/config.py")
|
||||
assert_path_scope(first, "app/core/config.py", "app/core")
|
||||
assert "app/core/config.py" in second.retrieval_spec.filters.path_scope
|
||||
assert "app/core/config.py" in third.retrieval_spec.filters.path_scope
|
||||
second_file_anchors = [anchor.value for anchor in second.query_plan.anchors if anchor.type == "FILE_PATH" and anchor.source == "conversation_state"]
|
||||
@@ -39,7 +38,10 @@ def test_e2e_path_carryover_flow() -> None:
|
||||
assert any(anchor.type == "FILE_PATH" and anchor.source == "conversation_state" and anchor.span is None for anchor in third.query_plan.anchors)
|
||||
carried_symbols = [anchor.value for anchor in third.query_plan.anchors if anchor.type == "SYMBOL" and anchor.source == "conversation_state"]
|
||||
assert carried_symbols in ([], ["load_config"])
|
||||
assert third.query_plan.sub_intent == "EXPLAIN_LOCAL"
|
||||
assert_intent(first, "GENERAL_QA")
|
||||
assert_intent(second, "GENERAL_QA")
|
||||
assert_intent(third, "GENERAL_QA")
|
||||
assert third.query_plan.sub_intent == "GENERIC_QA"
|
||||
layer_ids = [item.layer_id for item in third.retrieval_spec.layer_queries]
|
||||
assert "C3_ENTRYPOINTS" not in layer_ids
|
||||
|
||||
@@ -52,9 +54,9 @@ def test_e2e_docs_switch_from_code_topic() -> None:
|
||||
]
|
||||
)
|
||||
|
||||
assert_intent(first, "CODE_QA")
|
||||
assert_intent(first, "DOCUMENTATION_EXPLAIN")
|
||||
assert_intent(second, "DOCUMENTATION_EXPLAIN")
|
||||
assert second.conversation_mode == "SWITCH"
|
||||
assert second.conversation_mode == "CONTINUE"
|
||||
assert_domains(second, ["DOCS"])
|
||||
carried = [
|
||||
anchor
|
||||
@@ -75,12 +77,10 @@ def test_e2e_tests_toggle_flow() -> None:
|
||||
]
|
||||
)
|
||||
|
||||
assert_intent(first, "CODE_QA")
|
||||
assert_intent(second, "CODE_QA")
|
||||
assert_test_policy(first, "INCLUDE")
|
||||
assert_test_policy(second, "EXCLUDE")
|
||||
assert first.query_plan.sub_intent == "FIND_TESTS"
|
||||
assert second.query_plan.sub_intent == "EXPLAIN"
|
||||
assert_intent(first, "GENERAL_QA")
|
||||
assert_intent(second, "GENERAL_QA")
|
||||
assert first.query_plan.sub_intent == "GENERIC_QA"
|
||||
assert second.query_plan.sub_intent == "GENERIC_QA"
|
||||
assert "tests" in second.query_plan.negations
|
||||
assert not second.query_plan.expansions
|
||||
assert second.evidence_policy.require_flow is False
|
||||
@@ -94,9 +94,9 @@ def test_e2e_open_file_then_generic_next_steps_is_lightweight() -> None:
|
||||
]
|
||||
)
|
||||
|
||||
assert_file_only_scope(first, "app/core/config.py")
|
||||
assert_file_only_scope(second, "app/core/config.py")
|
||||
assert second.query_plan.sub_intent in {"EXPLAIN_LOCAL", "NEXT_STEPS"}
|
||||
assert_path_scope(first, "app/core/config.py", "app/core")
|
||||
assert_path_scope(second, "app/core/config.py", "app/core")
|
||||
assert second.query_plan.sub_intent == "GENERIC_QA"
|
||||
layer_ids = [item.layer_id for item in second.retrieval_spec.layer_queries]
|
||||
assert "C3_ENTRYPOINTS" not in layer_ids
|
||||
assert second.evidence_policy.require_flow is False
|
||||
@@ -118,9 +118,9 @@ def test_intent_router_live_smoke_path_carryover() -> None:
|
||||
trace_label="intent-router-live",
|
||||
)
|
||||
|
||||
assert_file_only_scope(first, "app/core/config.py")
|
||||
assert_path_scope(first, "app/core/config.py", "app/core")
|
||||
assert "app/core/config.py" in second.retrieval_spec.filters.path_scope
|
||||
assert second.query_plan.sub_intent in {"EXPLAIN_LOCAL", "NEXT_STEPS"}
|
||||
assert second.query_plan.sub_intent == "GENERIC_QA"
|
||||
layer_ids = [item.layer_id for item in second.retrieval_spec.layer_queries]
|
||||
assert "C3_ENTRYPOINTS" not in layer_ids
|
||||
assert second.evidence_policy.require_flow is False
|
||||
|
||||
@@ -1,204 +1,121 @@
|
||||
import pytest
|
||||
|
||||
from tests.unit_tests.rag.asserts_intent_router import (
|
||||
assert_domain_layer_prefixes,
|
||||
assert_domains,
|
||||
assert_file_only_scope,
|
||||
assert_has_file_path,
|
||||
assert_intent,
|
||||
assert_no_symbol_keyword,
|
||||
assert_no_symbol_leakage_from_paths,
|
||||
assert_spans_valid,
|
||||
assert_sub_intent,
|
||||
assert_test_policy,
|
||||
)
|
||||
from tests.unit_tests.rag.intent_router_testkit import run_sequence
|
||||
|
||||
pytestmark = pytest.mark.intent_router
|
||||
|
||||
|
||||
def test_invariant_code_file_path_with_canonical_key_term() -> None:
|
||||
result = run_sequence(["Уточни по файлу app/core/config.py"])[0]
|
||||
|
||||
assert_intent(result, "CODE_QA")
|
||||
assert_has_file_path(result, "app/core/config.py")
|
||||
assert_file_only_scope(result, "app/core/config.py")
|
||||
key_terms = [anchor.value for anchor in result.query_plan.anchors if anchor.type == "KEY_TERM"]
|
||||
assert "файл" in key_terms
|
||||
assert "файлу" not in key_terms
|
||||
assert_spans_valid(result)
|
||||
assert_domain_layer_prefixes(result)
|
||||
def _docs_result(query: str):
    """Route a single query and return its result once docs routing is confirmed.

    The query is passed to ``run_sequence`` as a one-item sequence and the
    first returned result is used. The helper asserts that both
    ``docs_routing`` and ``retrieval_plan`` are populated, so callers can
    dereference them without repeating the ``None`` checks.

    Args:
        query: User query text to route.

    Returns:
        The first result produced by ``run_sequence`` for ``query``.
    """
    result = run_sequence([query])[0]
    assert result.docs_routing is not None
    assert result.retrieval_plan is not None
    return result
|
||||
|
||||
|
||||
def test_invariant_open_file_for_specified_file_phrase_uses_narrow_layers() -> None:
|
||||
result = run_sequence(["Уточни по файлу app/core/config.py"])[0]
|
||||
@pytest.mark.parametrize(
|
||||
("query", "endpoint"),
|
||||
[
|
||||
("как работает метод health", "/health"),
|
||||
("объясни /health", "/health"),
|
||||
("что делает endpoint /send", "/send"),
|
||||
],
|
||||
)
|
||||
def test_docs_mvp_api_method_explain_cases(query: str, endpoint: str) -> None:
|
||||
result = _docs_result(query)
|
||||
|
||||
assert_intent(result, "CODE_QA")
|
||||
assert_sub_intent(result, "OPEN_FILE")
|
||||
assert_file_only_scope(result, "app/core/config.py")
|
||||
layer_ids = [item.layer_id for item in result.retrieval_spec.layer_queries]
|
||||
assert layer_ids == ["C0_SOURCE_CHUNKS"]
|
||||
assert result.evidence_policy.require_flow is False
|
||||
assert result.docs_routing.sub_intent == "API_METHOD_EXPLAIN"
|
||||
assert result.docs_routing.intent == "DOCS_QA"
|
||||
assert result.docs_routing.scope.level == "method"
|
||||
assert result.docs_routing.anchors.endpoint_path == endpoint
|
||||
assert result.retrieval_plan.plan_id == "docs_api_method_explain_v1"
|
||||
assert result.retrieval_plan.filters["endpoint_path"] == endpoint
|
||||
|
||||
|
||||
def test_invariant_inline_code_span_routes_to_code_and_extracts_symbol() -> None:
|
||||
result = run_sequence(["Уточни по коду `def build(x): return x`"])[0]
|
||||
@pytest.mark.parametrize(
|
||||
("query", "scope_level", "domain_name"),
|
||||
[
|
||||
("какие есть методы в проекте", "project", None),
|
||||
("покажи все api", "project", None),
|
||||
("какие методы в notifications", "domain", "notifications"),
|
||||
],
|
||||
)
|
||||
def test_docs_mvp_list_api_methods_cases(query: str, scope_level: str, domain_name: str | None) -> None:
|
||||
result = _docs_result(query)
|
||||
|
||||
assert_intent(result, "CODE_QA")
|
||||
assert_spans_valid(result)
|
||||
assert_no_symbol_keyword(result)
|
||||
symbols = [anchor.value for anchor in result.query_plan.anchors if anchor.type == "SYMBOL"]
|
||||
key_terms = [anchor.value for anchor in result.query_plan.anchors if anchor.type == "KEY_TERM"]
|
||||
assert "build" in symbols
|
||||
assert "def" in key_terms
|
||||
assert result.docs_routing.sub_intent == "LIST_API_METHODS"
|
||||
assert result.docs_routing.intent == "DOCS_DISCOVERY"
|
||||
assert result.docs_routing.scope.level == scope_level
|
||||
assert result.retrieval_plan.plan_id == "docs_list_api_methods_v1"
|
||||
assert result.retrieval_plan.primary_doc_types == ["api_method"]
|
||||
if domain_name:
|
||||
assert result.retrieval_plan.filters["domain_name"] == domain_name
|
||||
|
||||
|
||||
def test_invariant_docs_cyrillic_path_with_quotes() -> None:
|
||||
result = run_sequence(["Что сказано в «docs/архитектура.md»?"])[0]
|
||||
@pytest.mark.parametrize(
|
||||
("query", "domain_name", "subdomain_name", "entity_name"),
|
||||
[
|
||||
("какие документы есть по notifications", "notifications", None, None),
|
||||
("найди документацию по telegram_delivery", "telegram_delivery", None, None),
|
||||
("какие документы связаны с health", None, None, "health"),
|
||||
],
|
||||
)
|
||||
def test_docs_mvp_find_documents_cases(
|
||||
query: str,
|
||||
domain_name: str | None,
|
||||
subdomain_name: str | None,
|
||||
entity_name: str | None,
|
||||
) -> None:
|
||||
result = _docs_result(query)
|
||||
|
||||
assert_intent(result, "DOCUMENTATION_EXPLAIN")
|
||||
assert_sub_intent(result, "COMPONENT_EXPLAIN")
|
||||
assert_domains(result, ["DOCS"])
|
||||
assert "docs/архитектура.md" in result.query_plan.normalized
|
||||
assert_has_file_path(result, "docs/архитектура.md")
|
||||
assert any(anchor.type == "DOC_REF" for anchor in result.query_plan.anchors)
|
||||
assert result.retrieval_spec.filters.doc_kinds == []
|
||||
assert_spans_valid(result)
|
||||
assert_domain_layer_prefixes(result)
|
||||
assert result.docs_routing.sub_intent == "FIND_DOCUMENTS_BY_DOMAIN"
|
||||
assert result.docs_routing.intent == "DOCS_DISCOVERY"
|
||||
assert result.retrieval_plan.plan_id == "docs_find_documents_by_domain_v1"
|
||||
if domain_name:
|
||||
assert result.retrieval_plan.filters["domain_name"] == domain_name
|
||||
if subdomain_name:
|
||||
assert result.retrieval_plan.filters["subdomain_name"] == subdomain_name
|
||||
if entity_name:
|
||||
assert result.retrieval_plan.filters["entity_name"] == entity_name
|
||||
|
||||
|
||||
def test_invariant_file_check_phrase_not_project_misc() -> None:
|
||||
result = run_sequence(["Проверь app/modules/rag/explain/intent_builder.py и объясни"])[0]
|
||||
@pytest.mark.parametrize(
|
||||
("query", "scope_level", "endpoint"),
|
||||
[
|
||||
("сгенерируй openapi по /health", "method", "/health"),
|
||||
("собери swagger по notifications", "domain", None),
|
||||
("сделай спецификацию api по всему проекту", "project", None),
|
||||
],
|
||||
)
|
||||
def test_docs_mvp_generate_openapi_cases(query: str, scope_level: str, endpoint: str | None) -> None:
|
||||
result = _docs_result(query)
|
||||
|
||||
assert_intent(result, "CODE_QA")
|
||||
assert_domains(result, ["CODE"])
|
||||
assert_no_symbol_leakage_from_paths(result)
|
||||
assert_domain_layer_prefixes(result)
|
||||
assert result.docs_routing.sub_intent == "GENERATE_OPENAPI"
|
||||
assert result.docs_routing.intent == "DOCS_GENERATION"
|
||||
assert result.docs_routing.scope.level == scope_level
|
||||
assert result.retrieval_plan.plan_id == "docs_generate_openapi_v1"
|
||||
if endpoint:
|
||||
assert result.retrieval_plan.filters["endpoint_path"] == endpoint
|
||||
|
||||
|
||||
def test_invariant_tests_include_routing() -> None:
|
||||
result = run_sequence(["Где тесты на ConfigManager?"])[0]
|
||||
@pytest.mark.parametrize(
|
||||
"query",
|
||||
[
|
||||
"что делает это приложение",
|
||||
"как устроен сервис",
|
||||
"как связаны worker и api",
|
||||
],
|
||||
)
|
||||
def test_docs_mvp_general_docs_qa_cases(query: str) -> None:
|
||||
result = _docs_result(query)
|
||||
|
||||
assert_intent(result, "CODE_QA")
|
||||
assert_test_policy(result, "INCLUDE")
|
||||
symbols = [anchor.value for anchor in result.query_plan.anchors if anchor.type == "SYMBOL"]
|
||||
key_terms = [anchor.value for anchor in result.query_plan.anchors if anchor.type == "KEY_TERM"]
|
||||
assert "ConfigManager" in symbols
|
||||
assert "тест" in key_terms
|
||||
assert result.docs_routing.sub_intent == "GENERAL_DOCS_QA"
|
||||
assert result.docs_routing.intent == "DOCS_FALLBACK"
|
||||
assert result.retrieval_plan.plan_id == "docs_general_docs_qa_v1"
|
||||
|
||||
|
||||
def test_invariant_keyword_hints_and_expansions_for_function_identifier() -> None:
|
||||
result = run_sequence(["Теперь объясни функцию load_config"])[0]
|
||||
def test_docs_mvp_retrieval_filters_are_merged_into_legacy_spec() -> None:
|
||||
result = _docs_result("какие методы в notifications")
|
||||
|
||||
assert_intent(result, "CODE_QA")
|
||||
assert "load_config" in result.query_plan.keyword_hints
|
||||
assert "функция" not in result.query_plan.keyword_hints
|
||||
assert "def" not in result.query_plan.expansions
|
||||
|
||||
|
||||
def test_invariant_open_file_sub_intent_uses_narrow_retrieval_profile() -> None:
|
||||
result = run_sequence(["Открой файл app/core/config.py"])[0]
|
||||
|
||||
assert_intent(result, "CODE_QA")
|
||||
assert_sub_intent(result, "OPEN_FILE")
|
||||
assert_file_only_scope(result, "app/core/config.py")
|
||||
layer_ids = [item.layer_id for item in result.retrieval_spec.layer_queries]
|
||||
assert "C0_SOURCE_CHUNKS" in layer_ids
|
||||
assert "C1_SYMBOL_CATALOG" not in layer_ids
|
||||
assert "C2_DEPENDENCY_GRAPH" not in layer_ids
|
||||
assert "C3_ENTRYPOINTS" not in layer_ids
|
||||
assert result.evidence_policy.require_flow is False
|
||||
|
||||
|
||||
def test_invariant_docs_question_routes_to_docs() -> None:
|
||||
result = run_sequence(["Что сказано в документации?"])[0]
|
||||
|
||||
assert_intent(result, "DOCUMENTATION_EXPLAIN")
|
||||
assert_domains(result, ["DOCS"])
|
||||
assert_domain_layer_prefixes(result)
|
||||
assert result.query_plan.keyword_hints
|
||||
assert any(item in result.query_plan.expansions for item in result.query_plan.keyword_hints)
|
||||
|
||||
|
||||
def test_invariant_docs_flow_sub_intent_uses_workflow_layers() -> None:
|
||||
result = run_sequence(["Как работает процесс создания заказа по документации?"])[0]
|
||||
|
||||
assert_intent(result, "DOCUMENTATION_EXPLAIN")
|
||||
assert_sub_intent(result, "SYSTEM_FLOW_EXPLAIN")
|
||||
layer_ids = [item.layer_id for item in result.retrieval_spec.layer_queries]
|
||||
assert layer_ids == ["D4_WORKFLOW_INDEX", "D5_RELATION_GRAPH", "D1_DOCUMENT_CATALOG", "D0_DOC_CHUNKS"]
|
||||
|
||||
|
||||
def test_invariant_docs_entity_sub_intent_uses_entity_layers() -> None:
|
||||
result = run_sequence(["Что такое сущность Order в документации?"])[0]
|
||||
|
||||
assert_intent(result, "DOCUMENTATION_EXPLAIN")
|
||||
assert_sub_intent(result, "ENTITY_EXPLAIN")
|
||||
layer_ids = [item.layer_id for item in result.retrieval_spec.layer_queries]
|
||||
assert layer_ids == ["D3_ENTITY_CATALOG", "D5_RELATION_GRAPH", "D1_DOCUMENT_CATALOG", "D0_DOC_CHUNKS"]
|
||||
|
||||
|
||||
def test_invariant_entity_like_camel_case_prefers_entity_explain() -> None:
|
||||
result = run_sequence(["Что такое WorkerHealth?"])[0]
|
||||
|
||||
assert_intent(result, "DOCUMENTATION_EXPLAIN")
|
||||
assert_sub_intent(result, "ENTITY_EXPLAIN")
|
||||
|
||||
|
||||
def test_invariant_related_docs_routes_to_docs_explain() -> None:
|
||||
result = run_sequence(["Найди документацию по billing"])[0]
|
||||
|
||||
assert_intent(result, "DOCUMENTATION_EXPLAIN")
|
||||
assert_sub_intent(result, "RELATED_DOCS_EXPLAIN")
|
||||
layer_ids = [item.layer_id for item in result.retrieval_spec.layer_queries]
|
||||
assert layer_ids == ["D5_RELATION_GRAPH", "D1_DOCUMENT_CATALOG", "D0_DOC_CHUNKS"]
|
||||
|
||||
|
||||
def test_invariant_docs_navigation_uses_related_docs_explain() -> None:
|
||||
result = run_sequence(["Что связано с checkout документацией?"])[0]
|
||||
|
||||
assert_intent(result, "DOCUMENTATION_EXPLAIN")
|
||||
assert_sub_intent(result, "RELATED_DOCS_EXPLAIN")
|
||||
layer_ids = [item.layer_id for item in result.retrieval_spec.layer_queries]
|
||||
assert layer_ids == ["D5_RELATION_GRAPH", "D1_DOCUMENT_CATALOG", "D0_DOC_CHUNKS"]
|
||||
|
||||
|
||||
def test_invariant_openapi_routes_to_docs_layers_with_api_filter() -> None:
|
||||
result = run_sequence(["Сгенерируй openapi yaml для создания заказа"])[0]
|
||||
|
||||
assert_intent(result, "OPENAPI_GENERATION")
|
||||
assert_sub_intent(result, "OPENAPI_METHOD_GENERATE")
|
||||
layer_ids = [item.layer_id for item in result.retrieval_spec.layer_queries]
|
||||
assert layer_ids == ["D1_DOCUMENT_CATALOG", "D2_FACT_INDEX", "D0_DOC_CHUNKS"]
|
||||
assert result.retrieval_spec.filters.doc_type == "api_method"
|
||||
|
||||
|
||||
def test_invariant_general_qa_routes_to_generic_docs_layers() -> None:
|
||||
result = run_sequence(["Помоги разобраться"])[0]
|
||||
|
||||
assert_intent(result, "GENERAL_QA")
|
||||
assert_sub_intent(result, "GENERIC_QA")
|
||||
assert_domains(result, ["DOCS"])
|
||||
layer_ids = [item.layer_id for item in result.retrieval_spec.layer_queries]
|
||||
assert layer_ids == ["D1_DOCUMENT_CATALOG", "D0_DOC_CHUNKS"]
|
||||
|
||||
|
||||
def test_invariant_component_like_manager_routes_to_component_explain() -> None:
|
||||
result = run_sequence(["Какую роль в системе играет RuntimeManager?"])[0]
|
||||
|
||||
assert_intent(result, "DOCUMENTATION_EXPLAIN")
|
||||
assert_sub_intent(result, "COMPONENT_EXPLAIN")
|
||||
|
||||
|
||||
def test_invariant_cycle_query_routes_to_system_flow_explain() -> None:
|
||||
result = run_sequence(["Объясни как работает цикл отправки уведомлений"])[0]
|
||||
|
||||
assert_intent(result, "DOCUMENTATION_EXPLAIN")
|
||||
assert_sub_intent(result, "SYSTEM_FLOW_EXPLAIN")
|
||||
|
||||
|
||||
def test_invariant_overview_question_routes_to_general_qa() -> None:
|
||||
result = run_sequence(["Что вообще описано в документации по этому сервису?"])[0]
|
||||
|
||||
assert_intent(result, "GENERAL_QA")
|
||||
assert_sub_intent(result, "GENERIC_QA")
|
||||
assert getattr(result.retrieval_spec.filters, "doc_type", None) == "api_method"
|
||||
assert getattr(result.retrieval_spec.filters, "domain_name", None) == "notifications"
|
||||
assert getattr(result.retrieval_spec.filters, "scope_level", None) == "domain"
|
||||
|
||||
@@ -2,9 +2,9 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from app.modules.agent.intent_router_v2 import ConversationState, IntentRouterV2
|
||||
from app.modules.agent.intent_router_v2.intent.classifier import IntentClassifierV2
|
||||
from app.modules.agent.intent_router_v2.intent.llm_disambiguator import DocsLlmDisambiguator
|
||||
from app.core.agent.intent_router import ConversationState, IntentRouterV2
|
||||
from app.core.agent.intent_router.docs_mvp.llm_classifier import DocsMvpLlmClassifier
|
||||
from app.core.agent.intent_router.intent.classifier import IntentClassifierV2
|
||||
from tests.unit_tests.rag.intent_router_testkit import repo_context
|
||||
|
||||
|
||||
@@ -21,75 +21,62 @@ class FakeLlm:
|
||||
return self.response
|
||||
|
||||
|
||||
def test_technical_query_keeps_deterministic_routing_without_llm_call() -> None:
|
||||
llm = FakeLlm('{"sub_intent":"GENERIC_QA","reason":"unused","confidence":"low"}')
|
||||
router = IntentRouterV2(
|
||||
def _router(llm: FakeLlm) -> IntentRouterV2:
|
||||
return IntentRouterV2(
|
||||
classifier=IntentClassifierV2(),
|
||||
llm_disambiguator=DocsLlmDisambiguator(llm),
|
||||
docs_llm_classifier=DocsMvpLlmClassifier(llm),
|
||||
enable_llm_disambiguation=True,
|
||||
)
|
||||
|
||||
result = router.route("Объясни endpoint /health", ConversationState(), repo_context())
|
||||
|
||||
assert result.query_plan.sub_intent == "API_METHOD_EXPLAIN"
|
||||
assert result.is_ambiguous is False
|
||||
assert result.routing_mode == "deterministic"
|
||||
def test_docs_technical_query_keeps_deterministic_routing_without_llm_call() -> None:
|
||||
llm = FakeLlm("{}")
|
||||
result = _router(llm).route("Объясни endpoint /health", ConversationState(), repo_context())
|
||||
|
||||
assert result.docs_routing is not None
|
||||
assert result.docs_routing.sub_intent == "API_METHOD_EXPLAIN"
|
||||
assert result.docs_routing.routing_mode == "deterministic"
|
||||
assert result.llm_router_used is False
|
||||
assert llm.calls == []
|
||||
|
||||
|
||||
def test_ambiguous_query_can_be_resolved_by_llm() -> None:
|
||||
llm = FakeLlm('{"sub_intent":"ENTITY_EXPLAIN","reason":"runtime health is a concept/entity here","confidence":"medium"}')
|
||||
router = IntentRouterV2(
|
||||
classifier=IntentClassifierV2(),
|
||||
llm_disambiguator=DocsLlmDisambiguator(llm),
|
||||
enable_llm_disambiguation=True,
|
||||
llm = FakeLlm(
|
||||
json.dumps(
|
||||
{
|
||||
"intent": "DOCS_DISCOVERY",
|
||||
"sub_intent": "FIND_DOCUMENTS_BY_DOMAIN",
|
||||
"confidence": 0.83,
|
||||
"anchors": {"entity_name": "health", "doc_query": "документация по health"},
|
||||
"scope": {"level": "domain"},
|
||||
"reason_short": "health here is a docs topic",
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
)
|
||||
result = _router(llm).route("документация по health", ConversationState(), repo_context())
|
||||
|
||||
result = router.route("Объясни runtime health", ConversationState(), repo_context())
|
||||
|
||||
assert result.is_ambiguous is True
|
||||
assert result.routing_mode == "llm_disambiguation"
|
||||
assert result.docs_routing is not None
|
||||
assert result.docs_routing.routing_mode == "llm_assisted"
|
||||
assert result.docs_routing.sub_intent == "FIND_DOCUMENTS_BY_DOMAIN"
|
||||
assert result.retrieval_plan is not None
|
||||
assert result.retrieval_plan.plan_id == "docs_find_documents_by_domain_v1"
|
||||
assert result.llm_router_used is True
|
||||
assert result.deterministic_selected_sub_intent
|
||||
assert result.llm_router_selected_sub_intent == "ENTITY_EXPLAIN"
|
||||
assert result.query_plan.sub_intent == "ENTITY_EXPLAIN"
|
||||
|
||||
|
||||
def test_ambiguous_query_falls_back_to_deterministic_when_llm_fails() -> None:
|
||||
def test_ambiguous_query_falls_back_to_general_docs_when_llm_fails() -> None:
|
||||
llm = FakeLlm("{}", fail=True)
|
||||
router = IntentRouterV2(
|
||||
classifier=IntentClassifierV2(),
|
||||
llm_disambiguator=DocsLlmDisambiguator(llm),
|
||||
enable_llm_disambiguation=True,
|
||||
)
|
||||
result = _router(llm).route("health документация", ConversationState(), repo_context())
|
||||
|
||||
result = router.route("Как работает health check runtime?", ConversationState(), repo_context())
|
||||
|
||||
assert result.is_ambiguous is True
|
||||
assert result.routing_mode == "deterministic_fallback"
|
||||
assert result.llm_router_used is False
|
||||
assert result.llm_router_error == "llm unavailable"
|
||||
assert result.query_plan.sub_intent == result.deterministic_selected_sub_intent
|
||||
assert result.docs_routing is not None
|
||||
assert result.docs_routing.routing_mode == "llm_fallback"
|
||||
assert result.docs_routing.sub_intent == "GENERAL_DOCS_QA"
|
||||
assert result.retrieval_plan is not None
|
||||
assert result.retrieval_plan.plan_id == "docs_general_docs_qa_v1"
|
||||
|
||||
|
||||
def test_overview_query_stays_in_generic_qa() -> None:
|
||||
llm = FakeLlm('{"sub_intent":"GENERIC_QA","reason":"overview query","confidence":"high"}')
|
||||
router = IntentRouterV2(
|
||||
classifier=IntentClassifierV2(),
|
||||
llm_disambiguator=DocsLlmDisambiguator(llm),
|
||||
enable_llm_disambiguation=True,
|
||||
)
|
||||
def test_llm_classifier_rejects_unknown_labels() -> None:
|
||||
llm = FakeLlm(json.dumps({"intent": "DOCS_QA", "sub_intent": "MADE_UP"}))
|
||||
classifier = DocsMvpLlmClassifier(llm)
|
||||
|
||||
result = router.route("Какая структура документации?", ConversationState(), repo_context())
|
||||
|
||||
assert result.is_ambiguous is False or result.query_plan.sub_intent == "GENERIC_QA"
|
||||
assert result.intent == "GENERAL_QA"
|
||||
assert result.query_plan.sub_intent == "GENERIC_QA"
|
||||
|
||||
|
||||
def test_llm_disambiguator_rejects_unknown_labels() -> None:
|
||||
llm = FakeLlm(json.dumps({"sub_intent": "MADE_UP", "reason": "bad", "confidence": "high"}))
|
||||
disambiguator = DocsLlmDisambiguator(llm)
|
||||
|
||||
assert disambiguator.choose({"query": "test"}) is None
|
||||
assert classifier.classify({"query": "test"}) is None
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from app.modules.agent.runtime.steps.explain.layered_gateway import LayeredRetrievalGateway
|
||||
from app.core.agent.runtime.steps.explain.layered_gateway import LayeredRetrievalGateway
|
||||
|
||||
|
||||
class _Embedder:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.modules.rag.indexing.common.path_filter import (
|
||||
from app.core.rag.indexing.common.path_filter import (
|
||||
count_indexable_change_upserts,
|
||||
filter_changes_for_indexing,
|
||||
filter_snapshot_files,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import pytest
|
||||
|
||||
from app.modules.agent.intent_router_v2.analysis.normalization import QueryNormalizer
|
||||
from app.core.agent.intent_router.analysis.normalization import QueryNormalizer
|
||||
|
||||
pytestmark = pytest.mark.intent_router
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from app.modules.rag.retrieval.query_terms import extract_query_terms
|
||||
from app.core.rag.retrieval.query_terms import extract_query_terms
|
||||
|
||||
|
||||
def test_extract_query_terms_from_code_question() -> None:
|
||||
|
||||
@@ -2,7 +2,8 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
from app.modules.rag.services.rag_service import RagService
|
||||
from app.core.rag.contracts.enums import RagLayer
|
||||
from app.core.rag.indexing.service import RagService
|
||||
|
||||
|
||||
class _FakeEmbedder:
|
||||
@@ -50,3 +51,40 @@ def test_rag_service_progress_uses_only_indexable_files() -> None:
|
||||
assert cache_hits == 0
|
||||
assert cache_misses == 1
|
||||
assert progress == [(1, 1, "src/main.py")]
|
||||
|
||||
|
||||
def test_rag_service_keeps_docs_artifact_type_metadata() -> None:
    """Index one Markdown document and check the metadata of its doc chunk.

    A single file with YAML-style front matter is indexed through
    ``RagService.index_snapshot``. The test then picks the first replaced
    document on the ``DOCS_DOC_CHUNKS`` layer and checks its
    ``artifact_type``, ``domain`` and ``subdomain`` metadata values.
    """
    repository = _FakeRepository()
    service = RagService(embedder=_FakeEmbedder(), repository=repository)
    # The document body is part of the string literal, so its lines stay at
    # column 0 regardless of the surrounding code indentation.
    files = [
        {
            "path": "docs/api/health.md",
            "content_hash": "docs-h1",
            "content": """---
id: api.health
type: api_method
doc_type: api_method
title: Health API
domain: runtime
sub_domain: health
related_docs: []
status: active
---
# Health API

## Summary
- Purpose: check service health.

## Details
### Описание
Returns health payload.
""",
        }
    ]

    asyncio.run(service.index_snapshot("project-1", files))

    doc_chunk = next(
        doc for doc in repository.replaced_docs if doc.layer == RagLayer.DOCS_DOC_CHUNKS
    )
    assert doc_chunk.metadata["artifact_type"] == "DOCS"
    assert doc_chunk.metadata["domain"] == "runtime"
    # The front matter spells the key ``sub_domain``; the metadata is read
    # back under ``subdomain``.
    assert doc_chunk.metadata["subdomain"] == "health"
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
"""Smoke-тест стандартного retrieval API: один embed и вызов repository."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from app.core.rag.embedding.gigachat_embedder import GigaChatEmbedder
|
||||
from app.core.rag.retrieval.session_retriever import RagSessionRetriever, RetrievalPlan
|
||||
|
||||
|
||||
def test_rag_session_retriever_calls_repository() -> None:
    """Smoke-check that ``RagSessionRetriever.retrieve`` reaches the repository.

    Both collaborators are mocks: the embedder returns one fixed vector and
    the repository returns one fixed row. The test checks that the embedder
    and the repository were called, and that the repository received the
    session id as its first positional argument together with the plan's
    ``layers`` and ``limit`` as keyword arguments.
    """
    embedder = MagicMock(spec=GigaChatEmbedder)
    embedder.embed = MagicMock(return_value=[[0.1, 0.2]])
    repo = MagicMock()
    repo.retrieve = MagicMock(return_value=[{"path": "a.md", "layer": "D0_DOC_CHUNKS"}])
    retriever = RagSessionRetriever(repository=repo, embedder=embedder)
    plan = RetrievalPlan(profile="test", layers=["D0_DOC_CHUNKS", "D1_DOCUMENT_CATALOG"], limit=5)

    rows = asyncio.run(retriever.retrieve("sid-1", "hello", plan))

    assert len(rows) == 1
    assert embedder.embed.called
    assert repo.retrieve.called
    # Inspect the most recent repository call via the named accessors.
    last_call = repo.retrieve.call_args
    assert last_call.args[0] == "sid-1"
    assert last_call.kwargs["layers"] == plan.layers
    assert last_call.kwargs["limit"] == 5
|
||||
@@ -1,5 +1,5 @@
|
||||
from app.modules.rag.persistence.retrieval_statement_builder import RetrievalStatementBuilder
|
||||
from app.modules.rag.retrieval.test_filter import build_test_filters, is_test_path
|
||||
from app.core.rag.persistence.retrieval_statement_builder import RetrievalStatementBuilder
|
||||
from app.core.rag.retrieval.test_filter import build_test_filters, is_test_path
|
||||
|
||||
|
||||
def test_retrieve_builder_adds_test_exclusion_filters() -> None:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from app.modules.agent.runtime.steps.explain import CodeExplainRetrieverV2, LayeredRetrievalGateway
|
||||
from app.core.agent.runtime.steps.explain import CodeExplainRetrieverV2, LayeredRetrievalGateway
|
||||
|
||||
|
||||
class _ExplodingEmbedder:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from app.modules.agent.runtime.steps.explain.models import CodeLocation, LayeredRetrievalItem
|
||||
from app.modules.agent.runtime.steps.explain.retriever_v2 import CodeExplainRetrieverV2
|
||||
from app.core.agent.runtime.steps.explain.models import CodeLocation, LayeredRetrievalItem
|
||||
from app.core.agent.runtime.steps.explain.retriever_v2 import CodeExplainRetrieverV2
|
||||
|
||||
|
||||
class _FakeGateway:
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from types import SimpleNamespace
|
||||
|
||||
from app.modules.agent.runtime.steps.explain.models import CodeLocation, LayeredRetrievalItem
|
||||
from app.modules.agent.runtime.steps.explain.retriever_v2 import CodeExplainRetrieverV2
|
||||
from app.core.agent.runtime.steps.explain.models import CodeLocation, LayeredRetrievalItem
|
||||
from app.core.agent.runtime.steps.explain.retriever_v2 import CodeExplainRetrieverV2
|
||||
|
||||
|
||||
class _ProductionFirstGateway:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from app.modules.agent.runtime.steps.explain.models import CodeLocation, LayeredRetrievalItem
|
||||
from app.modules.agent.runtime.steps.explain.trace_builder import TraceBuilder
|
||||
from app.core.agent.runtime.steps.explain.models import CodeLocation, LayeredRetrievalItem
|
||||
from app.core.agent.runtime.steps.explain.trace_builder import TraceBuilder
|
||||
|
||||
|
||||
class _FakeGraphRepository:
|
||||
|
||||
Reference in New Issue
Block a user