ввв
This commit is contained in:
@@ -0,0 +1,68 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.retrieval.api_endpoint_collector import (
|
||||
ApiEndpointCollector,
|
||||
)
|
||||
|
||||
|
||||
def test_collector_returns_method_and_path_lines() -> None:
    """A `GET|POST` endpoint and a single-method endpoint both appear as "METHOD /path" entries."""
    control_actions_row = {
        "metadata": {
            "endpoint": "GET|POST /actions/{action}",
            "summary_text": "Endpoint for runtime control actions",
        },
        "title": "HTTP API /actions/{action}",
        "path": "docs/api/control-actions-endpoint.md",
        "content": "",
    }
    health_row = {
        "metadata": {
            "endpoint": "GET /health",
        },
        "title": "HTTP API /health",
        "path": "docs/api/health-endpoint.md",
        "content": "",
    }

    collected = ApiEndpointCollector().collect([control_actions_row, health_row])

    # Each HTTP method of the combined declaration must be listed separately.
    for expected in ("GET /actions/{action}", "POST /actions/{action}", "GET /health"):
        assert expected in collected
|
||||
|
||||
|
||||
def test_collector_ignores_file_paths_from_content() -> None:
    """File paths mentioned in the summary or content must not show up as endpoints."""
    health_row = {
        "metadata": {
            "endpoint": "GET /health",
            "summary_text": "Uses src/telegram_notify_app/control_api.py",
        },
        "title": "Health endpoint",
        "path": "docs/api/health-endpoint.md",
        "content": "See /telegram_notify_app/control_api.py and /telegram_notify_app/worker.py",
    }

    collected = ApiEndpointCollector().collect([health_row])

    # Only the declared endpoint survives; the slash-prefixed source paths do not.
    assert collected == ["GET /health"]
|
||||
|
||||
|
||||
def test_collector_uses_title_path_fallback_when_endpoint_metadata_missing() -> None:
    """Without `endpoint` metadata the expected result is `GET` plus the path found in the title."""
    row_without_endpoint = {
        "metadata": {
            "summary_text": "Control actions endpoint",
        },
        "title": "HTTP API /actions/{action}",
        "path": "docs/api/control-actions-endpoint.md",
        "content": "",
    }

    collected = ApiEndpointCollector().collect([row_without_endpoint])

    assert collected == ["GET /actions/{action}"]
|
||||
@@ -1,7 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.core.agent.processes.v2.evidence.assembler import DocsEvidenceAssembler
|
||||
from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2RouteAnchors, V2RouteResult, V2Subintent
|
||||
from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
|
||||
from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2RouteAnchors, V2RouteResult, V2Subintent
|
||||
|
||||
|
||||
def _route(*, hints: list[str], terms: list[str], subintent: str = V2Subintent.SUMMARY) -> V2RouteResult:
|
||||
|
||||
@@ -4,11 +4,11 @@ import asyncio
|
||||
from dataclasses import dataclass
|
||||
|
||||
from app.core.agent.processes.v2 import V2IntentRouter, V2Process
|
||||
from app.core.agent.processes.v2.retrieval.target_doc_seeding import normalize_doc_path
|
||||
from app.core.agent.processes.v2.evidence.assembler import DocsEvidenceAssembler
|
||||
from app.core.agent.processes.v2.evidence.gate import DocsEvidenceGate
|
||||
from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
|
||||
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
|
||||
from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
|
||||
from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
|
||||
from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver
|
||||
from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import normalize_doc_path
|
||||
from app.core.api.domain.models.agent_request import AgentRequest
|
||||
from app.core.api.domain.models.agent_session import AgentSession
|
||||
from app.schemas.orchestration import RequestExecutionStatus
|
||||
|
||||
@@ -2,8 +2,8 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter
|
||||
from app.core.rag.retrieval.session_retriever import RetrievalPlan
|
||||
from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
|
||||
|
||||
|
||||
class FakeRetriever:
|
||||
@@ -50,6 +50,24 @@ class FakeRetriever:
|
||||
self.calls.append(("substring", list(path_needles)))
|
||||
return []
|
||||
|
||||
async def list_docs_scope_rows(self, _rag_session_id: str, *, limit: int = 8000) -> list[dict]:
    """Record a ``("scope_rows", None)`` call and return two fixed API-method catalog rows.

    Both the session id and ``limit`` are accepted but ignored.
    """
    _ = limit  # intentionally unused
    self.calls.append(("scope_rows", None))

    # (path, title, endpoint) for each canned catalog row.
    catalog = (
        ("docs/api/health-endpoint.md", "HTTP API /health", "GET /health"),
        ("docs/api/send-message-endpoint.md", "HTTP API /send", "GET /send"),
    )
    return [
        {
            "path": doc_path,
            "layer": "D1_DOCUMENT_CATALOG",
            "title": doc_title,
            "metadata": {"type": "api_method", "endpoint": endpoint},
        }
        for doc_path, doc_title, endpoint in catalog
    ]
|
||||
|
||||
|
||||
def test_v2_rag_adapter_seeds_exact_rows_from_plan_hints() -> None:
|
||||
adapter = V2RagRetrievalAdapter(FakeRetriever())
|
||||
@@ -79,3 +97,21 @@ def test_v2_rag_adapter_uses_substring_fallback_for_missing_hint() -> None:
|
||||
asyncio.run(adapter.fetch_rows("rag-1", "find file", plan))
|
||||
|
||||
assert ("substring", ["missing-health-endpoint.md"]) in retriever.calls
|
||||
|
||||
|
||||
def test_v2_rag_adapter_applies_query_signal_filter_for_api_exposed() -> None:
    """With the `health` query signal, only the health endpoint row is expected back."""
    adapter = V2RagRetrievalAdapter(FakeRetriever())
    plan_filters = {
        "metadata.type": "api_method",
        "query_signals": ["health"],
    }
    plan = RetrievalPlan(
        profile="api_exposed",
        layers=["D1_DOCUMENT_CATALOG"],
        limit=50,
        filters=plan_filters,
    )

    fetched = asyncio.run(adapter.fetch_rows("rag-1", "health endpoints", plan))

    assert len(fetched) == 1
    assert fetched[0]["path"] == "docs/api/health-endpoint.md"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2RouteAnchors, V2RouteResult, V2Subintent
|
||||
from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
|
||||
from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2RouteAnchors, V2RouteResult, V2Subintent
|
||||
from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver
|
||||
|
||||
|
||||
def _route(
|
||||
|
||||
@@ -0,0 +1,138 @@
|
||||
"""Tests for pre-LLM scope grounding from D1/D3 catalog rows (no extra RAG layer)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from app.core.agent.processes.v2 import V2IntentRouter
|
||||
from app.core.agent.utils.process_v2.models import V2ScopeType
|
||||
|
||||
|
||||
class FakeLlm:
    """LLM stand-in that answers every request with one canned response."""

    def __init__(self, response: str) -> None:
        # Text handed back verbatim by every generate() call.
        self.response = response

    def generate(self, prompt_name: str, user_input: str, **_kwargs) -> str:
        """Return the canned response; the prompt name, input and extra kwargs are ignored."""
        _ = (prompt_name, user_input)  # intentionally unused
        return self.response
|
||||
|
||||
|
||||
def _llm_ok() -> str:
|
||||
return json.dumps(
|
||||
{
|
||||
"routing_domain": "DOCS",
|
||||
"intent": "DOC_EXPLAIN",
|
||||
"subintent": "SUMMARY",
|
||||
"confidence": 0.9,
|
||||
"reason_short": "ok",
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
|
||||
|
||||
def _fixture_rows() -> list[dict]:
|
||||
return [
|
||||
{
|
||||
"layer": "D1_DOCUMENT_CATALOG",
|
||||
"path": "docs/billing/overview.md",
|
||||
"title": "Billing",
|
||||
"content": "",
|
||||
"metadata": {"domain": "billing", "summary_text": "Billing domain overview"},
|
||||
},
|
||||
{
|
||||
"layer": "D1_DOCUMENT_CATALOG",
|
||||
"path": "docs/billing/invoices.md",
|
||||
"title": "Invoices",
|
||||
"content": "",
|
||||
"metadata": {"domain": "billing", "subdomain": "invoice", "tags": ["invoice", "invoices"]},
|
||||
},
|
||||
{
|
||||
"layer": "D3_ENTITY_CATALOG",
|
||||
"path": "docs/domains/order.md",
|
||||
"title": "Order",
|
||||
"content": "",
|
||||
"metadata": {"entity_name": "Order", "domain": "billing"},
|
||||
},
|
||||
{
|
||||
"layer": "D1_DOCUMENT_CATALOG",
|
||||
"path": "docs/api/invoices_post.md",
|
||||
"title": "POST /api/v1/invoices",
|
||||
"content": "",
|
||||
"metadata": {
|
||||
"doc_type": "api_method",
|
||||
"domain": "billing",
|
||||
"endpoint": "/api/v1/invoices",
|
||||
},
|
||||
},
|
||||
{
|
||||
"layer": "D1_DOCUMENT_CATALOG",
|
||||
"path": "docs/widgets/readme.md",
|
||||
"title": "Widgets",
|
||||
"content": "",
|
||||
"metadata": {"domain": "widgets", "summary_text": "Unrelated domain for negative tests"},
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def _router() -> V2IntentRouter:
    """Create a router backed by the canned LLM reply and the fixture catalog rows."""

    def _rows_for_session(_sid):
        # The session id is ignored; every session sees the same fixture rows.
        return _fixture_rows()

    canned_llm = FakeLlm(_llm_ok())
    return V2IntentRouter(llm=canned_llm, scope_rows_provider=_rows_for_session)
|
||||
|
||||
|
||||
def test_scope_global_project_wide_enumeration() -> None:
    """A project-wide "which API methods exist" question must resolve to GLOBAL scope."""
    query = "какие api методы есть в проекте"
    routed = _router().route(query, rag_session_id="sess-1")
    assert routed.scope_type == V2ScopeType.GLOBAL
|
||||
|
||||
|
||||
def test_scope_domain_billing() -> None:
    """Naming the `billing` domain in the query must yield DOMAIN scope anchored on it."""
    query = "какие api есть в billing"
    routed = _router().route(query, rag_session_id="sess-1")

    assert routed.scope_type == V2ScopeType.DOMAIN
    assert routed.anchors.process_domain == "billing"
    # The domain must also be listed among the candidates.
    assert any(candidate.value == "billing" for candidate in routed.anchors.candidate_domains)
|
||||
|
||||
|
||||
def test_scope_subdomain_billing_invoices() -> None:
    """A query naming `billing invoices` must resolve to the `invoice` subdomain of `billing`."""
    query = "какие api есть в billing invoices"
    routed = _router().route(query, rag_session_id="sess-1")

    assert routed.scope_type == V2ScopeType.SUBDOMAIN
    assert routed.anchors.process_domain == "billing"
    assert routed.anchors.process_subdomain == "invoice"
|
||||
|
||||
|
||||
def test_scope_entity_order_doc() -> None:
    """Asking for the `Order` document must yield ENTITY scope with `order` among the entity names."""
    query = "дай доку по Order"
    routed = _router().route(query, rag_session_id="sess-1")

    assert routed.scope_type == V2ScopeType.ENTITY
    # Entity names are compared case-insensitively.
    lowered_names = [name.lower() for name in routed.anchors.entity_names]
    assert "order" in lowered_names
|
||||
|
||||
|
||||
def test_scope_entity_endpoint_path() -> None:
    """An explicit `POST /api/v1/invoices` mention must yield ENTITY scope with that endpoint path."""
    query = "где описан POST /api/v1/invoices"
    routed = _router().route(query, rag_session_id="sess-1")

    assert routed.scope_type == V2ScopeType.ENTITY
    assert "/api/v1/invoices" in routed.anchors.endpoint_paths
|
||||
|
||||
|
||||
def test_scope_vague_no_false_domain() -> None:
    """A gibberish query must stay UNKNOWN and must not be anchored to any domain."""
    query = "что там с фывырапфыв"
    routed = _router().route(query, rag_session_id="sess-1")

    assert routed.scope_type == V2ScopeType.UNKNOWN
    assert routed.anchors.process_domain is None
|
||||
|
||||
|
||||
def test_scope_russian_payments_phrase_matches_tag() -> None:
    """A Russian "payments" phrase must ground via a catalog row tagged with the same word.

    Any of the DOMAIN / ENTITY / SUBDOMAIN scopes is accepted, as long as either
    the `billing` domain is anchored or a candidate entity contains "платеж".
    """
    payments_row = {
        "layer": "D1_DOCUMENT_CATALOG",
        "path": "docs/billing/payments_ru.md",
        "title": "Платежи",
        "content": "",
        "metadata": {"domain": "billing", "tags": ["платежи"]},
    }
    rows = _fixture_rows() + [payments_row]
    router = V2IntentRouter(llm=FakeLlm(_llm_ok()), scope_rows_provider=lambda _sid: rows)

    routed = router.route("какие методы есть в платежи", rag_session_id="sess-1")

    accepted_scopes = {V2ScopeType.DOMAIN, V2ScopeType.ENTITY, V2ScopeType.SUBDOMAIN}
    assert routed.scope_type in accepted_scopes
    assert routed.anchors.process_domain == "billing" or any(
        "платеж" in candidate.value for candidate in routed.anchors.candidate_entities
    )
|
||||
|
||||
|
||||
def test_router_without_session_skips_db_and_keeps_target_terms() -> None:
    """With no scope provider and no session id, scope stays UNKNOWN but target terms are kept."""
    router = V2IntentRouter(llm=FakeLlm(_llm_ok()))
    routed = router.route("Покажи где описан RuntimeHealth и /health")

    assert routed.scope_type == V2ScopeType.UNKNOWN
    # The target term is expected in lowercase form.
    assert "runtimehealth" in routed.target_terms
|
||||
@@ -0,0 +1,83 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
|
||||
from app.core.api.application.filesystem_snapshot_resolver import FilesystemSnapshotResolver
|
||||
|
||||
|
||||
def test_augment_adds_untracked_files_from_filesystem(tmp_path) -> None:
    """A docs file that exists only on disk is added next to the files sent in the request."""
    repo_root = tmp_path / "repo"
    api_docs_dir = repo_root / "docs" / "api"
    api_docs_dir.mkdir(parents=True)  # also creates repo_root and docs/
    (api_docs_dir / "control-actions-endpoint.md").write_text(
        "---\ndoc_type: api_method\n---\nGET|POST /actions/{action}\n",
        encoding="utf-8",
    )

    # The request only knows about the health document.
    request_files = [
        {
            "path": "docs/api/health-endpoint.md",
            "content": "health",
            "content_hash": hashlib.sha256(b"health").hexdigest(),
        }
    ]

    augmented = FilesystemSnapshotResolver().augment(project_id=str(repo_root), files=request_files)

    seen_paths = {entry["path"] for entry in augmented}
    assert "docs/api/health-endpoint.md" in seen_paths
    assert "docs/api/control-actions-endpoint.md" in seen_paths
|
||||
|
||||
|
||||
def test_augment_prefers_request_payload_for_existing_path(tmp_path) -> None:
    """When a path exists both on disk and in the request, the request content is kept."""
    repo_root = tmp_path / "repo"
    api_docs_dir = repo_root / "docs" / "api"
    api_docs_dir.mkdir(parents=True)  # also creates repo_root and docs/
    (api_docs_dir / "health-endpoint.md").write_text("from-disk", encoding="utf-8")

    incoming_content = "from-request"
    digest = hashlib.sha256(incoming_content.encode("utf-8")).hexdigest()
    request_files = [
        {
            "path": "docs/api/health-endpoint.md",
            "content": incoming_content,
            "content_hash": digest,
        }
    ]

    augmented = FilesystemSnapshotResolver().augment(project_id=str(repo_root), files=request_files)

    entries_by_path = {entry["path"]: entry for entry in augmented}
    assert entries_by_path["docs/api/health-endpoint.md"]["content"] == incoming_content
|
||||
|
||||
|
||||
def test_augment_ignores_files_outside_root_docs(tmp_path) -> None:
    """Files under `docs/` are picked up from disk; files under `src/` are not."""
    repo_root = tmp_path / "repo"
    on_disk = (
        ("docs/README.md", "docs"),
        ("src/app.py", "print('x')"),
    )
    for relative_path, text in on_disk:
        target = repo_root / relative_path
        target.parent.mkdir(parents=True)
        target.write_text(text, encoding="utf-8")

    augmented = FilesystemSnapshotResolver().augment(project_id=str(repo_root), files=[])

    seen_paths = {entry["path"] for entry in augmented}
    assert "docs/README.md" in seen_paths
    assert "src/app.py" not in seen_paths
|
||||
|
||||
|
||||
def test_augment_keeps_docs_when_request_uses_absolute_paths(tmp_path) -> None:
    """Absolute request paths under `docs/` come back repo-relative; `src/` paths are dropped."""
    repo_root = tmp_path / "repo"
    api_docs_dir = repo_root / "docs" / "api"
    api_docs_dir.mkdir(parents=True)  # also creates repo_root and docs/
    (api_docs_dir / "health-endpoint.md").write_text("disk", encoding="utf-8")

    absolute_docs = str(api_docs_dir / "control-actions-endpoint.md")
    absolute_src = str(repo_root / "src" / "app.py")
    request_files = [
        {
            "path": absolute_path,
            "content": text,
            "content_hash": hashlib.sha256(text.encode("utf-8")).hexdigest(),
        }
        for absolute_path, text in ((absolute_docs, "req-doc"), (absolute_src, "req-src"))
    ]

    augmented = FilesystemSnapshotResolver().augment(project_id=str(repo_root), files=request_files)

    seen_paths = {entry["path"] for entry in augmented}
    assert "docs/api/control-actions-endpoint.md" in seen_paths
    assert "src/app.py" not in seen_paths
|
||||
@@ -304,3 +304,34 @@ Read health
|
||||
assert RagLayer.DOCS_INTEGRATION_INDEX in layers
|
||||
assert "docs integration parse warning" in caplog.text
|
||||
assert all(doc.source.path == "docs/api/health-endpoint.md" for doc in docs)
|
||||
|
||||
|
||||
# Indexes one markdown document with a front-matter header through
# DocsIndexingPipeline.index_file and checks that the DOCS_DOCUMENT_CATALOG
# document still exposes metadata type "api_method" and the declared title.
# NOTE(review): leading whitespace is lost in this view, so the exact indentation
# of the front matter (presumably what makes it "broken") cannot be confirmed
# here - verify against the real file before reformatting the literal below.
def test_docs_pipeline_tolerates_broken_frontmatter_and_keeps_api_type() -> None:
|
||||
pipeline = DocsIndexingPipeline()
|
||||
content = """---
|
||||
id: api.control_actions_endpoint
|
||||
type: api_method
|
||||
doc_type: api_method
|
||||
title: HTTP API /actions/{action}
|
||||
endpoint: GET|POST /actions/{action}
|
||||
links:
|
||||
called_by:
|
||||
- ext.operator
|
||||
tags:
|
||||
- api
|
||||
---
|
||||
# HTTP API /actions/{action}
|
||||
|
||||
## Summary
|
||||
|
||||
Control actions endpoint.
|
||||
"""
|
||||
docs = pipeline.index_file(
|
||||
repo_id="acme/proj",
|
||||
commit_sha="abc123",
|
||||
path="docs/api/control-actions-endpoint.md",
|
||||
content=content,
|
||||
|
||||
)
|
||||
# Pick the first produced document on the catalog layer; next() raises StopIteration if none exists.
catalog = next(doc for doc in docs if doc.layer == RagLayer.DOCS_DOCUMENT_CATALOG)
|
||||
assert catalog.metadata["type"] == "api_method"
|
||||
assert catalog.metadata["title"] == "HTTP API /actions/{action}"
|
||||
|
||||
Reference in New Issue
Block a user