фиксирую состояние

This commit is contained in:
2026-04-07 21:41:27 +03:00
parent bc29d51a29
commit 8fb76bb331
56 changed files with 7011 additions and 316 deletions
@@ -1,4 +1,8 @@
import logging
from app.core.rag.contracts.enums import RagLayer
from app.core.rag.indexing.docs.chunkers.markdown_chunker import SectionChunk
from app.core.rag.indexing.docs.integration_extractor import DocsIntegrationExtractor
from app.core.rag.indexing.docs.pipeline import DocsIndexingPipeline
@@ -153,3 +157,150 @@ Create invoice
assert integration_doc.metadata["target"] == "db.billing.invoices"
assert integration_doc.metadata["target_type"] == "db"
assert integration_doc.metadata["details"]["transaction"] == "required"
def test_docs_integration_extractor_keeps_valid_blocks() -> None:
    """A well-formed integration section produces one record with its details.

    Feeds the extractor a single section describing a database target and
    checks the resulting record's target, the scalar ``transaction`` detail
    and the ``tables`` list.
    """
    # NOTE(review): the nested items under `details`/`tables` depend on the
    # leading whitespace inside these literals — confirm the spacing matches
    # the repository copy of this test.
    block_text = (
        "- target: db.billing.invoices\n"
        "- target_type: db\n"
        "- direction: outbound\n"
        "- interaction: writes\n"
        "- via: invoice repository\n"
        "- purpose: persist created invoices\n"
        "- details:\n"
        " - transaction: required\n"
        " - tables:\n"
        " - invoices\n"
        " - invoice_items\n"
    )
    billing_section = SectionChunk(
        section_path="Details > Интеграции > Billing DB",
        section_title="Billing DB",
        content=block_text,
        order=0,
    )

    records = DocsIntegrationExtractor().extract(
        [billing_section],
        path="docs/billing/create_invoice.md",
    )

    assert len(records) == 1
    record = records[0]
    assert record.target == "db.billing.invoices"
    assert record.details["transaction"] == "required"
    assert record.details["tables"] == ["invoices", "invoice_items"]
def test_docs_integration_extractor_soft_fails_on_markdown_like_yaml(caplog) -> None:
    """An integration block with backtick-wrapped values degrades gracefully.

    The extractor is expected to still return one record with the top-level
    fields intact, leave ``details`` empty, and log a warning that names the
    source document path.
    """
    # NOTE(review): presumably the backtick-wrapped value under `details` is
    # what triggers the parse warning — confirm against the extractor.
    block_text = (
        "- target: runtime.health_provider\n"
        "- target_type: service\n"
        "- direction: outbound\n"
        "- interaction: depends_on\n"
        "- via: async callback `health_provider()`\n"
        "- purpose: получить агрегированный health runtime\n"
        "- details:\n"
        " - timeout_ms: 5000\n"
        " - response_type: `HealthPayload`\n"
    )
    provider_section = SectionChunk(
        section_path="Details > Интеграции > Runtime health provider",
        section_title="Runtime health provider",
        content=block_text,
        order=0,
    )
    extractor = DocsIntegrationExtractor()

    with caplog.at_level(logging.WARNING):
        records = extractor.extract(
            [provider_section],
            path="docs/api/health-endpoint.md",
        )

    assert len(records) == 1
    record = records[0]
    assert record.target == "runtime.health_provider"
    assert record.via == "async callback `health_provider()`"
    assert record.details == {}

    captured_log = caplog.text
    assert "docs integration parse warning" in captured_log
    assert "docs/api/health-endpoint.md" in captured_log
def test_docs_pipeline_keeps_other_layers_when_integration_block_is_invalid(caplog) -> None:
    """Indexing a document still emits every docs layer despite a parse warning.

    Indexes one markdown document whose integration section triggers the
    "docs integration parse warning" log line, then checks that all expected
    layers are present in the output and that every produced document points
    back at the source path.
    """
    source_path = "docs/api/health-endpoint.md"
    # The literal below is the document under test; its lines are part of the
    # runtime value and must stay exactly as written.
    markdown_source = """---
id: api.runtime.health
type: api_method
doc_type: api_method
name: runtime_health
title: Runtime Health API
module: runtime
domain: platform
sub_domain: observability
layer: application
status: active
related_docs: []
links:
uses_logic:
- logic.runtime.health
---
# Runtime Health API
## Summary
Returns current runtime health.
## Details
### Описание
Возвращает агрегированное состояние runtime.
### Сценарий
**Название:**
Read health
**Предусловия:**
- runtime is running
**Триггер:**
- client calls health endpoint
**Основной сценарий:**
1. Read current state.
2. Return payload.
### Входные параметры
| field | type | required |
| --- | --- | --- |
| verbose | boolean | no |
### Интеграции
#### Runtime health provider
- target: runtime.health_provider
- target_type: service
- direction: outbound
- interaction: depends_on
- via: async callback `health_provider()`
- purpose: получить агрегированный health runtime
- details:
- timeout_ms: 5000
- response_type: `HealthPayload`
"""
    pipeline = DocsIndexingPipeline()

    with caplog.at_level(logging.WARNING):
        docs = pipeline.index_file(
            repo_id="acme/proj",
            commit_sha="abc123",
            path=source_path,
            content=markdown_source,
        )

    produced_layers = {doc.layer for doc in docs}
    expected_layers = (
        RagLayer.DOCS_DOCUMENT_CATALOG,
        RagLayer.DOCS_DOC_CHUNKS,
        RagLayer.DOCS_FACT_INDEX,
        RagLayer.DOCS_WORKFLOW_INDEX,
        RagLayer.DOCS_RELATION_GRAPH,
        RagLayer.DOCS_INTEGRATION_INDEX,
    )
    for layer in expected_layers:
        assert layer in produced_layers

    assert "docs integration parse warning" in caplog.text

    # Every emitted document must reference the file that was indexed.
    for doc in docs:
        assert doc.source.path == "docs/api/health-endpoint.md"
@@ -45,6 +45,23 @@ def test_retrieve_builder_adds_prefer_bonus_sorting() -> None:
assert params["prefer_like_0"] == "%/test\\_%.py"
def test_retrieve_builder_adds_metadata_filters() -> None:
    """Metadata domain/subdomain arguments show up in the SQL and its params.

    Builds a retrieve statement with both metadata filters set and checks
    that the SQL text references the two JSON keys and that the bound
    parameters carry the supplied values.
    """
    statement, bound = RetrievalStatementBuilder().build_retrieve(
        "rag-1",
        [0.1, 0.2],
        query_text="notification flow",
        metadata_domain="notifications",
        metadata_subdomain="delivery_loop",
    )

    # The SQL must filter on both JSON metadata keys.
    for json_accessor in ("metadata_json->>'domain'", "metadata_json->>'subdomain'"):
        assert json_accessor in statement

    assert bound["metadata_domain"] == "notifications"
    assert bound["metadata_subdomain"] == "delivery_loop"
def test_lexical_builder_omits_test_filters_when_not_requested() -> None:
builder = RetrievalStatementBuilder()