"""Tests for the docs indexing pipeline and the docs integration extractor.

NOTE(review): the line breaks and nested-list indentation inside the Markdown
fixtures below were reconstructed from a whitespace-collapsed copy of this
module -- confirm them against the canonical fixtures before relying on
exact blank-line placement.
"""

import logging

from app.core.rag.contracts.enums import RagLayer
from app.core.rag.indexing.docs.chunkers.markdown_chunker import SectionChunk
from app.core.rag.indexing.docs.integration_extractor import DocsIntegrationExtractor
from app.core.rag.indexing.docs.pipeline import DocsIndexingPipeline


def test_docs_pipeline_builds_docs_layers_from_modern_markdown_structure() -> None:
    """Index a fully populated API document and check every docs layer is emitted."""
    pipeline = DocsIndexingPipeline()
    content = """---
id: api.billing.create_invoice
type: api_method
doc_type: api_method
name: create_invoice
title: Create Invoice API
module: billing
domain: billing
sub_domain: invoices
layer: application
status: draft
updated_at: 2026-03-23
tags: [billing, api]
entities: [Invoice]
parent: billing_api
children: []
related_docs: [api.billing.validate_invoice]
links:
  called_by:
    - ui.billing.invoice_form
  uses_logic:
    - logic.billing.invoice_validation
---

# Create Invoice API

## Summary
Creates an invoice in billing.

## Details

### Описание
Создает счет на оплату.

### Сценарий
**Название:** Create invoice

**Предусловия:**
- billing service is available

**Триггер:**
- client sends create invoice request

**Основной сценарий:**
1. Validate payload.
2. Create invoice.

**Альтернативный сценарий:**
1. Reject invalid payload.

**Обработка ошибок:**
1. Return validation error.

**Постусловие:**
- Invoice is created.

### Контракт
#### Метаданные вызова
- Method: POST
- Auth: USER
- Idempotency: false

### Входные параметры
| field | type | required | validation |
| --- | --- | --- | --- |
| amount | decimal | yes | > 0 |

### Выходные параметры
| field | type | required |
| --- | --- | --- |
| invoice_id | string | yes |

### Интеграции
#### Billing DB
- target: db.billing.invoices
- target_type: db
- direction: outbound
- interaction: writes
- via: invoice repository
- purpose: persist created invoices
- details:
  - transaction: required
  - tables:
    - invoices
    - invoice_items

### Ошибки
| status | error | client action |
| --- | --- | --- |
| 400 | invalid_amount | fix request |
"""

    docs = pipeline.index_file(
        repo_id="acme/proj",
        commit_sha="abc123",
        path="docs/billing/create_invoice.md",
        content=content,
    )

    # Every docs layer must be represented for a document that fills all sections.
    layers = {doc.layer for doc in docs}
    assert RagLayer.DOCS_DOC_CHUNKS in layers
    assert RagLayer.DOCS_DOCUMENT_CATALOG in layers
    assert RagLayer.DOCS_FACT_INDEX in layers
    assert RagLayer.DOCS_ENTITY_CATALOG in layers
    assert RagLayer.DOCS_WORKFLOW_INDEX in layers
    assert RagLayer.DOCS_RELATION_GRAPH in layers
    assert RagLayer.DOCS_INTEGRATION_INDEX in layers

    # Catalog entry mirrors the front matter and the Summary section.
    catalog_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_DOCUMENT_CATALOG)
    assert catalog_doc.metadata["document_id"] == "api.billing.create_invoice"
    assert catalog_doc.metadata["module"] == "billing"
    assert catalog_doc.metadata["domain"] == "billing"
    assert catalog_doc.metadata["subdomain"] == "invoices"
    assert catalog_doc.metadata["summary_text"] == "Creates an invoice in billing."

    # Facts are derived from the parameter and error tables.
    fact_texts = [doc.text for doc in docs if doc.layer == RagLayer.DOCS_FACT_INDEX]
    assert any("has_field amount" in text for text in fact_texts)
    assert any("field_required amount:yes" in text for text in fact_texts)
    assert any("returns_error invalid_amount" in text for text in fact_texts)

    entity_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_ENTITY_CATALOG)
    assert entity_doc.metadata["entity_name"] == "Invoice"

    workflow_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_WORKFLOW_INDEX)
    assert workflow_doc.metadata["workflow_name"] == "Create invoice"

    # Relations cover parent, related docs, links and entities from the front matter.
    relation_targets = [
        doc.metadata["target_id"]
        for doc in docs
        if doc.layer == RagLayer.DOCS_RELATION_GRAPH
    ]
    assert "billing_api" in relation_targets
    assert "api.billing.validate_invoice" in relation_targets
    assert "logic.billing.invoice_validation" in relation_targets
    assert "Invoice" in relation_targets

    chunk_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_DOC_CHUNKS)
    assert chunk_doc.metadata["section_path"]
    assert chunk_doc.metadata["artifact_type"] == "DOCS"
    assert chunk_doc.metadata["domain"] == "billing"
    assert chunk_doc.metadata["subdomain"] == "invoices"

    integration_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_INTEGRATION_INDEX)
    assert integration_doc.metadata["target"] == "db.billing.invoices"
    assert integration_doc.metadata["target_type"] == "db"
    assert integration_doc.metadata["details"]["transaction"] == "required"


def test_docs_integration_extractor_keeps_valid_blocks() -> None:
    """A well-formed integration block yields one record with parsed nested details."""
    extractor = DocsIntegrationExtractor()
    sections = [
        SectionChunk(
            section_path="Details > Интеграции > Billing DB",
            section_title="Billing DB",
            content=(
                "- target: db.billing.invoices\n"
                "- target_type: db\n"
                "- direction: outbound\n"
                "- interaction: writes\n"
                "- via: invoice repository\n"
                "- purpose: persist created invoices\n"
                "- details:\n"
                "  - transaction: required\n"
                "  - tables:\n"
                "    - invoices\n"
                "    - invoice_items\n"
            ),
            order=0,
        )
    ]

    records = extractor.extract(sections, path="docs/billing/create_invoice.md")

    assert len(records) == 1
    assert records[0].target == "db.billing.invoices"
    assert records[0].details["transaction"] == "required"
    assert records[0].details["tables"] == ["invoices", "invoice_items"]


def test_docs_integration_extractor_soft_fails_on_markdown_like_yaml(caplog) -> None:
    """Backtick-laden values keep the record, drop `details`, and log a warning."""
    extractor = DocsIntegrationExtractor()
    sections = [
        SectionChunk(
            section_path="Details > Интеграции > Runtime health provider",
            section_title="Runtime health provider",
            content=(
                "- target: runtime.health_provider\n"
                "- target_type: service\n"
                "- direction: outbound\n"
                "- interaction: depends_on\n"
                "- via: async callback `health_provider()`\n"
                "- purpose: получить агрегированный health runtime\n"
                "- details:\n"
                "  - timeout_ms: 5000\n"
                "  - response_type: `HealthPayload`\n"
            ),
            order=0,
        )
    ]

    with caplog.at_level(logging.WARNING):
        records = extractor.extract(sections, path="docs/api/health-endpoint.md")

    assert len(records) == 1
    assert records[0].target == "runtime.health_provider"
    assert records[0].via == "async callback `health_provider()`"
    assert records[0].details == {}
    # The warning must identify the offending file.
    assert "docs integration parse warning" in caplog.text
    assert "docs/api/health-endpoint.md" in caplog.text


def test_docs_pipeline_keeps_other_layers_when_integration_block_is_invalid(caplog) -> None:
    """A problematic integration block must not prevent the other layers from indexing."""
    pipeline = DocsIndexingPipeline()
    content = """---
id: api.runtime.health
type: api_method
doc_type: api_method
name: runtime_health
title: Runtime Health API
module: runtime
domain: platform
sub_domain: observability
layer: application
status: active
related_docs: []
links:
  uses_logic:
    - logic.runtime.health
---

# Runtime Health API

## Summary
Returns current runtime health.

## Details

### Описание
Возвращает агрегированное состояние runtime.

### Сценарий
**Название:** Read health

**Предусловия:**
- runtime is running

**Триггер:**
- client calls health endpoint

**Основной сценарий:**
1. Read current state.
2. Return payload.

### Входные параметры
| field | type | required |
| --- | --- | --- |
| verbose | boolean | no |

### Интеграции
#### Runtime health provider
- target: runtime.health_provider
- target_type: service
- direction: outbound
- interaction: depends_on
- via: async callback `health_provider()`
- purpose: получить агрегированный health runtime
- details:
  - timeout_ms: 5000
  - response_type: `HealthPayload`
"""

    with caplog.at_level(logging.WARNING):
        docs = pipeline.index_file(
            repo_id="acme/proj",
            commit_sha="abc123",
            path="docs/api/health-endpoint.md",
            content=content,
        )

    layers = {doc.layer for doc in docs}
    assert RagLayer.DOCS_DOCUMENT_CATALOG in layers
    assert RagLayer.DOCS_DOC_CHUNKS in layers
    assert RagLayer.DOCS_FACT_INDEX in layers
    assert RagLayer.DOCS_WORKFLOW_INDEX in layers
    assert RagLayer.DOCS_RELATION_GRAPH in layers
    assert RagLayer.DOCS_INTEGRATION_INDEX in layers
    assert "docs integration parse warning" in caplog.text
    assert all(doc.source.path == "docs/api/health-endpoint.md" for doc in docs)