Новый раг

This commit is contained in:
2026-03-01 14:21:33 +03:00
parent 2728c07ba9
commit 1ef0b4d68c
95 changed files with 3145 additions and 927 deletions

View File

@@ -0,0 +1,57 @@
from app.modules.rag.contracts.enums import RagLayer
from app.modules.rag.indexing.code.pipeline import CodeIndexingPipeline
def test_code_pipeline_builds_source_symbols_edges_and_entrypoints() -> None:
    """Index a small FastAPI-style module and check each code layer is emitted.

    The fixture declares a class with one method and a route handler decorated
    with ``@router.get``. The test asserts that the indexed documents cover the
    source-chunk, symbol-catalog, dependency-graph and entrypoint layers, and
    spot-checks the metadata of one document from the last three.
    """
    pipeline = CodeIndexingPipeline()
    # The fixture has to be syntactically valid Python on its own: the method
    # body and the handler body are indented so the class and the functions
    # actually own their statements.
    content = """
from fastapi import APIRouter
router = APIRouter()
class UserService:
    def get_user(self, user_id):
        return user_id
@router.get("/users/{user_id}")
async def get_user(user_id: str):
    service = UserService()
    return service.get_user(user_id)
"""
    docs = pipeline.index_file(
        repo_id="acme/proj",
        commit_sha="abc123",
        path="app/api/users.py",
        content=content,
    )

    # Every code layer must be represented at least once.
    layers = {doc.layer for doc in docs}
    assert RagLayer.CODE_SOURCE_CHUNKS in layers
    assert RagLayer.CODE_SYMBOL_CATALOG in layers
    assert RagLayer.CODE_DEPENDENCY_GRAPH in layers
    assert RagLayer.CODE_ENTRYPOINTS in layers

    # First catalog entry whose kind is "function"; next() raises
    # StopIteration (failing the test) when no such entry exists.
    symbol_doc = next(
        doc
        for doc in docs
        if doc.layer == RagLayer.CODE_SYMBOL_CATALOG and doc.metadata["kind"] == "function"
    )
    assert "get_user" in symbol_doc.metadata["qname"]

    # Only the first dependency edge is inspected; any known edge type passes.
    edge_doc = next(doc for doc in docs if doc.layer == RagLayer.CODE_DEPENDENCY_GRAPH)
    assert edge_doc.metadata["edge_type"] in {"calls", "imports", "inherits"}

    entry_doc = next(doc for doc in docs if doc.layer == RagLayer.CODE_ENTRYPOINTS)
    assert entry_doc.metadata["framework"] == "fastapi"
def test_code_pipeline_indexes_import_alias_as_symbol() -> None:
    """Check that an aliased re-export is catalogued under its alias name.

    Indexes a one-line package ``__init__`` that imports ``ConfigManagerV2``
    as ``ConfigManager`` and expects a symbol-catalog document with qname
    ``ConfigManager``, kind ``const`` and the ``import_alias`` flag set.
    """
    source = "from .v2 import ConfigManagerV2 as ConfigManager\n"
    indexed = CodeIndexingPipeline().index_file(
        repo_id="acme/proj",
        commit_sha="abc123",
        path="src/config_manager/__init__.py",
        content=source,
    )

    # Narrow to catalog entries first, then pick the one named after the alias.
    catalog_entries = (
        item for item in indexed if item.layer == RagLayer.CODE_SYMBOL_CATALOG
    )
    alias_doc = next(
        item for item in catalog_entries if item.metadata["qname"] == "ConfigManager"
    )

    assert alias_doc.metadata["kind"] == "const"
    assert alias_doc.metadata["lang_payload"]["import_alias"] is True

View File

@@ -0,0 +1,63 @@
from app.modules.rag.contracts.enums import RagLayer
from app.modules.rag.indexing.docs.pipeline import DocsIndexingPipeline
def test_docs_pipeline_builds_catalog_facts_sections_and_policy() -> None:
    """Index a Markdown document with front matter and check the docs layers.

    The fixture carries a front-matter header (id, type, domain, links, tags,
    status) followed by summary, request-contract, error-matrix and rules
    sections. The test asserts that the module-catalog, fact, section and
    policy layers are all produced and spot-checks their content.
    """
    pipeline = DocsIndexingPipeline()
    # `calls_api` is nested under `links:` and its list item under `calls_api:`;
    # without that indentation `links` would be empty and `calls_api` would be
    # read as a separate top-level key.
    content = """---
id: api.billing.create_invoice
type: policy
domain: billing
links:
  calls_api:
    - api.billing.validate_invoice
tags: [billing]
status: active
---
# Create Invoice
## Spec Summary
Creates an invoice in billing.
## Request Contract
| field | type | required | validation |
| --- | --- | --- | --- |
| amount | decimal | yes | > 0 |
## Error Matrix
| status | error | client action |
| --- | --- | --- |
| 400 | invalid_amount | fix request |
## Rules
- metric: billing.invoice.created
- rule: amount must be positive
"""
    docs = pipeline.index_file(
        repo_id="acme/proj",
        commit_sha="abc123",
        path="docs/billing/create_invoice.md",
        content=content,
    )

    # Every docs layer must be represented at least once.
    layers = {doc.layer for doc in docs}
    assert RagLayer.DOCS_MODULE_CATALOG in layers
    assert RagLayer.DOCS_FACT_INDEX in layers
    assert RagLayer.DOCS_SECTION_INDEX in layers
    assert RagLayer.DOCS_POLICY_INDEX in layers

    # The catalog entry mirrors the front-matter `id` and `type` values.
    module_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_MODULE_CATALOG)
    assert module_doc.metadata["module_id"] == "api.billing.create_invoice"
    assert module_doc.metadata["type"] == "policy"

    # Fact texts must mention each expected predicate; presumably these come
    # from the links block, the request table and the error table respectively.
    fact_texts = [doc.text for doc in docs if doc.layer == RagLayer.DOCS_FACT_INDEX]
    assert any("calls_api" in text for text in fact_texts)
    assert any("has_field" in text for text in fact_texts)
    assert any("returns_error" in text for text in fact_texts)

    # Only the first section document is checked, and only for a non-empty path.
    section_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_SECTION_INDEX)
    assert section_doc.metadata["section_path"]

View File

@@ -0,0 +1,12 @@
from app.modules.rag.contracts.enums import RetrievalMode
from app.modules.rag.retrieval.query_router import RagQueryRouter
def test_query_router_uses_docs_by_default() -> None:
    """A billing-requirements question must resolve to ``RetrievalMode.DOCS``."""
    question = "Какие есть требования по биллингу?"
    resolved_mode = RagQueryRouter().resolve_mode(question)
    assert resolved_mode == RetrievalMode.DOCS
def test_query_router_switches_to_code_on_explicit_code_requests() -> None:
    """A question that asks to explain code must resolve to ``RetrievalMode.CODE``."""
    question = "Объясни как работает код endpoint create invoice"
    resolved_mode = RagQueryRouter().resolve_mode(question)
    assert resolved_mode == RetrievalMode.CODE

View File

@@ -0,0 +1,9 @@
from app.modules.rag.retrieval.query_terms import extract_query_terms
def test_extract_query_terms_from_code_question() -> None:
    """Check the terms extracted from a mixed Russian/English code question.

    The result must contain the lower-cased identifier, its snake_case form
    and the English term ``control``.
    """
    question = "Объясни по коду как можно управлять COnfigmanager?"
    extracted = extract_query_terms(question)
    # Checked in a fixed order so the first missing term is the one reported.
    for expected_term in ("configmanager", "config_manager", "control"):
        assert expected_term in extracted