Новый раг

This commit is contained in:
2026-03-01 14:21:33 +03:00
parent 2728c07ba9
commit 1ef0b4d68c
95 changed files with 3145 additions and 927 deletions

View File

@@ -1,5 +1,7 @@
from __future__ import annotations from __future__ import annotations
from collections import Counter
from app.modules.agent.engine.orchestrator.actions.common import ActionSupport from app.modules.agent.engine.orchestrator.actions.common import ActionSupport
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import ArtifactType from app.modules.agent.engine.orchestrator.models import ArtifactType
@@ -7,22 +9,28 @@ from app.modules.agent.engine.orchestrator.models import ArtifactType
class ExplainActions(ActionSupport): class ExplainActions(ActionSupport):
def collect_sources(self, ctx: ExecutionContext) -> list[str]: def collect_sources(self, ctx: ExecutionContext) -> list[str]:
rag_items = list(ctx.task.metadata.get("rag_items", []) or [])
rag_context = str(ctx.task.metadata.get("rag_context", "")) rag_context = str(ctx.task.metadata.get("rag_context", ""))
confluence_context = str(ctx.task.metadata.get("confluence_context", "")) confluence_context = str(ctx.task.metadata.get("confluence_context", ""))
files_map = dict(ctx.task.metadata.get("files_map", {}) or {}) files_map = dict(ctx.task.metadata.get("files_map", {}) or {})
payload = { payload = {
"rag_items": rag_items,
"rag_context": rag_context, "rag_context": rag_context,
"confluence_context": confluence_context, "confluence_context": confluence_context,
"files_count": len(files_map), "files_count": len(files_map),
"source_profile": self._source_profile(rag_items),
} }
evidence_ids: list[str] = [] evidence_ids: list[str] = []
if rag_context.strip(): for item in rag_items[:5]:
snippet = str(item.get("content", "") or "").strip()
if not snippet:
continue
evidence_ids.append( evidence_ids.append(
self.add_evidence( self.add_evidence(
ctx, ctx,
source_type="rag_chunk", source_type="rag_chunk",
source_ref=ctx.task.rag_session_id, source_ref=str(item.get("source", ctx.task.rag_session_id)),
snippet=rag_context, snippet=snippet,
score=0.9, score=0.9,
) )
) )
@@ -38,50 +46,214 @@ class ExplainActions(ActionSupport):
def extract_logic(self, ctx: ExecutionContext) -> list[str]: def extract_logic(self, ctx: ExecutionContext) -> list[str]:
sources = self.get(ctx, "sources", {}) or {} sources = self.get(ctx, "sources", {}) or {}
message = ctx.task.user_message message = ctx.task.user_message
profile = str(sources.get("source_profile", "docs"))
ru = self._is_russian(message)
notes = (
"Используй код как основной источник и ссылайся на конкретные файлы и слои."
if profile == "code" and ru
else "Use code as the primary source and cite concrete files/layers."
if profile == "code"
else "Используй требования и документацию как основной источник."
if ru
else "Use requirements/docs as primary source over code."
)
logic = { logic = {
"request": message, "request": message,
"assumptions": ["requirements-first"], "assumptions": [f"{profile}-first"],
"notes": "Use requirements/docs as primary source over code.", "notes": notes,
"source_summary": sources, "source_summary": sources,
} }
return [self.put(ctx, "logic_model", ArtifactType.STRUCTURED_JSON, logic)] return [self.put(ctx, "logic_model", ArtifactType.STRUCTURED_JSON, logic)]
def build_sequence(self, ctx: ExecutionContext) -> list[str]:
message = ctx.task.user_message
mermaid = "\n".join(
[
"```mermaid",
"sequenceDiagram",
"participant User",
"participant Agent",
"participant Docs",
"User->>Agent: " + message[:80],
"Agent->>Docs: Find relevant requirements",
"Docs-->>Agent: Relevant context",
"Agent-->>User: Structured explanation",
"```",
]
)
return [self.put(ctx, "sequence_diagram", ArtifactType.TEXT, mermaid)]
def build_use_cases(self, ctx: ExecutionContext) -> list[str]:
lines = [
"### Use Cases",
"- Analyze requirement fragments relevant to user question",
"- Reconstruct behavior flow and decision points",
"- Return user-focused explanation with constraints",
]
return [self.put(ctx, "use_cases", ArtifactType.TEXT, "\n".join(lines))]
def summarize(self, ctx: ExecutionContext) -> list[str]: def summarize(self, ctx: ExecutionContext) -> list[str]:
sequence = str(self.get(ctx, "sequence_diagram", "") or "") sources = self.get(ctx, "sources", {}) or {}
use_cases = str(self.get(ctx, "use_cases", "") or "") profile = str(sources.get("source_profile", "docs"))
answer = "\n\n".join( items = list(sources.get("rag_items", []) or [])
[ message = ctx.task.user_message
"## Summary", ru = self._is_russian(message)
"The requested project part is explained from requirements/docs context.", answer = self._code_answer(items, russian=ru) if profile == "code" else self._docs_answer(items, russian=ru)
sequence,
use_cases,
]
)
return [self.put(ctx, "final_answer", ArtifactType.TEXT, answer)] return [self.put(ctx, "final_answer", ArtifactType.TEXT, answer)]
def _source_profile(self, items: list[dict]) -> str:
layers = [str(item.get("layer", "") or "") for item in items]
if any(layer.startswith("C") for layer in layers):
return "code"
return "docs"
def _is_russian(self, text: str) -> bool:
return any("а" <= ch.lower() <= "я" or ch.lower() == "ё" for ch in text)
def _code_answer(self, items: list[dict], *, russian: bool) -> str:
if not items:
return (
"Не удалось найти релевантный кодовый контекст по этому запросу."
if russian
else "No relevant code context was found for this request."
)
details = self._code_details(items, russian=russian)
refs = self._code_references(items, russian=russian)
parts = [
"## Кратко" if russian else "## Summary",
details,
]
if refs:
parts.append(refs)
return "\n\n".join(part for part in parts if part.strip())
def _docs_answer(self, items: list[dict], *, russian: bool) -> str:
return (
"Запрошенная часть проекта объяснена на основе требований и документации."
if russian
else "The requested project part is explained from requirements/docs context."
)
def _code_details(self, items: list[dict], *, russian: bool) -> str:
    """Build the "what the code shows" markdown section from RAG code items.

    Buckets *items* by RAG layer (symbol catalog / dependency edges / raw
    source chunks), then emits bullet lines for: a detected `ConfigManager`
    alias, management-channel hints, at most two symbol summaries, up to
    three dependency fan-outs, and direct config/API-control mentions in
    raw source. Test-path items and control-channel symbols are filtered
    out of the symbol bullets. Returns "" when *items* is empty.
    """
    if not items:
        return ""
    # Bucket items by their RAG layer name.
    symbol_items = [item for item in items if str(item.get("layer", "")) == "C1_SYMBOL_CATALOG"]
    edge_items = [item for item in items if str(item.get("layer", "")) == "C2_DEPENDENCY_GRAPH"]
    source_items = [item for item in items if str(item.get("layer", "")) == "C0_SOURCE_CHUNKS"]
    lines = ["### Что видно по коду" if russian else "### What the code shows"]
    # Special-case: surface a ConfigManager re-export alias first, if present.
    alias = self._find_alias_symbol(symbol_items)
    if alias:
        imported_from = str(alias.get("metadata", {}).get("lang_payload", {}).get("imported_from", "")).strip()
        if russian:
            lines.append(f"- `ConfigManager` в проекте доступен как alias в `{alias.get('source', '')}` и указывает на `{imported_from}`.")
        else:
            lines.append(f"- `ConfigManager` is exposed as an alias in `{alias.get('source', '')}` and points to `{imported_from}`.")
    management_hint = self._management_summary(symbol_items, edge_items, source_items, russian=russian)
    if management_hint:
        lines.extend(management_hint)
    # Emit at most two symbol bullets, scanning the first four candidates
    # and skipping tests and control-channel machinery.
    symbol_lines = 0
    for item in symbol_items[:4]:
        title = str(item.get("title", "") or "")
        source = str(item.get("source", "") or "")
        content = str(item.get("content", "") or "").strip()
        # NOTE(review): the LAST content line is used as the one-line summary —
        # presumably the indexer appends a digest line there; confirm upstream.
        summary = content.splitlines()[-1].strip() if content else ""
        if not title:
            continue
        if self._is_test_path(source):
            continue
        if self._is_control_symbol(title):
            continue
        if russian:
            lines.append(f"- Символ `{title}` из `{source}`: {summary}")
        else:
            lines.append(f"- Symbol `{title}` from `{source}`: {summary}")
        symbol_lines += 1
        if symbol_lines >= 2:
            break
    # Aggregate dependency edges into "src -> [targets]" fan-out bullets,
    # de-duplicating targets while preserving first-seen order.
    edge_map: dict[str, list[str]] = {}
    for item in edge_items:
        meta = item.get("metadata", {}) or {}
        src_qname = str(meta.get("src_qname", "") or "").strip()
        dst_ref = str(meta.get("dst_ref", "") or "").strip()
        if not src_qname or not dst_ref:
            continue
        if self._is_test_path(str(item.get("source", "") or "")):
            continue
        edge_map.setdefault(src_qname, [])
        if dst_ref not in edge_map[src_qname]:
            edge_map[src_qname].append(dst_ref)
    # At most three sources, at most four targets each.
    for src_qname, targets in list(edge_map.items())[:3]:
        joined = ", ".join(targets[:4])
        if russian:
            lines.append(f"- `{src_qname}` вызывает или использует: {joined}.")
        else:
            lines.append(f"- `{src_qname}` calls or uses: {joined}.")
    # Quote raw source chunks that directly mention management/control
    # (first four content lines, capped at 220 characters).
    for item in source_items[:2]:
        source = str(item.get("source", "") or "")
        content = str(item.get("content", "") or "")
        if self._is_test_path(source):
            continue
        if "management" in content.lower() or "control" in content.lower():
            snippet = " ".join(content.splitlines()[:4]).strip()
            if russian:
                lines.append(f"- В `{source}` есть прямое указание на управление через конфиг/API: `{snippet[:220]}`")
            else:
                lines.append(f"- `{source}` directly mentions config/API control: `{snippet[:220]}`")
    return "\n".join(lines)
def _code_references(self, items: list[dict], *, russian: bool) -> str:
paths = [str(item.get("source", "") or "") for item in items if item.get("source") and not self._is_test_path(str(item.get("source", "") or ""))]
if not paths:
return ""
lines = ["### Где смотреть в проекте" if russian else "### Where to look in the project"]
for path, _count in Counter(paths).most_common(3):
lines.append(f"- `{path}`")
return "\n".join(lines)
def _find_alias_symbol(self, items: list[dict]) -> dict | None:
for item in items:
meta = item.get("metadata", {}) or {}
payload = meta.get("lang_payload", {}) or {}
qname = str(meta.get("qname", "") or "")
if qname == "ConfigManager" and payload.get("import_alias"):
return item
return None
def _is_test_path(self, path: str) -> bool:
lowered = path.lower()
return lowered.startswith("tests/") or "/tests/" in lowered or lowered.startswith("test_") or "/test_" in lowered
def _is_control_symbol(self, title: str) -> bool:
lowered = title.lower()
return any(token in lowered for token in ("controlchannel", "controlchannelbridge", "on_start", "on_stop", "on_status"))
def _management_summary(
    self,
    symbol_items: list[dict],
    edge_items: list[dict],
    source_items: list[dict],
    *,
    russian: bool,
) -> list[str]:
    """Collect bullet lines describing the ConfigManager management surface.

    Looks for three signals, each restricted to non-test items: an explicit
    mention of API/config management in raw source text, presence of the
    `ControlChannel` / `ControlChannelBridge` symbols, and dependency edges
    that route bridge methods to the runtime lifecycle helpers. Returns an
    empty list when no signal is found.
    """
    # qname -> symbol item map, excluding anything that lives under tests.
    qnames = {str((item.get("metadata", {}) or {}).get("qname", "") or ""): item for item in symbol_items if not self._is_test_path(str(item.get("source", "") or ""))}
    source_texts = [str(item.get("content", "") or "") for item in source_items if not self._is_test_path(str(item.get("source", "") or ""))]
    result: list[str] = []
    # Signal 1: the source text itself mentions a management section / API control.
    if any("управление через api" in text.lower() or "section management" in text.lower() or "секция management" in text.lower() for text in source_texts):
        result.append(
            "- Для `ConfigManager` в коде предусмотрен отдельный интерфейс управления через API/конфиг: это прямо указано в публичной точке входа модуля."
            if russian
            else "- `ConfigManager` has a dedicated API/config-based management interface; this is stated in the module's public entrypoint."
        )
    # Signal 2: the control-channel contract symbols are present in the catalog.
    has_control_channel = "ControlChannel" in qnames
    has_bridge = "ControlChannelBridge" in qnames
    if has_control_channel:
        result.append(
            "- Базовый контракт управления задает `ControlChannel`: он определяет команды `start` и `stop` для внешнего канала управления."
            if russian
            else "- The base management contract is `ControlChannel`, which defines external `start` and `stop` commands."
        )
    if has_bridge:
        result.append(
            "- `ControlChannelBridge` связывает внешний канал управления с lifecycle-методами менеджера: `on_start`, `on_stop`, `on_status`."
            if russian
            else "- `ControlChannelBridge` maps the external control channel to manager lifecycle methods: `on_start`, `on_stop`, `on_status`."
        )
    # Signal 3: dependency edges showing bridge commands routed to runtime helpers.
    edge_refs = []
    for item in edge_items:
        if self._is_test_path(str(item.get("source", "") or "")):
            continue
        meta = item.get("metadata", {}) or {}
        src = str(meta.get("src_qname", "") or "")
        dst = str(meta.get("dst_ref", "") or "")
        if src.startswith("ControlChannelBridge.") and dst in {"self._start_runtime", "self._stop_runtime", "self._get_status"}:
            edge_refs.append((src, dst))
    if edge_refs:
        # Only the first three routings are shown.
        mappings = ", ".join(f"{src} -> {dst}" for src, dst in edge_refs[:3])
        result.append(
            f"- По связям в коде видно, что команды управления маршрутизируются так: {mappings}."
            if russian
            else f"- The code relationships show the management command routing: {mappings}."
        )
    return result

View File

@@ -24,8 +24,6 @@ class StepRegistry:
"finalize_graph_output": self._finalize_graph_output, "finalize_graph_output": self._finalize_graph_output,
"collect_sources": explain.collect_sources, "collect_sources": explain.collect_sources,
"extract_logic": explain.extract_logic, "extract_logic": explain.extract_logic,
"build_sequence": explain.build_sequence,
"build_use_cases": explain.build_use_cases,
"summarize": explain.summarize, "summarize": explain.summarize,
"fetch_source_doc": review.fetch_source_doc, "fetch_source_doc": review.fetch_source_doc,
"normalize_document": review.normalize_document, "normalize_document": review.normalize_document,

View File

@@ -24,6 +24,7 @@ class TaskSpecBuilder:
route: RoutingMeta, route: RoutingMeta,
attachments: list[dict], attachments: list[dict],
files: list[dict], files: list[dict],
rag_items: list[dict],
rag_context: str, rag_context: str,
confluence_context: str, confluence_context: str,
files_map: dict[str, dict], files_map: dict[str, dict],
@@ -32,6 +33,7 @@ class TaskSpecBuilder:
output_contract = self._output_contract(scenario) output_contract = self._output_contract(scenario)
constraints = self._constraints_for(scenario) constraints = self._constraints_for(scenario)
metadata = { metadata = {
"rag_items": rag_items,
"rag_context": rag_context, "rag_context": rag_context,
"confluence_context": confluence_context, "confluence_context": confluence_context,
"files_map": files_map, "files_map": files_map,
@@ -124,14 +126,7 @@ class TaskSpecBuilder:
def _output_contract(self, scenario: Scenario) -> OutputContract: def _output_contract(self, scenario: Scenario) -> OutputContract:
if scenario == Scenario.EXPLAIN_PART: if scenario == Scenario.EXPLAIN_PART:
return OutputContract( return OutputContract(result_type="answer", sections=[OutputSection(name="summary", format="markdown")])
result_type="answer",
sections=[
OutputSection(name="sequence_diagram", format="mermaid"),
OutputSection(name="use_cases", format="markdown"),
OutputSection(name="summary", format="markdown"),
],
)
if scenario == Scenario.ANALYTICS_REVIEW: if scenario == Scenario.ANALYTICS_REVIEW:
return OutputContract( return OutputContract(
result_type="review_report", result_type="review_report",

View File

@@ -43,9 +43,7 @@ class ScenarioTemplateRegistry:
steps = [ steps = [
self._step("collect_sources", "Collect sources", "collect_sources", outputs=[self._out("sources", ArtifactType.STRUCTURED_JSON)]), self._step("collect_sources", "Collect sources", "collect_sources", outputs=[self._out("sources", ArtifactType.STRUCTURED_JSON)]),
self._step("extract_logic", "Extract logic", "extract_logic", depends_on=["collect_sources"], outputs=[self._out("logic_model", ArtifactType.STRUCTURED_JSON)]), self._step("extract_logic", "Extract logic", "extract_logic", depends_on=["collect_sources"], outputs=[self._out("logic_model", ArtifactType.STRUCTURED_JSON)]),
self._step("build_sequence", "Build sequence", "build_sequence", depends_on=["extract_logic"], outputs=[self._out("sequence_diagram", ArtifactType.TEXT)]), self._step("summarize", "Summarize", "summarize", depends_on=["extract_logic"], outputs=[self._out("final_answer", ArtifactType.TEXT)]),
self._step("build_use_cases", "Build use cases", "build_use_cases", depends_on=["extract_logic"], outputs=[self._out("use_cases", ArtifactType.TEXT)]),
self._step("summarize", "Summarize", "summarize", depends_on=["build_sequence", "build_use_cases"], outputs=[self._out("final_answer", ArtifactType.TEXT)]),
] ]
return self._plan(task, "explain_part_v1", steps, [self._gate("evidence_required"), self._gate("non_empty_answer_or_changeset")]) return self._plan(task, "explain_part_v1", steps, [self._gate("evidence_required"), self._gate("non_empty_answer_or_changeset")])

View File

@@ -117,6 +117,7 @@ class GraphAgentRuntime:
route=route_meta, route=route_meta,
attachments=attachments, attachments=attachments,
files=files, files=files,
rag_items=rag_ctx,
rag_context=self._format_rag(rag_ctx), rag_context=self._format_rag(rag_ctx),
confluence_context=self._format_confluence(conf_pages), confluence_context=self._format_confluence(conf_pages),
files_map=files_map, files_map=files_map,
@@ -368,7 +369,33 @@ class GraphAgentRuntime:
return pages return pages
def _format_rag(self, items: list[dict]) -> str: def _format_rag(self, items: list[dict]) -> str:
return "\n".join(str(x.get("content", "")) for x in items) blocks: list[str] = []
for item in items:
source = str(item.get("source", "") or item.get("path", "") or "")
layer = str(item.get("layer", "") or "").strip()
title = str(item.get("title", "") or "").strip()
metadata = item.get("metadata", {}) or {}
lines = []
if source:
lines.append(f"Source: {source}")
if layer:
lines.append(f"Layer: {layer}")
if title:
lines.append(f"Title: {title}")
if metadata:
hints = []
for key in ("module_id", "qname", "predicate", "entry_type", "framework", "section_path"):
value = metadata.get(key)
if value:
hints.append(f"{key}={value}")
if hints:
lines.append("Meta: " + ", ".join(hints))
content = str(item.get("content", "")).strip()
if content:
lines.append(content)
if lines:
blocks.append("\n".join(lines))
return "\n\n".join(blocks)
def _format_confluence(self, pages: list[dict]) -> str: def _format_confluence(self, pages: list[dict]) -> str:
return "\n".join(str(x.get("content_markdown", "")) for x in pages) return "\n".join(str(x.get("content_markdown", "")) for x in pages)

View File

@@ -3,7 +3,7 @@ from app.modules.agent.repository import AgentRepository
from app.modules.agent.story_context_repository import StoryContextRepository, StoryContextSchemaRepository from app.modules.agent.story_context_repository import StoryContextRepository, StoryContextSchemaRepository
from app.modules.chat.repository import ChatRepository from app.modules.chat.repository import ChatRepository
from app.modules.chat.module import ChatModule from app.modules.chat.module import ChatModule
from app.modules.rag_session.repository import RagRepository from app.modules.rag.persistence.repository import RagRepository
from app.modules.rag_session.module import RagModule from app.modules.rag_session.module import RagModule
from app.modules.rag_repo.module import RagRepoModule from app.modules.rag_repo.module import RagRepoModule
from app.modules.shared.bootstrap import bootstrap_database from app.modules.shared.bootstrap import bootstrap_database

300
app/modules/rag/README.md Normal file
View File

@@ -0,0 +1,300 @@
# Модуль rag
## 1. Функции модуля
- Единое ядро RAG для индексации и retrieval по документации и коду проекта.
- Поддержка двух семейств индексации: `DOCS` и `CODE`, с разными слоями и разными pipeline.
- Хранение `rag_session`, index-jobs, многослойных документов, cache-слоев и retrieval-запросов.
- Поддержка индексации snapshot и changes с переиспользованием cache по `blob_sha`.
- Предоставление контекста для agent/chat, где `DOCS` используется по умолчанию, а `CODE` включается для явных вопросов по реализации.
## 2. Диаграмма классов и взаимосвязей
```mermaid
classDiagram
class RagService
class RagRepository
class RagSchemaRepository
class RagDocumentUpserter
class DocsIndexingPipeline
class CodeIndexingPipeline
class RagQueryRouter
class GigaChatEmbedder
RagService --> RagRepository
RagService --> DocsIndexingPipeline
RagService --> CodeIndexingPipeline
RagService --> RagQueryRouter
RagService --> GigaChatEmbedder
RagRepository --> RagSchemaRepository
RagService --> RagDocumentUpserter
```
## 3. Описание классов
- `RagService`: основной application-service модуля.
Методы: `index_snapshot` — индексирует полный набор файлов; `index_changes` — применяет инкрементальные изменения; `retrieve` — возвращает релевантный контекст из `DOCS` или `CODE`.
- `RagRepository`: фасад persistence-слоя RAG.
Методы: `ensure_tables` — создает/обновляет схему; `upsert_session/get_session/session_exists` — операции по `rag_session`; `create_job/update_job/get_job` — операции по index jobs; `replace_documents/apply_document_changes` — операции по документам; `get_cached_documents/cache_documents` — работа с cache; `retrieve/fallback_chunks` — retrieval.
- `RagSchemaRepository`: управление схемой БД для RAG.
Методы: `ensure_tables` — создает таблицы и индексы; `_ensure_columns` — добавляет новые поля; `_ensure_indexes` — поддерживает индексы для retrieval и фильтрации.
- `RagDocumentUpserter`: батчевый writer многослойных `RagDocument`.
Методы: `replace` — полностью заменяет документы сессии; `apply_changes` — применяет upsert/delete по измененным путям.
- `DocsIndexingPipeline`: pipeline индексации документации.
Методы: `supports` — определяет, относится ли файл к docs; `index_file` — строит документы слоев `D1-D4` для одного файла.
- `CodeIndexingPipeline`: pipeline индексации Python-кода.
Методы: `supports` — определяет, относится ли файл к code; `index_file` — строит документы слоев `C0-C3` для одного файла.
- `RagQueryRouter`: выбирает retrieval mode и активные слои.
Методы: `resolve_mode` — определяет `docs` или `code`; `layers_for_mode` — возвращает набор слоев для retrieval.
- `GigaChatEmbedder`: адаптер embeddings-модели.
Методы: `embed` — возвращает embeddings для списка текстов.
## 4. Сиквенс-диаграммы API и выполнения
### Индексация snapshot через текущий `rag_session` facade
Назначение: создать/обновить `rag_session` и построить многослойный индекс по переданным файлам проекта.
```mermaid
sequenceDiagram
participant Router as RagModule.APIRouter
participant Sessions as RagSessionStore
participant Indexing as IndexingOrchestrator
participant Rag as RagService
participant Docs as DocsIndexingPipeline
participant Code as CodeIndexingPipeline
participant Repo as RagRepository
Router->>Sessions: create(project_id)
Sessions-->>Router: rag_session_id
Router->>Indexing: enqueue_snapshot(rag_session_id, files)
Indexing->>Rag: index_snapshot(rag_session_id, files)
loop for each file
Rag->>Docs: supports/index_file
Rag->>Code: supports/index_file
Rag->>Repo: cache_documents(...)
end
Rag->>Repo: replace_documents(...)
Indexing-->>Router: index_job_id,status
```
### Retrieval для agent/chat
Назначение: вернуть релевантный контекст из нужного семейства слоев.
```mermaid
sequenceDiagram
participant Agent as GraphAgentRuntime
participant Rag as RagService
participant Router as RagQueryRouter
participant Repo as RagRepository
Agent->>Rag: retrieve(rag_session_id, query)
Rag->>Router: resolve_mode(query)
Router-->>Rag: docs|code + layers
Rag->>Repo: retrieve(query_embedding, query_text, layers)
Repo-->>Rag: ranked items
Rag-->>Agent: items
```
## 5. Слои, фиксируемые в RAG
### 5.1. Слои DOCS
#### `D1_MODULE_CATALOG`
Назначение: каталог модулей документации и граф связей между ними.
Основные атрибуты:
- `module_id`
- `type`
- `domain`
- `title`
- `status`
- `version`
- `tags`
- `owners`
- `links`
- `calls_api`
- `called_by`
- `uses_logic`
- `used_by`
- `reads_db`
- `writes_db`
- `integrates_with`
- `emits_events`
- `consumes_events`
- `source_path`
- `summary_text`
#### `D2_FACT_INDEX`
Назначение: атомарные факты `subject-predicate-object` с evidence.
Основные атрибуты:
- `fact_id`
- `subject_id`
- `predicate`
- `object`
- `object_ref`
- `source_path`
- `anchor`
- `line_start`
- `line_end`
- `confidence`
- `tags`
#### `D3_SECTION_INDEX`
Назначение: семантические секции документации, нарезанные по заголовкам.
Основные атрибуты:
- `chunk_id`
- `module_id`
- `section_path`
- `section_title`
- `content`
- `source_path`
- `order`
- `tags`
- `domain`
- `type`
- `embedding`
#### `D4_POLICY_INDEX`
Назначение: глобальные правила и конвенции проекта.
Основные атрибуты:
- `policy_id`
- `applies_to`
- `rules`
- `default_behaviors`
- `source_path`
### 5.2. Слои CODE
#### `C0_SOURCE_CHUNKS`
Назначение: сырой код как источник истины для цитирования и evidence.
Основные атрибуты:
- `lang`
- `repo_id`
- `commit_sha`
- `path`
- `span`
- `title`
- `text`
- `module_or_unit`
- `chunk_type`
- `symbol_id`
- `hash`
#### `C1_SYMBOL_CATALOG`
Назначение: каталог символов кода и их деклараций.
Основные атрибуты:
- `lang`
- `repo_id`
- `commit_sha`
- `symbol_id`
- `qname`
- `kind`
- `decl.path`
- `decl.start_line`
- `decl.end_line`
- `text`
- `visibility`
- `signature`
- `decorators_or_annotations`
- `docstring_or_javadoc`
- `parent_symbol_id`
- `package_or_module`
- `is_entry_candidate`
- `lang_payload`
#### `C2_DEPENDENCY_GRAPH`
Назначение: связи между сущностями кода.
Основные атрибуты:
- `lang`
- `repo_id`
- `commit_sha`
- `edge_id`
- `edge_type`
- `src_symbol_id`
- `dst_symbol_id`
- `dst_ref`
- `evidence.path`
- `evidence.start_line`
- `evidence.end_line`
- `text`
- `resolution`
- `callsite_kind`
- `lang_payload`
#### `C3_ENTRYPOINTS`
Назначение: точки входа приложения и их обработчики.
Основные атрибуты:
- `lang`
- `repo_id`
- `commit_sha`
- `entry_id`
- `entry_type`
- `framework`
- `route_or_command`
- `handler_symbol_id`
- `evidence.path`
- `evidence.start_line`
- `evidence.end_line`
- `text`
- `http.methods`
- `http.auth`
- `request_model`
- `response_model`
- `cli.args_schema`
- `task.queue`
- `task.cron`
- `tags`
- `lang_payload`
#### `C4_PUBLIC_API`
Назначение: публичная поверхность API/экспортируемых символов.
Основные атрибуты:
- `api_id`
- `symbol_id`
- `stability`
- `source_of_truth`
- `versioning_tags`
- `lang_payload`
#### `C5_BEHAVIOR_SUMMARIES`
Назначение: поведенческие summary с обязательными evidence links.
Основные атрибуты:
- `target_type`
- `target_id`
- `text`
- `claims`
- `evidence_links`
- `confidence`
- `generated_by`
- `generated_at`
#### `C6_RUNTIME_TRACES`
Назначение: runtime/trace слой для связи кода и реального исполнения.
Основные атрибуты:
- `env`
- `trace_id`
- `span_id`
- `symbol_id`
- `entry_id`
- `text`
- `timings`
- `service`
- `host`
- `labels`
## 6. Правила retrieval
- По умолчанию retrieval идет в `DOCS`.
- `CODE` используется только для явных вопросов по реализации, устройству кода, endpoint'ам, handler'ам и документации “из кода”.
- Для `DOCS` приоритет слоев: `D1 -> D2 -> D3 -> D4`.
- Для `CODE` приоритет слоев: `C3 -> C1 -> C2 -> C0`.
## 7. Текущий статус реализации
- В первой итерации реализованы `DOCS D1-D4`.
- В первой итерации реализованы `CODE C0-C3`.
- `C4-C6` зафиксированы в контракте и зарезервированы под следующие этапы.
- Текущие `rag_session` и `rag_repo` работают как facade/adapter поверх нового пакета `rag`.

View File

@@ -0,0 +1,25 @@
from app.modules.rag.contracts import (
DocKind,
EvidenceLink,
EvidenceType,
RagDocument,
RagLayer,
RagSource,
RagSpan,
RetrievalItem,
RetrievalMode,
RetrievalQuery,
)
__all__ = [
"DocKind",
"EvidenceLink",
"EvidenceType",
"RagDocument",
"RagLayer",
"RagSource",
"RagSpan",
"RetrievalItem",
"RetrievalMode",
"RetrievalQuery",
]

Binary file not shown.

View File

@@ -0,0 +1,17 @@
from app.modules.rag.contracts.documents import RagDocument, RagSource, RagSpan
from app.modules.rag.contracts.enums import DocKind, EvidenceType, RagLayer, RetrievalMode
from app.modules.rag.contracts.evidence import EvidenceLink
from app.modules.rag.contracts.retrieval import RetrievalItem, RetrievalQuery
__all__ = [
"DocKind",
"EvidenceLink",
"EvidenceType",
"RagDocument",
"RagLayer",
"RagSource",
"RagSpan",
"RetrievalItem",
"RetrievalMode",
"RetrievalQuery",
]

View File

@@ -0,0 +1,77 @@
from __future__ import annotations
from dataclasses import asdict, dataclass, field
from hashlib import sha256
from app.modules.rag.contracts.evidence import EvidenceLink
@dataclass(slots=True)
class RagSource:
    """Identity of the file a RAG document was extracted from."""

    # Repository identifier the file belongs to.
    repo_id: str
    # Commit the snapshot was taken at; None when not pinned to a commit.
    commit_sha: str | None
    # Repo-relative file path.
    path: str

    def to_dict(self) -> dict:
        """Return a plain-dict representation (via dataclasses.asdict)."""
        return asdict(self)
@dataclass(slots=True)
class RagSpan:
    """Line range inside a source file; either bound may be None when unknown."""

    start_line: int | None = None
    end_line: int | None = None

    def to_dict(self) -> dict:
        """Return a plain-dict representation (via dataclasses.asdict)."""
        return asdict(self)
@dataclass(slots=True)
class RagDocument:
    """One indexable RAG document: a layer-tagged text unit with provenance."""

    # Layer name, e.g. "C0_SOURCE_CHUNKS" or "D3_SECTION_INDEX".
    layer: str
    # Where the text came from (repo / commit / path).
    source: RagSource
    title: str
    text: str
    metadata: dict = field(default_factory=dict)
    # Evidence links backing this document's content.
    links: list[EvidenceLink] = field(default_factory=list)
    span: RagSpan | None = None
    # Stable content-derived id; computed lazily by ensure_doc_id().
    doc_id: str | None = None
    lang: str | None = None
    embedding: list[float] | None = None

    def ensure_doc_id(self) -> str:
        """Return the doc id, deriving a deterministic sha256 hex digest if unset.

        The digest covers layer, language, source identity, symbol/module id,
        title and span, so re-indexing identical content yields the same id.
        """
        if self.doc_id:
            return self.doc_id
        span_key = ""
        if self.span is not None:
            span_key = f":{self.span.start_line}:{self.span.end_line}"
        raw = "|".join(
            [
                self.layer,
                self.lang or "",
                self.source.repo_id,
                self.source.commit_sha or "",
                self.source.path,
                # symbol_id wins over module_id; empty string falls through.
                self.metadata.get("symbol_id", "") or self.metadata.get("module_id", ""),
                self.title,
                span_key,
            ]
        )
        self.doc_id = sha256(raw.encode("utf-8")).hexdigest()
        return self.doc_id

    def to_record(self) -> dict:
        """Flatten the document into a persistence-ready dict record.

        Nested dataclasses are unpacked into flat columns; links become a
        list of plain dicts; a missing embedding is stored as [].
        """
        return {
            "doc_id": self.ensure_doc_id(),
            "layer": self.layer,
            "lang": self.lang,
            "repo_id": self.source.repo_id,
            "commit_sha": self.source.commit_sha,
            "path": self.source.path,
            "title": self.title,
            "text": self.text,
            "metadata": dict(self.metadata),
            "links": [link.to_dict() for link in self.links],
            "span_start": self.span.start_line if self.span else None,
            "span_end": self.span.end_line if self.span else None,
            "embedding": self.embedding or [],
        }

View File

@@ -0,0 +1,35 @@
from __future__ import annotations
class RagLayer:
    """String constants naming the RAG index layers (D* = docs, C* = code)."""

    DOCS_MODULE_CATALOG = "D1_MODULE_CATALOG"
    DOCS_FACT_INDEX = "D2_FACT_INDEX"
    DOCS_SECTION_INDEX = "D3_SECTION_INDEX"
    DOCS_POLICY_INDEX = "D4_POLICY_INDEX"
    CODE_SOURCE_CHUNKS = "C0_SOURCE_CHUNKS"
    CODE_SYMBOL_CATALOG = "C1_SYMBOL_CATALOG"
    CODE_DEPENDENCY_GRAPH = "C2_DEPENDENCY_GRAPH"
    CODE_ENTRYPOINTS = "C3_ENTRYPOINTS"
    CODE_PUBLIC_API = "C4_PUBLIC_API"
    CODE_BEHAVIOR_SUMMARIES = "C5_BEHAVIOR_SUMMARIES"
    CODE_RUNTIME_TRACES = "C6_RUNTIME_TRACES"


class RetrievalMode:
    """Retrieval families: documentation layers vs. code layers."""

    DOCS = "docs"
    CODE = "code"


class DocKind:
    """Coarse classification of a documentation file."""

    SPEC = "spec"
    RUNBOOK = "runbook"
    README = "readme"
    MISC = "misc"


class EvidenceType:
    """Kinds of artifacts an EvidenceLink may point at."""

    CODE_SPAN = "code_span"
    SYMBOL = "symbol"
    EDGE = "edge"
    DOC_SECTION = "doc_section"
    DOC_FACT = "doc_fact"

View File

@@ -0,0 +1,16 @@
from __future__ import annotations
from dataclasses import asdict, dataclass
@dataclass(slots=True)
class EvidenceLink:
    """Pointer from a RAG document to the artifact that backs its content."""

    # One of the EvidenceType constants (code_span, symbol, edge, ...).
    type: str
    # Identifier of the target artifact (symbol id, edge id, chunk id, ...).
    target_id: str
    path: str | None = None
    start_line: int | None = None
    end_line: int | None = None
    # Optional free-form annotation.
    note: str | None = None

    def to_dict(self) -> dict:
        """Return a plain-dict representation (via dataclasses.asdict)."""
        return asdict(self)

View File

@@ -0,0 +1,23 @@
from __future__ import annotations
from dataclasses import dataclass, field
@dataclass(slots=True)
class RetrievalQuery:
    """Parameters of a single retrieval request against the RAG store."""

    # Free-text query.
    text: str
    # Retrieval family, e.g. RetrievalMode.DOCS or RetrievalMode.CODE.
    mode: str
    limit: int = 5
    # Layer names to search; empty means the mode's default layer set.
    layers: list[str] = field(default_factory=list)
    # Optional path-prefix filters on document sources.
    path_prefixes: list[str] = field(default_factory=list)
    doc_kind: str | None = None
@dataclass(slots=True)
class RetrievalItem:
    """One ranked retrieval hit returned to callers."""

    # Retrieved text payload.
    content: str
    # Source file path the content came from.
    path: str
    # Layer the hit was found in.
    layer: str
    title: str
    # Relevance score; None when the backend provides no score.
    score: float | None = None
    metadata: dict | None = None

View File

@@ -0,0 +1,57 @@
from __future__ import annotations
import ast
from dataclasses import dataclass
@dataclass(slots=True)
class CodeChunk:
title: str
text: str
start_line: int
end_line: int
chunk_type: str
class CodeTextChunker:
def chunk(self, path: str, text: str) -> list[CodeChunk]:
try:
tree = ast.parse(text)
except SyntaxError:
return self._window_chunks(path, text)
chunks: list[CodeChunk] = []
lines = text.splitlines()
for node in tree.body:
if not isinstance(node, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)):
continue
start = int(getattr(node, "lineno", 1))
end = int(getattr(node, "end_lineno", start))
body = "\n".join(lines[start - 1 : end]).strip()
if not body:
continue
chunks.append(
CodeChunk(
title=f"{path}:{getattr(node, 'name', 'block')}",
text=body,
start_line=start,
end_line=end,
chunk_type="symbol_block",
)
)
return chunks or self._window_chunks(path, text)
def _window_chunks(self, path: str, text: str) -> list[CodeChunk]:
lines = text.splitlines()
chunks: list[CodeChunk] = []
size = 80
overlap = 15
start = 0
while start < len(lines):
end = min(len(lines), start + size)
body = "\n".join(lines[start:end]).strip()
if body:
chunks.append(CodeChunk(f"{path}:{start + 1}-{end}", body, start + 1, end, "window"))
if end >= len(lines):
break
start = max(0, end - overlap)
return chunks

View File

@@ -0,0 +1,22 @@
from __future__ import annotations
from app.modules.rag.contracts import RagDocument, RagLayer, RagSource, RagSpan
from app.modules.rag.indexing.code.code_text.chunker import CodeChunk
class CodeTextDocumentBuilder:
    """Maps a CodeChunk onto a RagDocument in the CODE_SOURCE_CHUNKS layer."""

    def build(self, source: RagSource, chunk: CodeChunk, *, chunk_index: int) -> RagDocument:
        """Build the document; *chunk_index* is the chunk's position in its file."""
        # Dotted module name derived from the path, e.g. "a/b.py" -> "a.b".
        module_name = source.path.replace("/", ".").removesuffix(".py")
        meta = {
            "chunk_index": chunk_index,
            "chunk_type": chunk.chunk_type,
            "module_or_unit": module_name,
            "artifact_type": "CODE",
        }
        return RagDocument(
            layer=RagLayer.CODE_SOURCE_CHUNKS,
            lang="python",
            source=source,
            title=chunk.title,
            text=chunk.text,
            span=RagSpan(chunk.start_line, chunk.end_line),
            metadata=meta,
        )

View File

@@ -0,0 +1,29 @@
from __future__ import annotations
from app.modules.rag.contracts import EvidenceLink, EvidenceType, RagDocument, RagLayer, RagSource, RagSpan
from app.modules.rag.indexing.code.edges.extractor import PyEdge
class EdgeDocumentBuilder:
    """Maps a PyEdge onto a RagDocument in the CODE_DEPENDENCY_GRAPH layer."""

    def build(self, source: RagSource, edge: PyEdge) -> RagDocument:
        """Build the graph document; its text is a compact "src type dst" triple."""
        destination = edge.dst_ref or edge.dst_symbol_id or "unknown"
        evidence = EvidenceLink(
            type=EvidenceType.EDGE,
            target_id=edge.edge_id,
            path=source.path,
            start_line=edge.start_line,
            end_line=edge.end_line,
        )
        return RagDocument(
            layer=RagLayer.CODE_DEPENDENCY_GRAPH,
            lang="python",
            source=source,
            title=f"{edge.src_qname}:{edge.edge_type}",
            text=f"{edge.src_qname} {edge.edge_type} {destination}",
            span=RagSpan(edge.start_line, edge.end_line),
            metadata={
                "edge_id": edge.edge_id,
                "edge_type": edge.edge_type,
                "src_symbol_id": edge.src_symbol_id,
                "src_qname": edge.src_qname,
                "dst_symbol_id": edge.dst_symbol_id,
                "dst_ref": edge.dst_ref,
                "resolution": edge.resolution,
                "lang_payload": edge.metadata,
                "artifact_type": "CODE",
            },
            links=[evidence],
        )

View File

@@ -0,0 +1,114 @@
from __future__ import annotations
import ast
from dataclasses import dataclass, field
from hashlib import sha256
@dataclass(slots=True)
class PyEdge:
    """One directed relation (inherits/imports/calls) between code entities."""

    # sha256 of "path|src qname|type|dst ref|line" — stable per occurrence.
    edge_id: str
    # "inherits" | "imports" | "calls".
    edge_type: str
    src_symbol_id: str
    src_qname: str
    # Resolved destination symbol when known within the same file.
    dst_symbol_id: str | None
    # Raw textual reference to the destination (dotted name).
    dst_ref: str | None
    path: str
    # 1-based inclusive source range of the referencing node.
    start_line: int
    end_line: int
    # "resolved" when dst_symbol_id was found, otherwise "partial".
    resolution: str = "partial"
    metadata: dict = field(default_factory=dict)
class EdgeExtractor:
    """Extracts inherits/imports/calls edges from a parsed Python module."""

    def extract(self, path: str, ast_tree: ast.AST | None, symbols: list) -> list[PyEdge]:
        """Return edges for *path*; empty when the module failed to parse."""
        if ast_tree is None:
            return []
        # Map qualified names to symbol ids so edge endpoints can be resolved.
        known = {entry.qname: entry.symbol_id for entry in symbols}
        walker = _EdgeVisitor(path, known)
        walker.visit(ast_tree)
        return walker.edges
class _EdgeVisitor(ast.NodeVisitor):
    """AST visitor that collects PyEdge records for one module.

    A stack of enclosing definition names attributes every edge to the
    qualified name of the class/function it occurs in.  Module-level imports
    are skipped (there is no enclosing symbol to attach them to).
    """

    def __init__(self, path: str, qname_map: dict[str, str]) -> None:
        self._path = path
        # qname -> symbol_id for symbols in the same file; drives resolution.
        self._qname_map = qname_map
        # Names of the enclosing definitions, outermost first.
        self._scope: list[str] = []
        self.edges: list[PyEdge] = []

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        """Record one "inherits" edge per base class, then descend."""
        current = self._enter(node.name)
        for base in node.bases:
            self._add_edge("inherits", current, self._name(base), base)
        self.generic_visit(node)
        self._scope.pop()

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        self._visit_function(node)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        self._visit_function(node)

    def visit_Import(self, node: ast.Import) -> None:
        """Record "imports" edges for ``import a, b`` inside a definition."""
        current = self._current_qname()
        if not current:
            # Module-level import: intentionally skipped.
            return
        for item in node.names:
            self._add_edge("imports", current, item.name, node)

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
        """Record "imports" edges for ``from m import x`` inside a definition."""
        current = self._current_qname()
        if not current:
            return
        module = node.module or ""
        for item in node.names:
            self._add_edge("imports", current, f"{module}.{item.name}".strip("."), node)

    def _visit_function(self, node) -> None:
        """Record a "calls" edge for every Call under the function, then descend.

        NOTE(review): ast.walk also yields calls inside nested functions, so a
        call in a nested def is attributed to both the outer and (via
        generic_visit) the inner function — confirm this is intended.
        """
        current = self._enter(node.name)
        for inner in ast.walk(node):
            if isinstance(inner, ast.Call):
                self._add_edge("calls", current, self._name(inner.func), inner, {"callsite_kind": "function_call"})
        self.generic_visit(node)
        self._scope.pop()

    def _enter(self, name: str) -> str:
        """Push *name* onto the scope stack and return the new qualified name."""
        self._scope.append(name)
        return self._current_qname() or name

    def _current_qname(self) -> str | None:
        """Dotted name of the current scope, or None at module level."""
        if not self._scope:
            return None
        return ".".join(self._scope)

    def _add_edge(self, edge_type: str, src_qname: str, dst_ref: str, node, extra: dict | None = None) -> None:
        """Append one edge; endpoint resolution is best-effort via qname_map."""
        if not dst_ref:
            return
        # Fall back to a hash of the qname when the source symbol is unknown.
        src_symbol_id = self._qname_map.get(src_qname, sha256(src_qname.encode("utf-8")).hexdigest())
        dst_symbol_id = self._qname_map.get(dst_ref)
        # The line number participates in the id so repeated edges stay distinct.
        edge_id = sha256(f"{self._path}|{src_qname}|{edge_type}|{dst_ref}|{getattr(node, 'lineno', 1)}".encode("utf-8")).hexdigest()
        self.edges.append(
            PyEdge(
                edge_id=edge_id,
                edge_type=edge_type,
                src_symbol_id=src_symbol_id,
                src_qname=src_qname,
                dst_symbol_id=dst_symbol_id,
                dst_ref=dst_ref,
                path=self._path,
                start_line=int(getattr(node, "lineno", 1)),
                end_line=int(getattr(node, "end_lineno", getattr(node, "lineno", 1))),
                resolution="resolved" if dst_symbol_id else "partial",
                metadata=extra or {},
            )
        )

    def _name(self, node) -> str:
        """Dotted-name rendering of an expression; empty string when unsupported."""
        if isinstance(node, ast.Name):
            return node.id
        if isinstance(node, ast.Attribute):
            return f"{self._name(node.value)}.{node.attr}"
        if isinstance(node, ast.Call):
            return self._name(node.func)
        return ""

View File

@@ -0,0 +1,26 @@
from __future__ import annotations
from app.modules.rag.contracts import EvidenceLink, EvidenceType, RagDocument, RagLayer, RagSource, RagSpan
from app.modules.rag.indexing.code.entrypoints.registry import Entrypoint
class EntrypointDocumentBuilder:
    """Maps an Entrypoint onto a RagDocument in the CODE_ENTRYPOINTS layer."""

    def build(self, source: RagSource, entrypoint: Entrypoint) -> RagDocument:
        """Build the document; its text summarizes framework, kind and route."""
        summary = f"{entrypoint.framework} {entrypoint.entry_type} {entrypoint.route_or_command}"
        evidence = EvidenceLink(
            type=EvidenceType.CODE_SPAN,
            target_id=entrypoint.entry_id,
            path=source.path,
            start_line=entrypoint.start_line,
            end_line=entrypoint.end_line,
        )
        return RagDocument(
            layer=RagLayer.CODE_ENTRYPOINTS,
            lang="python",
            source=source,
            title=entrypoint.route_or_command,
            text=summary,
            span=RagSpan(entrypoint.start_line, entrypoint.end_line),
            metadata={
                "entry_id": entrypoint.entry_id,
                "entry_type": entrypoint.entry_type,
                "framework": entrypoint.framework,
                "route_or_command": entrypoint.route_or_command,
                "handler_symbol_id": entrypoint.handler_symbol_id,
                "lang_payload": entrypoint.metadata,
                "artifact_type": "CODE",
            },
            links=[evidence],
        )

View File

@@ -0,0 +1,34 @@
from __future__ import annotations
from hashlib import sha256
from app.modules.rag.indexing.code.entrypoints.registry import Entrypoint
class FastApiEntrypointDetector:
    """Finds FastAPI HTTP handlers by inspecting decorator names on symbols."""

    _METHODS = {"get", "post", "put", "patch", "delete"}

    def detect(self, *, path: str, symbols: list) -> list[Entrypoint]:
        """Return one Entrypoint per HTTP-verb or ``.route`` decorator."""
        found: list[Entrypoint] = []
        for sym in symbols:
            for deco in sym.decorators or []:
                lowered = deco.lower()
                last_part = lowered.split(".")[-1]
                is_verb = last_part in self._METHODS
                if not is_verb and ".route" not in lowered:
                    continue
                # Keep only the call argument when the decorator string has one.
                route = deco.split("(")[-1].rstrip(")") if "(" in deco else deco
                found.append(
                    Entrypoint(
                        entry_id=sha256(f"{path}|fastapi|{sym.symbol_id}|{deco}".encode("utf-8")).hexdigest(),
                        entry_type="http",
                        framework="fastapi",
                        route_or_command=route,
                        handler_symbol_id=sym.symbol_id,
                        path=path,
                        start_line=sym.start_line,
                        end_line=sym.end_line,
                        metadata={"methods": [last_part.upper()] if is_verb else []},
                    )
                )
        return found

View File

@@ -0,0 +1,28 @@
from __future__ import annotations
from hashlib import sha256
from app.modules.rag.indexing.code.entrypoints.registry import Entrypoint
class FlaskEntrypointDetector:
    """Finds Flask HTTP handlers via ``*.route`` decorators."""

    def detect(self, *, path: str, symbols: list) -> list[Entrypoint]:
        """Return one Entrypoint per ``.route`` decorator found on a symbol."""
        found: list[Entrypoint] = []
        for sym in symbols:
            routed = [d for d in (sym.decorators or []) if ".route" in d.lower()]
            for deco in routed:
                key = f"{path}|flask|{sym.symbol_id}|{deco}"
                found.append(
                    Entrypoint(
                        entry_id=sha256(key.encode("utf-8")).hexdigest(),
                        entry_type="http",
                        framework="flask",
                        route_or_command=deco,
                        handler_symbol_id=sym.symbol_id,
                        path=path,
                        start_line=sym.start_line,
                        end_line=sym.end_line,
                    )
                )
        return found

View File

@@ -0,0 +1,27 @@
from __future__ import annotations
from dataclasses import dataclass, field
@dataclass(slots=True)
class Entrypoint:
    """A detected application entrypoint (HTTP route or CLI command)."""

    # Stable hash identifying this entrypoint occurrence.
    entry_id: str
    # "http" or "cli" (as emitted by the detectors).
    entry_type: str
    # Detecting framework: "fastapi", "flask", "typer", "click".
    framework: str
    # The route path or command expression as rendered from the decorator.
    route_or_command: str
    # symbol_id of the handler function.
    handler_symbol_id: str
    path: str
    # 1-based inclusive source range of the handler.
    start_line: int
    end_line: int
    metadata: dict = field(default_factory=dict)
class EntrypointDetectorRegistry:
    """Fans one detection request out to every registered detector."""

    def __init__(self, detectors: list) -> None:
        self._detectors = detectors

    def detect_all(self, *, path: str, symbols: list) -> list[Entrypoint]:
        """Concatenate all detector results, preserving registration order."""
        results: list[Entrypoint] = []
        for one in self._detectors:
            results += one.detect(path=path, symbols=symbols)
        return results

View File

@@ -0,0 +1,29 @@
from __future__ import annotations
from hashlib import sha256
from app.modules.rag.indexing.code.entrypoints.registry import Entrypoint
class TyperClickEntrypointDetector:
    """Finds Typer/Click CLI commands via ``.command``/``.callback`` decorators."""

    def detect(self, *, path: str, symbols: list) -> list[Entrypoint]:
        """Return one CLI Entrypoint per matching decorator on a symbol."""
        found: list[Entrypoint] = []
        for sym in symbols:
            for deco in sym.decorators or []:
                lowered = deco.lower()
                if ".command" not in lowered and ".callback" not in lowered:
                    continue
                # Attribute the entrypoint to typer when the decorator mentions it.
                tool = "typer" if "typer" in lowered else "click"
                found.append(
                    Entrypoint(
                        entry_id=sha256(f"{path}|{tool}|{sym.symbol_id}|{deco}".encode("utf-8")).hexdigest(),
                        entry_type="cli",
                        framework=tool,
                        route_or_command=deco,
                        handler_symbol_id=sym.symbol_id,
                        path=path,
                        start_line=sym.start_line,
                        end_line=sym.end_line,
                    )
                )
        return found

View File

@@ -0,0 +1,13 @@
from __future__ import annotations
from pathlib import PurePosixPath
class PythonFileFilter:
    """Decides which repository paths are indexable Python sources."""

    _EXCLUDE_PARTS = {"venv", ".venv", "__pycache__", "node_modules", ".git", "dist", "build"}

    def should_index(self, path: str) -> bool:
        """True for ``*.py`` files not located under an excluded directory."""
        candidate = PurePosixPath(path)
        if candidate.suffix.lower() != ".py":
            return False
        for segment in candidate.parts:
            if segment in self._EXCLUDE_PARTS:
                return False
        return True

View File

@@ -0,0 +1,52 @@
from __future__ import annotations
from app.modules.rag.contracts import RagDocument, RagSource
from app.modules.rag.indexing.code.code_text.chunker import CodeTextChunker
from app.modules.rag.indexing.code.code_text.document_builder import CodeTextDocumentBuilder
from app.modules.rag.indexing.code.edges.document_builder import EdgeDocumentBuilder
from app.modules.rag.indexing.code.edges.extractor import EdgeExtractor
from app.modules.rag.indexing.code.entrypoints.document_builder import EntrypointDocumentBuilder
from app.modules.rag.indexing.code.entrypoints.fastapi_detector import FastApiEntrypointDetector
from app.modules.rag.indexing.code.entrypoints.flask_detector import FlaskEntrypointDetector
from app.modules.rag.indexing.code.entrypoints.registry import EntrypointDetectorRegistry
from app.modules.rag.indexing.code.entrypoints.typer_click_detector import TyperClickEntrypointDetector
from app.modules.rag.indexing.code.file_filter import PythonFileFilter
from app.modules.rag.indexing.code.symbols.ast_parser import PythonAstParser
from app.modules.rag.indexing.code.symbols.document_builder import SymbolDocumentBuilder
from app.modules.rag.indexing.code.symbols.extractor import SymbolExtractor
class CodeIndexingPipeline:
    """End-to-end indexing of one Python file into RAG documents.

    Combines four extraction passes: raw code chunks, the symbol catalog,
    dependency edges, and framework entrypoints.
    """

    def __init__(self) -> None:
        self._filter = PythonFileFilter()
        self._chunker = CodeTextChunker()
        self._code_builder = CodeTextDocumentBuilder()
        self._parser = PythonAstParser()
        self._symbols = SymbolExtractor()
        self._symbol_builder = SymbolDocumentBuilder()
        self._edges = EdgeExtractor()
        self._edge_builder = EdgeDocumentBuilder()
        detectors = [
            FastApiEntrypointDetector(),
            FlaskEntrypointDetector(),
            TyperClickEntrypointDetector(),
        ]
        self._entrypoints = EntrypointDetectorRegistry(detectors)
        self._entrypoint_builder = EntrypointDocumentBuilder()

    def supports(self, path: str) -> bool:
        """True when *path* passes the Python file filter."""
        return self._filter.should_index(path)

    def index_file(self, *, repo_id: str, commit_sha: str | None, path: str, content: str) -> list[RagDocument]:
        """Run every extraction pass over one file and collect the documents."""
        source = RagSource(repo_id=repo_id, commit_sha=commit_sha, path=path)
        documents: list[RagDocument] = []
        for position, piece in enumerate(self._chunker.chunk(path, content)):
            documents.append(self._code_builder.build(source, piece, chunk_index=position))
        # Symbols feed both the edge extractor and the entrypoint detectors.
        module_ast = self._parser.parse_module(content)
        symbol_list = self._symbols.extract(path, content, module_ast)
        documents.extend(self._symbol_builder.build(source, sym) for sym in symbol_list)
        documents.extend(
            self._edge_builder.build(source, edge)
            for edge in self._edges.extract(path, module_ast, symbol_list)
        )
        documents.extend(
            self._entrypoint_builder.build(source, entry)
            for entry in self._entrypoints.detect_all(path=path, symbols=symbol_list)
        )
        return documents

View File

@@ -0,0 +1,11 @@
from __future__ import annotations
import ast
class PythonAstParser:
    """Thin wrapper around ast.parse that swallows syntax errors."""

    def parse_module(self, text: str) -> ast.AST | None:
        """Parse *text* as a module; None when it is not valid Python."""
        try:
            parsed = ast.parse(text)
        except SyntaxError:
            return None
        return parsed

View File

@@ -0,0 +1,32 @@
from __future__ import annotations
from app.modules.rag.contracts import RagDocument, RagLayer, RagSource, RagSpan
from app.modules.rag.indexing.code.symbols.extractor import PySymbol
class SymbolDocumentBuilder:
    """Maps a PySymbol onto a RagDocument in the CODE_SYMBOL_CATALOG layer."""

    def build(self, source: RagSource, symbol: PySymbol) -> RagDocument:
        """Build the catalog document; text is "kind qname", signature, docstring."""
        parts = [f"{symbol.kind} {symbol.qname}", symbol.signature]
        if symbol.docstring:
            parts.append(symbol.docstring.strip())
        document_text = "\n".join(p for p in parts if p)
        module_name = source.path.replace("/", ".").removesuffix(".py")
        return RagDocument(
            layer=RagLayer.CODE_SYMBOL_CATALOG,
            lang="python",
            source=source,
            title=symbol.qname,
            text=document_text,
            span=RagSpan(symbol.start_line, symbol.end_line),
            metadata={
                "symbol_id": symbol.symbol_id,
                "qname": symbol.qname,
                "kind": symbol.kind,
                "signature": symbol.signature,
                "decorators_or_annotations": symbol.decorators,
                "docstring_or_javadoc": symbol.docstring,
                "parent_symbol_id": symbol.parent_symbol_id,
                "package_or_module": module_name,
                # Decorated symbols are candidate HTTP/CLI entrypoints.
                "is_entry_candidate": bool(symbol.decorators),
                "lang_payload": symbol.lang_payload,
                "artifact_type": "CODE",
            },
        )

View File

@@ -0,0 +1,130 @@
from __future__ import annotations
import ast
from dataclasses import dataclass, field
from hashlib import sha256
@dataclass(slots=True)
class PySymbol:
symbol_id: str
qname: str
kind: str
path: str
start_line: int
end_line: int
signature: str
decorators: list[str] = field(default_factory=list)
docstring: str | None = None
parent_symbol_id: str | None = None
lang_payload: dict = field(default_factory=dict)
class SymbolExtractor:
def extract(self, path: str, text: str, ast_tree: ast.AST | None) -> list[PySymbol]:
if ast_tree is None:
return []
collector = _SymbolVisitor(path)
collector.visit(ast_tree)
return collector.symbols
class _SymbolVisitor(ast.NodeVisitor):
def __init__(self, path: str) -> None:
self._path = path
self._stack: list[tuple[str, str]] = []
self.symbols: list[PySymbol] = []
def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
if self._stack:
return
module = node.module or ""
for item in node.names:
local_name = item.asname or item.name
imported_name = f"{module}.{item.name}".strip(".")
self.symbols.append(
PySymbol(
symbol_id=sha256(f"{self._path}|{local_name}|import_alias".encode("utf-8")).hexdigest(),
qname=local_name,
kind="const",
path=self._path,
start_line=int(getattr(node, "lineno", 1)),
end_line=int(getattr(node, "end_lineno", getattr(node, "lineno", 1))),
signature=f"{local_name} = {imported_name}",
lang_payload={"imported_from": imported_name, "import_alias": True},
)
)
self.generic_visit(node)
def visit_Import(self, node: ast.Import) -> None:
if self._stack:
return
for item in node.names:
local_name = item.asname or item.name
self.symbols.append(
PySymbol(
symbol_id=sha256(f"{self._path}|{local_name}|import".encode("utf-8")).hexdigest(),
qname=local_name,
kind="const",
path=self._path,
start_line=int(getattr(node, "lineno", 1)),
end_line=int(getattr(node, "end_lineno", getattr(node, "lineno", 1))),
signature=f"import {item.name}",
lang_payload={"imported_from": item.name, "import_alias": bool(item.asname)},
)
)
self.generic_visit(node)
def visit_ClassDef(self, node: ast.ClassDef) -> None:
self._add_symbol(node, "class", {"bases": [self._expr_name(base) for base in node.bases]})
self.generic_visit(node)
self._stack.pop()
def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
self._add_function(node, is_async=False)
def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
self._add_function(node, is_async=True)
def _add_function(self, node, *, is_async: bool) -> None:
kind = "method" if self._stack and self._stack[-1][0] == "class" else "function"
self._add_symbol(node, kind, {"async": is_async})
self.generic_visit(node)
self._stack.pop()
def _add_symbol(self, node, kind: str, lang_payload: dict) -> None:
names = [name for _, name in self._stack] + [node.name]
qname = ".".join(names)
symbol_id = sha256(f"{self._path}|{qname}|{kind}".encode("utf-8")).hexdigest()
signature = self._signature(node)
symbol = PySymbol(
symbol_id=symbol_id,
qname=qname,
kind=kind,
path=self._path,
start_line=int(getattr(node, "lineno", 1)),
end_line=int(getattr(node, "end_lineno", getattr(node, "lineno", 1))),
signature=signature,
decorators=[self._expr_name(item) for item in getattr(node, "decorator_list", [])],
docstring=ast.get_docstring(node),
parent_symbol_id=self._stack[-1][1] if self._stack else None,
lang_payload=lang_payload,
)
self.symbols.append(symbol)
self._stack.append((kind, qname))
def _signature(self, node) -> str:
if isinstance(node, ast.ClassDef):
bases = ", ".join(self._expr_name(base) for base in node.bases)
return f"{node.name}({bases})" if bases else node.name
args = [arg.arg for arg in getattr(node.args, "args", [])]
return f"{node.name}({', '.join(args)})"
def _expr_name(self, node) -> str:
if isinstance(node, ast.Name):
return node.id
if isinstance(node, ast.Attribute):
return f"{self._expr_name(node.value)}.{node.attr}"
if isinstance(node, ast.Call):
return self._expr_name(node.func)
return ast.dump(node, include_attributes=False)

View File

@@ -0,0 +1,15 @@
from __future__ import annotations
from app.modules.rag.contracts import RagDocument
from app.modules.rag.persistence.repository import RagRepository
class RagDocumentUpserter:
    """Thin write-facade that forwards document persistence to RagRepository."""

    def __init__(self, repository: RagRepository) -> None:
        self._repository = repository

    def replace(self, rag_session_id: str, docs: list[RagDocument]) -> None:
        """Replace the session's stored documents with *docs* (full rebuild)."""
        self._repository.replace_documents(rag_session_id, docs)

    def apply_changes(self, rag_session_id: str, delete_paths: list[str], docs: list[RagDocument]) -> None:
        """Incremental update: *delete_paths* to remove, *docs* to write.

        Exact semantics (ordering, conflict handling) are delegated to the
        repository implementation.
        """
        self._repository.apply_document_changes(rag_session_id, delete_paths, docs)

View File

@@ -0,0 +1,21 @@
from __future__ import annotations
from dataclasses import dataclass, field
@dataclass(slots=True)
class IndexReport:
    """Aggregated counters for one indexing run."""

    indexed_files: int = 0
    failed_files: int = 0
    cache_hit_files: int = 0
    cache_miss_files: int = 0
    # Total number of RAG documents produced.
    documents: int = 0
    # Non-fatal issues collected along the way.
    warnings: list[str] = field(default_factory=list)

    def as_tuple(self) -> tuple[int, int, int, int]:
        """Return (indexed, failed, cache hits, cache misses).

        Note: ``documents`` and ``warnings`` are intentionally not included.
        """
        return (
            self.indexed_files,
            self.failed_files,
            self.cache_hit_files,
            self.cache_miss_files,
        )

View File

@@ -0,0 +1,77 @@
from __future__ import annotations
from dataclasses import dataclass
from app.modules.rag.indexing.docs.chunkers.text_chunker import DocTextChunker
@dataclass(slots=True)
class SectionChunk:
    """One markdown section produced by MarkdownDocChunker."""

    # Heading hierarchy joined with " > ", e.g. "API > Errors".
    section_path: str
    # Title of the innermost heading (or "Document" for preamble/fallback).
    section_title: str
    # Section body text, stripped.
    content: str
    # Ordinal of the section within the document.
    order: int
class MarkdownDocChunker:
    """Splits markdown text into per-heading sections.

    Falls back to fixed-size text chunks (DocTextChunker) when no non-empty
    section is produced at all.
    """

    def __init__(self, text_chunker: DocTextChunker | None = None) -> None:
        self._fallback = text_chunker or DocTextChunker()

    def chunk(self, text: str) -> list[SectionChunk]:
        """Return one SectionChunk per heading (plus any preamble content)."""
        lines = text.splitlines()
        sections: list[SectionChunk] = []
        # Stack of (heading level, title) for the currently open headings.
        stack: list[tuple[int, str]] = []
        current_title = "Document"  # title used for preamble before any heading
        current_lines: list[str] = []
        order = 0
        for line in lines:
            heading = self._heading(line)
            if heading is None:
                current_lines.append(line)
                continue
            # A new heading closes the section accumulated so far (flushed
            # before the stack is updated, so it keeps its own path).
            self._flush_section(sections, stack, current_title, current_lines, order)
            order += 1
            level, title = heading
            # Drop headings at the same or deeper level to keep a proper path.
            stack = [item for item in stack if item[0] < level]
            stack.append((level, title))
            current_title = title
            current_lines = []
        self._flush_section(sections, stack, current_title, current_lines, order)
        if sections:
            return sections
        # No usable sections: degrade to plain character chunks.
        chunks = self._fallback.split(text)
        return [
            SectionChunk(section_path="Document", section_title="Document", content=chunk, order=index)
            for index, chunk in enumerate(chunks)
        ]

    def _flush_section(
        self,
        sections: list[SectionChunk],
        stack: list[tuple[int, str]],
        current_title: str,
        current_lines: list[str],
        order: int,
    ) -> None:
        """Append the accumulated lines as a section; empty content is dropped."""
        content = "\n".join(current_lines).strip()
        if not content:
            return
        titles = [title for _, title in stack] or [current_title]
        sections.append(
            SectionChunk(
                section_path=" > ".join(titles),
                section_title=titles[-1],
                content=content,
                order=order,
            )
        )

    def _heading(self, line: str) -> tuple[int, str] | None:
        """Parse an ATX ``#`` heading; return (level, title) or None."""
        stripped = line.strip()
        if not stripped.startswith("#"):
            return None
        level = len(stripped) - len(stripped.lstrip("#"))
        title = stripped[level:].strip()
        if not title:
            # "#" with no title text is not treated as a heading.
            return None
        return level, title

View File

@@ -0,0 +1,21 @@
from __future__ import annotations
class DocTextChunker:
    """Splits plain text into fixed-size character chunks with overlap."""

    def __init__(self, max_chars: int = 4000, overlap_chars: int = 250) -> None:
        # Overlap should be smaller than the window; split() additionally
        # enforces forward progress for pathological configurations.
        self._max_chars = max_chars
        self._overlap_chars = overlap_chars

    def split(self, text: str) -> list[str]:
        """Return non-empty, stripped chunks covering *text*.

        Fix: with ``overlap_chars >= max_chars`` the previous rewind
        ``start = max(0, end - overlap)`` never advanced and looped forever;
        the window start now always moves forward by at least one character.
        Behavior for sane configurations (overlap < max_chars) is unchanged.
        """
        cleaned = text.strip()
        if not cleaned:
            return []
        chunks: list[str] = []
        start = 0
        total = len(cleaned)
        while start < total:
            end = min(total, start + self._max_chars)
            piece = cleaned[start:end].strip()
            if piece:
                chunks.append(piece)
            if end >= total:
                break
            # Guarantee forward progress even for pathological overlap values.
            start = max(start + 1, end - self._overlap_chars)
        return chunks

View File

@@ -0,0 +1,18 @@
from __future__ import annotations
from pathlib import PurePosixPath
from app.modules.rag.contracts import DocKind
class DocsClassifier:
    """Heuristic doc-kind classification based only on the file path."""

    def classify(self, path: str) -> str:
        """Return a DocKind value for *path*; check order defines priority."""
        file_name = PurePosixPath(path).name.upper()
        full_lower = path.lower()
        if "runbook" in full_lower or file_name.startswith("RUNBOOK"):
            return DocKind.RUNBOOK
        if file_name.startswith("README"):
            return DocKind.README
        if "spec" in full_lower or "architecture" in full_lower:
            return DocKind.SPEC
        return DocKind.MISC

View File

@@ -0,0 +1,115 @@
from __future__ import annotations
from hashlib import sha256
from app.modules.rag.contracts import EvidenceLink, EvidenceType, RagDocument, RagLayer, RagSource
from app.modules.rag.indexing.docs.chunkers.markdown_chunker import SectionChunk
class DocsDocumentBuilder:
    """Builds RagDocuments for the docs layers: catalog, sections, policies, facts."""

    def build_module_catalog(self, source: RagSource, frontmatter: dict, summary_text: str, doc_kind: str) -> RagDocument | None:
        """Build a DOCS_MODULE_CATALOG document.

        Returns None unless the frontmatter declares all of id/type/domain —
        only fully described modules enter the catalog.
        """
        module_id = str(frontmatter.get("id") or "").strip()
        module_type = str(frontmatter.get("type") or "").strip()
        domain = str(frontmatter.get("domain") or "").strip()
        if not module_id or not module_type or not domain:
            return None
        links = frontmatter.get("links") or {}
        metadata = {
            "module_id": module_id,
            "type": module_type,
            "domain": domain,
            "status": frontmatter.get("status"),
            "version": frontmatter.get("version"),
            "tags": frontmatter.get("tags") or [],
            "owners": frontmatter.get("owners") or [],
            "links": links,
            "source_path": source.path,
            "summary_text": summary_text[:4000],  # capped to keep rows bounded
            "doc_kind": doc_kind,
        }
        # Promote the well-known relation lists to top-level metadata keys
        # (missing relations default to empty lists).
        metadata.update({name: links.get(name, []) for name in (
            "calls_api", "called_by", "uses_logic", "used_by", "reads_db", "writes_db",
            "integrates_with", "emits_events", "consumes_events",
        )})
        return RagDocument(
            layer=RagLayer.DOCS_MODULE_CATALOG,
            source=source,
            title=module_id,
            text=summary_text[:4000] or module_id,
            metadata=metadata,
        )

    def build_section(self, source: RagSource, chunk: SectionChunk, frontmatter: dict, doc_kind: str) -> RagDocument:
        """Build a DOCS_SECTION_INDEX document for one markdown section."""
        # Fall back to the file path when the frontmatter has no id.
        module_id = str(frontmatter.get("id") or source.path)
        metadata = {
            "module_id": module_id,
            "type": frontmatter.get("type"),
            "domain": frontmatter.get("domain"),
            "tags": frontmatter.get("tags") or [],
            "section_path": chunk.section_path,
            "section_title": chunk.section_title,
            "order": chunk.order,
            "doc_kind": doc_kind,
            "source_path": source.path,
            "artifact_type": "DOCS",
        }
        return RagDocument(
            layer=RagLayer.DOCS_SECTION_INDEX,
            source=source,
            title=f"{module_id}:{chunk.section_title}",
            text=chunk.content,
            metadata=metadata,
        )

    def build_policy(self, source: RagSource, frontmatter: dict, chunk: SectionChunk, doc_kind: str) -> RagDocument | None:
        """Build a DOCS_POLICY_INDEX document; None when the frontmatter has no id."""
        policy_id = str(frontmatter.get("id") or "").strip()
        applies_to = frontmatter.get("applies_to") or frontmatter.get("type")
        if not policy_id:
            return None
        metadata = {
            "policy_id": policy_id,
            "applies_to": applies_to,
            "rules": chunk.content[:4000],
            "default_behaviors": frontmatter.get("default_behaviors") or [],
            "doc_kind": doc_kind,
            "section_path": chunk.section_path,
            "source_path": source.path,
        }
        return RagDocument(
            layer=RagLayer.DOCS_POLICY_INDEX,
            source=source,
            title=policy_id,
            text=chunk.content[:4000],
            metadata=metadata,
        )

    def build_fact(
        self,
        source: RagSource,
        *,
        subject_id: str,
        predicate: str,
        obj: str,
        object_ref: str | None,
        anchor: str,
        tags: list[str] | None = None,
    ) -> RagDocument:
        """Build a DOCS_FACT_INDEX triple document (subject, predicate, object).

        The fact id hashes the triple plus its location, so identical triples
        from different files/anchors stay distinct.
        """
        fact_id = sha256(f"{subject_id}|{predicate}|{obj}|{source.path}|{anchor}".encode("utf-8")).hexdigest()
        metadata = {
            "fact_id": fact_id,
            "subject_id": subject_id,
            "predicate": predicate,
            "object": obj,
            "object_ref": object_ref,
            "anchor": anchor,
            "tags": tags or [],
            "source_path": source.path,
        }
        return RagDocument(
            layer=RagLayer.DOCS_FACT_INDEX,
            source=source,
            title=f"{subject_id}:{predicate}",
            text=f"{subject_id} {predicate} {obj}".strip(),
            metadata=metadata,
            links=[EvidenceLink(type=EvidenceType.DOC_FACT, target_id=fact_id, path=source.path, note=anchor)],
        )

View File

@@ -0,0 +1,21 @@
from __future__ import annotations
from pathlib import PurePosixPath
class DocsFileFilter:
    """Selects documentation-like files for indexing."""

    _EXTENSIONS = {".md", ".rst", ".txt", ".adoc"}
    _NAMES = ("README", "CHANGELOG", "CONTRIBUTING", "ARCHITECTURE", "SPEC", "RUNBOOK")
    _EXCLUDE_PARTS = {"node_modules", ".git", "vendor", "dist", "build", "target", "__pycache__"}

    def should_index(self, path: str) -> bool:
        """True for doc extensions, well-known doc names, or docs/ directories."""
        candidate = PurePosixPath(path)
        for part in candidate.parts:
            if part in self._EXCLUDE_PARTS:
                return False
        if candidate.suffix.lower() in self._EXTENSIONS:
            return True
        name = candidate.name.upper()
        for prefix in self._NAMES:
            if name.startswith(prefix):
                return True
        lowered_path = "/".join(candidate.parts).lower()
        return any(marker in lowered_path for marker in ("docs/", "doc/", "documentation/"))

View File

@@ -0,0 +1,135 @@
from __future__ import annotations
import re
import yaml
from app.modules.rag.contracts import RagDocument, RagSource
from app.modules.rag.indexing.docs.chunkers.markdown_chunker import MarkdownDocChunker
from app.modules.rag.indexing.docs.classifier import DocsClassifier
from app.modules.rag.indexing.docs.document_builder import DocsDocumentBuilder
from app.modules.rag.indexing.docs.file_filter import DocsFileFilter
class DocsIndexingPipeline:
    """Indexes one documentation file into RAG documents.

    Per file it produces: an optional module-catalog document (when YAML
    frontmatter declares id/type/domain), one document per markdown section,
    an optional policy document, and fact documents mined from frontmatter
    links, markdown tables and bullet lists.
    """

    def __init__(self) -> None:
        self._filter = DocsFileFilter()
        self._classifier = DocsClassifier()
        self._chunker = MarkdownDocChunker()
        self._builder = DocsDocumentBuilder()

    def supports(self, path: str) -> bool:
        """True when *path* looks like a documentation file worth indexing."""
        return self._filter.should_index(path)

    def index_file(self, *, repo_id: str, commit_sha: str | None, path: str, content: str) -> list[RagDocument]:
        """Build every RAG document for one documentation file."""
        source = RagSource(repo_id=repo_id, commit_sha=commit_sha, path=path)
        frontmatter, body = self._split_frontmatter(content)
        doc_kind = self._classifier.classify(path)
        sections = self._chunker.chunk(body)
        summary_text = self._summary_from_sections(sections)
        docs: list[RagDocument] = []
        module_doc = self._builder.build_module_catalog(source, frontmatter, summary_text, doc_kind)
        if module_doc is not None:
            docs.append(module_doc)
        for section in sections:
            docs.append(self._builder.build_section(source, section, frontmatter, doc_kind))
        if str(frontmatter.get("type") or "").strip() == "policy":
            # Only the first section is promoted to the policy index.
            for section in sections[:1]:
                policy = self._builder.build_policy(source, frontmatter, section, doc_kind)
                if policy is not None:
                    docs.append(policy)
        docs.extend(self._extract_facts(source, frontmatter, sections))
        return docs

    def _split_frontmatter(self, content: str) -> tuple[dict, str]:
        """Split YAML frontmatter from the document body.

        Fix: the previous implementation unpacked ``content.split("---", 2)``
        into exactly three names, which raised ValueError on an opening
        ``---`` without a closing one, and it let malformed YAML propagate.
        Both cases now gracefully fall back to "no frontmatter".
        """
        if not content.startswith("---\n"):
            return {}, content
        parts = content.split("---", 2)
        if len(parts) < 3:
            # Opening marker without a closing one: treat as plain body.
            return {}, content
        try:
            payload = yaml.safe_load(parts[1]) or {}
        except yaml.YAMLError:
            return {}, content
        return (payload if isinstance(payload, dict) else {}), parts[2].strip()

    def _summary_from_sections(self, sections) -> str:
        """Concatenate the first two sections, capped at 4000 characters."""
        text = "\n\n".join(section.content for section in sections[:2]).strip()
        return text[:4000]

    def _extract_facts(self, source: RagSource, frontmatter: dict, sections) -> list[RagDocument]:
        """Mine fact documents from frontmatter links, tables and lists."""
        subject_id = str(frontmatter.get("id") or source.path)
        docs: list[RagDocument] = []
        links = frontmatter.get("links") or {}
        for predicate, values in links.items():
            for value in values or []:
                docs.append(
                    self._builder.build_fact(
                        source,
                        subject_id=subject_id,
                        predicate=predicate,
                        obj=str(value),
                        object_ref=str(value),
                        anchor="frontmatter.links",
                    )
                )
        for section in sections:
            docs.extend(self._facts_from_table(source, subject_id, section.section_title, section.content))
            docs.extend(self._facts_from_lists(source, subject_id, section.section_title, section.content))
        return docs

    def _facts_from_table(self, source: RagSource, subject_id: str, title: str, content: str) -> list[RagDocument]:
        """Parse a markdown table (header, separator, rows) into facts."""
        lines = [line.strip() for line in content.splitlines() if line.strip()]
        # Need at least header + separator + one row, and a pipe in the header.
        if len(lines) < 3 or "|" not in lines[0]:
            return []
        headers = [part.strip().lower() for part in lines[0].strip("|").split("|")]
        if not all(headers):
            return []
        docs: list[RagDocument] = []
        for row in lines[2:]:
            if "|" not in row:
                continue
            values = [part.strip() for part in row.strip("|").split("|")]
            if len(values) != len(headers):
                # Ragged row: skip rather than mis-assign columns.
                continue
            payload = dict(zip(headers, values))
            docs.extend(self._facts_from_row(source, subject_id, title, payload))
        return docs

    def _facts_from_row(self, source: RagSource, subject_id: str, title: str, payload: dict) -> list[RagDocument]:
        """Turn one table row into facts, keyed on the section title."""
        docs: list[RagDocument] = []
        name = payload.get("field") or payload.get("name") or payload.get("column") or payload.get("code")
        if "request" in title.lower() or "response" in title.lower():
            if name:
                docs.append(self._builder.build_fact(source, subject_id=subject_id, predicate="has_field", obj=name, object_ref=None, anchor=title))
                if payload.get("required"):
                    docs.append(self._builder.build_fact(source, subject_id=subject_id, predicate="field_required", obj=f"{name}:{payload['required']}", object_ref=None, anchor=title))
                if payload.get("type"):
                    docs.append(self._builder.build_fact(source, subject_id=subject_id, predicate="field_type", obj=f"{name}:{payload['type']}", object_ref=None, anchor=title))
                if payload.get("validation"):
                    docs.append(self._builder.build_fact(source, subject_id=subject_id, predicate="field_validation", obj=f"{name}:{payload['validation']}", object_ref=None, anchor=title))
        if "error" in title.lower():
            if payload.get("status"):
                docs.append(self._builder.build_fact(source, subject_id=subject_id, predicate="returns_status", obj=payload["status"], object_ref=None, anchor=title))
            if payload.get("error") or payload.get("code"):
                error_value = payload.get("error") or payload.get("code")
                docs.append(self._builder.build_fact(source, subject_id=subject_id, predicate="returns_error", obj=error_value, object_ref=None, anchor=title))
            if payload.get("client action"):
                docs.append(self._builder.build_fact(source, subject_id=subject_id, predicate="client_action", obj=payload["client action"], object_ref=None, anchor=title))
        if "constraint" in title.lower() and name:
            docs.append(self._builder.build_fact(source, subject_id=subject_id, predicate="has_constraint", obj=name, object_ref=None, anchor=title))
        return docs

    def _facts_from_lists(self, source: RagSource, subject_id: str, title: str, content: str) -> list[RagDocument]:
        """Turn bullet/numbered list items into facts via prefix keywords."""
        docs: list[RagDocument] = []
        for line in content.splitlines():
            item = line.strip()
            if not item.startswith(("-", "*", "1.", "2.", "3.")):
                continue
            # Strip the list marker, then dispatch on a "keyword:" prefix.
            normalized = re.sub(r"^[-*0-9. ]+", "", item).strip()
            lowered = normalized.lower()
            if lowered.startswith("metric:"):
                predicate = "emits_metric"
            elif lowered.startswith("event:"):
                predicate = "emits_analytics_event"
            elif lowered.startswith("log:"):
                predicate = "logs_event"
            else:
                predicate = "validates_rule" if "rule" in title.lower() else "client_action"
            docs.append(self._builder.build_fact(source, subject_id=subject_id, predicate=predicate, obj=normalized, object_ref=None, anchor=title))
        return docs

View File

@@ -0,0 +1,189 @@
from __future__ import annotations
import json
from sqlalchemy import text
from app.modules.rag.contracts import EvidenceLink, RagDocument, RagSource, RagSpan
from app.modules.rag.contracts.enums import RagLayer
class RagCacheRepository:
    """Blob-level cache of embedded RAG chunks, keyed by (repo_id, blob_sha)."""

    def get_cached_documents(self, repo_id: str, blob_sha: str) -> list[RagDocument]:
        """Load the cached chunk documents for one blob, in chunk order.

        Returns an empty list on a cache miss.
        """
        with self._engine().connect() as conn:
            rows = conn.execute(
                text(
                    """
                    SELECT layer, lang, path, title, content, metadata_json, links_json, span_start, span_end,
                        repo_id, commit_sha, embedding::text AS embedding_txt
                    FROM rag_chunk_cache
                    WHERE repo_id = :repo_id AND blob_sha = :blob_sha
                    ORDER BY chunk_index ASC
                    """
                ),
                {"repo_id": repo_id, "blob_sha": blob_sha},
            ).mappings().fetchall()
            docs: list[RagDocument] = []
            for row in rows:
                metadata = self._loads(row.get("metadata_json"))
                docs.append(
                    RagDocument(
                        layer=str(row["layer"]),
                        lang=row.get("lang"),
                        source=RagSource(
                            repo_id=str(row["repo_id"]),
                            commit_sha=row.get("commit_sha"),
                            path=str(row["path"]),
                        ),
                        # Fall back to the path when no title was cached.
                        title=str(row["title"] or row["path"]),
                        text=str(row["content"] or ""),
                        metadata=metadata,
                        links=[EvidenceLink(**item) for item in self._loads(row.get("links_json"), default=[])],
                        span=RagSpan(row.get("span_start"), row.get("span_end")),
                        # embedding::text renders as "[x,y,...]"; parse back to floats.
                        embedding=self._parse_vector(str(row["embedding_txt"] or "")),
                    )
                )
            return docs

    def cache_documents(self, repo_id: str, path: str, blob_sha: str, docs: list[RagDocument]) -> None:
        """Persist *docs* into the blob cache.

        Upserts one rag_blob_cache row (metadata taken from the first chunk),
        then replaces all rag_chunk_cache rows for the blob. No-op for an
        empty *docs* list. Committed as one transaction.
        """
        if not docs:
            return
        with self._engine().connect() as conn:
            # Blob-level metadata is taken from the first chunk's record.
            first = docs[0].to_record()
            first_meta = first["metadata"]
            conn.execute(
                text(
                    """
                    INSERT INTO rag_blob_cache (
                        repo_id, blob_sha, path, artifact_type, section, doc_id, doc_version, owner,
                        system_component, last_modified, staleness_score, layer, lang, metadata_json
                    )
                    VALUES (
                        :repo_id, :blob_sha, :path, :artifact_type, :section, :doc_id, :doc_version, :owner,
                        :system_component, :last_modified, :staleness_score, :layer, :lang, :metadata_json
                    )
                    ON CONFLICT (repo_id, blob_sha, path) DO UPDATE SET
                        artifact_type = EXCLUDED.artifact_type,
                        section = EXCLUDED.section,
                        doc_id = EXCLUDED.doc_id,
                        doc_version = EXCLUDED.doc_version,
                        owner = EXCLUDED.owner,
                        system_component = EXCLUDED.system_component,
                        last_modified = EXCLUDED.last_modified,
                        staleness_score = EXCLUDED.staleness_score,
                        layer = EXCLUDED.layer,
                        lang = EXCLUDED.lang,
                        metadata_json = EXCLUDED.metadata_json,
                        updated_at = CURRENT_TIMESTAMP
                    """
                ),
                {
                    "repo_id": repo_id,
                    "blob_sha": blob_sha,
                    "path": path,
                    "artifact_type": first_meta.get("artifact_type"),
                    "section": first_meta.get("section") or first_meta.get("section_title"),
                    "doc_id": first_meta.get("doc_id"),
                    "doc_version": first_meta.get("doc_version"),
                    "owner": first_meta.get("owner"),
                    "system_component": first_meta.get("system_component"),
                    "last_modified": first_meta.get("last_modified"),
                    "staleness_score": first_meta.get("staleness_score"),
                    "layer": first["layer"],
                    "lang": first["lang"],
                    "metadata_json": json.dumps(first_meta, ensure_ascii=True),
                },
            )
            # Full replace of the blob's chunks: delete then re-insert in order.
            conn.execute(
                text("DELETE FROM rag_chunk_cache WHERE repo_id = :repo_id AND blob_sha = :blob_sha"),
                {"repo_id": repo_id, "blob_sha": blob_sha},
            )
            for idx, doc in enumerate(docs):
                row = doc.to_record()
                metadata = row["metadata"]
                emb = row["embedding"] or []
                # pgvector literal "[x,y,...]"; NULL when no embedding exists.
                emb_str = "[" + ",".join(str(x) for x in emb) + "]" if emb else None
                conn.execute(
                    text(
                        """
                        INSERT INTO rag_chunk_cache (
                            repo_id, blob_sha, chunk_index, content, embedding, section, layer, lang, path, title,
                            metadata_json, links_json, span_start, span_end, commit_sha
                        )
                        VALUES (
                            :repo_id, :blob_sha, :chunk_index, :content, CAST(:embedding AS vector), :section, :layer,
                            :lang, :path, :title, :metadata_json, :links_json, :span_start, :span_end, :commit_sha
                        )
                        """
                    ),
                    {
                        "repo_id": repo_id,
                        "blob_sha": blob_sha,
                        "chunk_index": idx,
                        "content": row["text"],
                        "embedding": emb_str,
                        "section": metadata.get("section") or metadata.get("section_title"),
                        "layer": row["layer"],
                        "lang": row["lang"],
                        "path": row["path"],
                        "title": row["title"],
                        "metadata_json": json.dumps(metadata, ensure_ascii=True),
                        "links_json": json.dumps(row["links"], ensure_ascii=True),
                        "span_start": row["span_start"],
                        "span_end": row["span_end"],
                        "commit_sha": row["commit_sha"],
                    },
                )
            conn.commit()

    def record_repo_cache(
        self,
        *,
        project_id: str,
        commit_sha: str | None,
        changed_files: list[str],
        summary: str,
    ) -> None:
        """Record a webhook-driven change set as one synthetic chunk per file.

        Each changed path becomes a CODE_SOURCE_CHUNKS document whose text
        embeds the (truncated to 300 chars) change summary, cached under a
        synthetic blob sha derived from commit + path.
        """
        docs: list[RagDocument] = []
        for idx, path in enumerate(changed_files):
            docs.append(
                RagDocument(
                    layer=RagLayer.CODE_SOURCE_CHUNKS,
                    # Only .py files get a language tag here.
                    lang="python" if path.endswith(".py") else None,
                    source=RagSource(project_id, commit_sha, path),
                    title=path,
                    text=f"repo_webhook:{path}:{summary[:300]}",
                    metadata={"chunk_index": idx, "artifact_type": "CODE", "section": "repo_webhook"},
                )
            )
        for doc in docs:
            blob_sha = self._blob_sha(commit_sha, doc.source.path)
            doc.metadata["blob_sha"] = blob_sha
            self.cache_documents(project_id, doc.source.path, blob_sha, [doc])

    def _blob_sha(self, commit_sha: str | None, path: str) -> str:
        """Deterministic synthetic blob id for (commit, path)."""
        from hashlib import sha256

        return sha256(f"{commit_sha or 'no-commit'}:{path}".encode("utf-8")).hexdigest()

    def _engine(self):
        """Lazily resolve the shared SQLAlchemy engine (avoids import cycles)."""
        from app.modules.shared.db import get_engine

        return get_engine()

    def _loads(self, value, default=None):
        """json.loads with an empty-value fallback (default {} unless given)."""
        if default is None:
            default = {}
        if not value:
            return default
        return json.loads(str(value))

    def _parse_vector(self, value: str) -> list[float]:
        """Parse a pgvector text literal "[x,y,...]" into floats ([] if empty)."""
        text_value = value.strip()
        if not text_value:
            return []
        if text_value.startswith("[") and text_value.endswith("]"):
            text_value = text_value[1:-1]
        if not text_value:
            return []
        return [float(part.strip()) for part in text_value.split(",") if part.strip()]

View File

@@ -0,0 +1,122 @@
from __future__ import annotations
import json
from sqlalchemy import text
from app.modules.rag.contracts import RagDocument
class RagDocumentRepository:
    """Writes RagDocument chunks into rag_chunks and rag_session_chunk_map.

    All methods operate on a caller-supplied connection; the caller owns the
    transaction (no commits happen here).
    """

    def replace_documents(self, conn, rag_session_id: str, docs: list[RagDocument]) -> None:
        """Drop every chunk of the session and insert *docs* from scratch."""
        conn.execute(text("DELETE FROM rag_chunks WHERE rag_session_id = :sid"), {"sid": rag_session_id})
        conn.execute(text("DELETE FROM rag_session_chunk_map WHERE rag_session_id = :sid"), {"sid": rag_session_id})
        self.insert_documents(conn, rag_session_id, docs)

    def apply_document_changes(
        self,
        conn,
        rag_session_id: str,
        delete_paths: list[str],
        docs: list[RagDocument],
    ) -> None:
        """Incremental update: remove *delete_paths*, then re-insert *docs*.

        Paths present in *docs* are also deleted first so their chunks are
        fully replaced rather than appended to.
        """
        if delete_paths:
            conn.execute(
                text("DELETE FROM rag_chunks WHERE rag_session_id = :sid AND path = ANY(:paths)"),
                {"sid": rag_session_id, "paths": delete_paths},
            )
            conn.execute(
                text("DELETE FROM rag_session_chunk_map WHERE rag_session_id = :sid AND path = ANY(:paths)"),
                {"sid": rag_session_id, "paths": delete_paths},
            )
        if not docs:
            return
        # Replace semantics for upserted paths: clear their old chunks first.
        paths = sorted({doc.source.path for doc in docs})
        conn.execute(
            text("DELETE FROM rag_chunks WHERE rag_session_id = :sid AND path = ANY(:paths)"),
            {"sid": rag_session_id, "paths": paths},
        )
        conn.execute(
            text("DELETE FROM rag_session_chunk_map WHERE rag_session_id = :sid AND path = ANY(:paths)"),
            {"sid": rag_session_id, "paths": paths},
        )
        self.insert_documents(conn, rag_session_id, docs)

    def insert_documents(self, conn, rag_session_id: str, docs: list[RagDocument]) -> None:
        """Insert each document as one rag_chunks row (plus cache-map row).

        A rag_session_chunk_map row is written only when the document carries
        both a repo_id and a metadata blob_sha (i.e. it is cache-backed).
        """
        for doc in docs:
            row = doc.to_record()
            metadata = row["metadata"]
            links = row["links"]
            emb = row["embedding"] or []
            # pgvector literal "[x,y,...]"; NULL when the doc has no embedding.
            emb_str = "[" + ",".join(str(x) for x in emb) + "]" if emb else None
            conn.execute(
                text(
                    """
                    INSERT INTO rag_chunks (
                        rag_session_id, path, chunk_index, content, embedding, artifact_type, section, doc_id,
                        doc_version, owner, system_component, last_modified, staleness_score, created_at, updated_at,
                        rag_doc_id, layer, lang, repo_id, commit_sha, title, metadata_json, links_json, span_start,
                        span_end, symbol_id, qname, kind, framework, entrypoint_type, module_id, section_path, doc_kind
                    )
                    VALUES (
                        :sid, :path, :chunk_index, :content, CAST(:emb AS vector), :artifact_type, :section, :doc_id,
                        :doc_version, :owner, :system_component, :last_modified, :staleness_score, CURRENT_TIMESTAMP,
                        CURRENT_TIMESTAMP, :rag_doc_id, :layer, :lang, :repo_id, :commit_sha, :title, :metadata_json,
                        :links_json, :span_start, :span_end, :symbol_id, :qname, :kind, :framework, :entrypoint_type,
                        :module_id, :section_path, :doc_kind
                    )
                    """
                ),
                {
                    "sid": rag_session_id,
                    "path": row["path"],
                    "chunk_index": int(metadata.get("chunk_index", 0)),
                    "content": row["text"],
                    "emb": emb_str,
                    "artifact_type": metadata.get("artifact_type"),
                    "section": metadata.get("section") or metadata.get("section_title"),
                    "doc_id": metadata.get("doc_id"),
                    "doc_version": metadata.get("doc_version"),
                    "owner": metadata.get("owner"),
                    "system_component": metadata.get("system_component"),
                    "last_modified": metadata.get("last_modified"),
                    "staleness_score": metadata.get("staleness_score"),
                    "rag_doc_id": row["doc_id"],
                    "layer": row["layer"],
                    "lang": row["lang"],
                    "repo_id": row["repo_id"],
                    "commit_sha": row["commit_sha"],
                    "title": row["title"],
                    "metadata_json": json.dumps(metadata, ensure_ascii=True),
                    "links_json": json.dumps(links, ensure_ascii=True),
                    "span_start": row["span_start"],
                    "span_end": row["span_end"],
                    "symbol_id": metadata.get("symbol_id"),
                    "qname": metadata.get("qname"),
                    # Older documents may carry the kind under "type".
                    "kind": metadata.get("kind") or metadata.get("type"),
                    "framework": metadata.get("framework"),
                    "entrypoint_type": metadata.get("entry_type") or metadata.get("entrypoint_type"),
                    "module_id": metadata.get("module_id") or metadata.get("policy_id"),
                    "section_path": metadata.get("section_path"),
                    "doc_kind": metadata.get("doc_kind"),
                },
            )
            repo_id = str(row["repo_id"] or "").strip()
            blob_sha = str(metadata.get("blob_sha") or "").strip()
            if repo_id and blob_sha:
                conn.execute(
                    text(
                        """
                        INSERT INTO rag_session_chunk_map (
                            rag_session_id, repo_id, blob_sha, chunk_index, path
                        ) VALUES (:sid, :repo_id, :blob_sha, :chunk_index, :path)
                        """
                    ),
                    {
                        "sid": rag_session_id,
                        "repo_id": repo_id,
                        "blob_sha": blob_sha,
                        "chunk_index": int(metadata.get("chunk_index", 0)),
                        "path": row["path"],
                    },
                )

View File

@@ -0,0 +1,95 @@
from __future__ import annotations
from dataclasses import dataclass
from sqlalchemy import text
from app.modules.shared.db import get_engine
@dataclass
class RagJobRow:
    """One row of rag_index_jobs, as returned by RagJobRepository.get_job."""

    index_job_id: str
    rag_session_id: str
    status: str
    indexed_files: int
    failed_files: int
    cache_hit_files: int
    cache_miss_files: int
    # Error triple is NULL/None while the job has not failed.
    error_code: str | None
    error_desc: str | None
    error_module: str | None
class RagJobRepository:
    """CRUD helpers for rag_index_jobs; each method commits its own work."""

    def create_job(self, index_job_id: str, rag_session_id: str, status: str) -> None:
        """Insert a fresh job row (counters start at the table defaults)."""
        with get_engine().connect() as conn:
            conn.execute(
                text(
                    """
                    INSERT INTO rag_index_jobs (index_job_id, rag_session_id, status)
                    VALUES (:jid, :sid, :status)
                    """
                ),
                {"jid": index_job_id, "sid": rag_session_id, "status": status},
            )
            conn.commit()

    def update_job(
        self,
        index_job_id: str,
        *,
        status: str,
        indexed_files: int,
        failed_files: int,
        cache_hit_files: int = 0,
        cache_miss_files: int = 0,
        error_code: str | None = None,
        error_desc: str | None = None,
        error_module: str | None = None,
    ) -> None:
        """Overwrite the job's status, counters and error triple.

        All keyword values are written unconditionally, so omitted error
        fields reset to NULL.
        """
        with get_engine().connect() as conn:
            conn.execute(
                text(
                    """
                    UPDATE rag_index_jobs
                    SET status = :status,
                        indexed_files = :indexed,
                        failed_files = :failed,
                        cache_hit_files = :cache_hit_files,
                        cache_miss_files = :cache_miss_files,
                        error_code = :ecode,
                        error_desc = :edesc,
                        error_module = :emodule,
                        updated_at = CURRENT_TIMESTAMP
                    WHERE index_job_id = :jid
                    """
                ),
                {
                    "jid": index_job_id,
                    "status": status,
                    "indexed": indexed_files,
                    "failed": failed_files,
                    "cache_hit_files": cache_hit_files,
                    "cache_miss_files": cache_miss_files,
                    "ecode": error_code,
                    "edesc": error_desc,
                    "emodule": error_module,
                },
            )
            conn.commit()

    def get_job(self, index_job_id: str) -> RagJobRow | None:
        """Fetch the job as a RagJobRow, or None when the id is unknown."""
        with get_engine().connect() as conn:
            row = conn.execute(
                text(
                    """
                    SELECT index_job_id, rag_session_id, status, indexed_files, failed_files,
                        cache_hit_files, cache_miss_files, error_code, error_desc, error_module
                    FROM rag_index_jobs
                    WHERE index_job_id = :jid
                    """
                ),
                {"jid": index_job_id},
            ).mappings().fetchone()
            return RagJobRow(**dict(row)) if row else None

View File

@@ -0,0 +1,111 @@
from __future__ import annotations
import json
from sqlalchemy import text
from app.modules.rag.retrieval.query_terms import extract_query_terms
from app.modules.shared.db import get_engine
class RagQueryRepository:
    """Combined lexical + vector retrieval over the rag_chunks table."""

    def retrieve(
        self,
        rag_session_id: str,
        query_embedding: list[float],
        *,
        query_text: str = "",
        limit: int = 5,
        layers: list[str] | None = None,
        path_prefixes: list[str] | None = None,
        prefer_non_tests: bool = False,
    ) -> list[dict]:
        """Rank session chunks for the query and return the top *limit* rows.

        Ordering is lexical rank (best CASE match over extracted query terms),
        then a test-path penalty (when *prefer_non_tests*), then a fixed layer
        priority, then pgvector cosine distance. The SQL is assembled with
        f-strings, but every interpolated fragment is built from constants and
        bound-parameter names — user input only ever enters via parameters.
        """
        # pgvector literal "[x,y,...]" for the query embedding.
        emb = "[" + ",".join(str(x) for x in query_embedding) + "]"
        filters = ["rag_session_id = :sid"]
        params: dict = {"sid": rag_session_id, "emb": emb, "lim": limit}
        if layers:
            filters.append("layer = ANY(:layers)")
            params["layers"] = layers
        if path_prefixes:
            or_filters = []
            for idx, prefix in enumerate(path_prefixes):
                key = f"path_{idx}"
                params[key] = f"{prefix}%"
                or_filters.append(f"path LIKE :{key}")
            filters.append("(" + " OR ".join(or_filters) + ")")
        # One CASE expression per query term; lower numbers = stronger match
        # (exact qname beats exact symbol beats title, then prefix/contains).
        term_filters = []
        terms = extract_query_terms(query_text)
        for idx, term in enumerate(terms):
            exact_key = f"term_exact_{idx}"
            prefix_key = f"term_prefix_{idx}"
            contains_key = f"term_contains_{idx}"
            params[exact_key] = term
            params[prefix_key] = f"{term}%"
            params[contains_key] = f"%{term}%"
            term_filters.append(
                "CASE "
                f"WHEN lower(COALESCE(qname, '')) = :{exact_key} THEN 0 "
                f"WHEN lower(COALESCE(symbol_id, '')) = :{exact_key} THEN 1 "
                f"WHEN lower(COALESCE(title, '')) = :{exact_key} THEN 2 "
                f"WHEN lower(COALESCE(qname, '')) LIKE :{prefix_key} THEN 3 "
                f"WHEN lower(COALESCE(title, '')) LIKE :{prefix_key} THEN 4 "
                f"WHEN lower(COALESCE(path, '')) LIKE :{contains_key} THEN 5 "
                f"WHEN lower(COALESCE(content, '')) LIKE :{contains_key} THEN 6 "
                "ELSE 100 END"
            )
        # Best (lowest) rank across all terms; 100 means "no lexical signal".
        lexical_sql = "LEAST(" + ", ".join(term_filters) + ")" if term_filters else "100"
        test_penalty_sql = (
            "CASE "
            "WHEN lower(path) LIKE 'tests/%' OR lower(path) LIKE '%/tests/%' OR lower(path) LIKE 'test_%' OR lower(path) LIKE '%/test_%' "
            "THEN 1 ELSE 0 END"
            if prefer_non_tests
            else "0"
        )
        # Static layer priority: entrypoints/catalogs first, raw chunks last.
        layer_rank_sql = (
            "CASE "
            "WHEN layer = 'C3_ENTRYPOINTS' THEN 0 "
            "WHEN layer = 'C1_SYMBOL_CATALOG' THEN 1 "
            "WHEN layer = 'C2_DEPENDENCY_GRAPH' THEN 2 "
            "WHEN layer = 'C0_SOURCE_CHUNKS' THEN 3 "
            "WHEN layer = 'D1_MODULE_CATALOG' THEN 0 "
            "WHEN layer = 'D2_FACT_INDEX' THEN 1 "
            "WHEN layer = 'D3_SECTION_INDEX' THEN 2 "
            "WHEN layer = 'D4_POLICY_INDEX' THEN 3 "
            "ELSE 10 END"
        )
        sql = f"""
            SELECT path, content, layer, title, metadata_json, span_start, span_end,
                {lexical_sql} AS lexical_rank,
                {test_penalty_sql} AS test_penalty,
                {layer_rank_sql} AS layer_rank,
                (embedding <=> CAST(:emb AS vector)) AS distance
            FROM rag_chunks
            WHERE {' AND '.join(filters)}
            ORDER BY lexical_rank ASC, test_penalty ASC, layer_rank ASC, embedding <=> CAST(:emb AS vector)
            LIMIT :lim
        """
        with get_engine().connect() as conn:
            rows = conn.execute(text(sql), params).mappings().fetchall()
        return [self._row_to_dict(row) for row in rows]

    def fallback_chunks(self, rag_session_id: str, *, limit: int = 5, layers: list[str] | None = None) -> list[dict]:
        """Return the most recently inserted chunks (no ranking/embedding).

        Used as a degraded path when vector retrieval is unavailable.
        """
        filters = ["rag_session_id = :sid"]
        params: dict = {"sid": rag_session_id, "lim": limit}
        if layers:
            filters.append("layer = ANY(:layers)")
            params["layers"] = layers
        sql = f"""
            SELECT path, content, layer, title, metadata_json, span_start, span_end
            FROM rag_chunks
            WHERE {' AND '.join(filters)}
            ORDER BY id DESC
            LIMIT :lim
        """
        with get_engine().connect() as conn:
            rows = conn.execute(text(sql), params).mappings().fetchall()
        return [self._row_to_dict(row) for row in rows]

    def _row_to_dict(self, row) -> dict:
        """Convert a row mapping to a dict, decoding metadata_json into "metadata"."""
        data = dict(row)
        data["metadata"] = json.loads(str(data.pop("metadata_json") or "{}"))
        return data

View File

@@ -0,0 +1,82 @@
from __future__ import annotations
from app.modules.rag.contracts import RagDocument
from app.modules.rag.persistence.cache_repository import RagCacheRepository
from app.modules.rag.persistence.document_repository import RagDocumentRepository
from app.modules.rag.persistence.job_repository import RagJobRepository, RagJobRow
from app.modules.rag.persistence.query_repository import RagQueryRepository
from app.modules.rag.persistence.schema_repository import RagSchemaRepository
from app.modules.rag.persistence.session_repository import RagSessionRepository
from app.modules.shared.db import get_engine
class RagRepository:
    """Facade aggregating the RAG persistence sub-repositories behind one API.

    Schema, sessions, jobs, document writes, the blob cache and retrieval are
    each delegated to a dedicated repository; document writes are wrapped in a
    single transaction here.
    """

    def __init__(self) -> None:
        self._schema = RagSchemaRepository()
        self._sessions = RagSessionRepository()
        self._jobs = RagJobRepository()
        self._documents = RagDocumentRepository()
        self._cache = RagCacheRepository()
        self._query = RagQueryRepository()

    def ensure_tables(self) -> None:
        """Create or migrate all RAG tables, columns and indexes."""
        self._schema.ensure_tables()

    def upsert_session(self, rag_session_id: str, project_id: str) -> None:
        """Register the session or rebind it to *project_id*."""
        self._sessions.upsert_session(rag_session_id, project_id)

    def session_exists(self, rag_session_id: str) -> bool:
        """Return True when the session id is known."""
        return self._sessions.session_exists(rag_session_id)

    def get_session(self, rag_session_id: str) -> dict | None:
        """Return the session row as a dict, or None when absent."""
        return self._sessions.get_session(rag_session_id)

    def create_job(self, index_job_id: str, rag_session_id: str, status: str) -> None:
        """Insert a new indexing job row."""
        self._jobs.create_job(index_job_id, rag_session_id, status)

    def update_job(self, index_job_id: str, **kwargs) -> None:
        """Update job status/counters; see RagJobRepository.update_job."""
        self._jobs.update_job(index_job_id, **kwargs)

    def get_job(self, index_job_id: str) -> RagJobRow | None:
        """Fetch an indexing job row, or None when unknown."""
        return self._jobs.get_job(index_job_id)

    def replace_documents(self, rag_session_id: str, docs: list[RagDocument]) -> None:
        """Replace all session documents in one committed transaction."""
        with get_engine().connect() as conn:
            self._documents.replace_documents(conn, rag_session_id, docs)
            conn.commit()

    def apply_document_changes(self, rag_session_id: str, delete_paths: list[str], docs: list[RagDocument]) -> None:
        """Apply incremental deletes and upserts in one committed transaction."""
        with get_engine().connect() as conn:
            self._documents.apply_document_changes(conn, rag_session_id, delete_paths, docs)
            conn.commit()

    def get_cached_documents(self, repo_id: str, blob_sha: str) -> list[RagDocument]:
        """Return cached chunk documents for a blob (empty list on miss)."""
        return self._cache.get_cached_documents(repo_id, blob_sha)

    def cache_documents(self, repo_id: str, path: str, blob_sha: str, docs: list[RagDocument]) -> None:
        """Persist embedded chunks for a blob into the cache."""
        self._cache.cache_documents(repo_id, path, blob_sha, docs)

    def record_repo_cache(self, **kwargs) -> None:
        """Proxy to RagCacheRepository.record_repo_cache."""
        self._cache.record_repo_cache(**kwargs)

    def retrieve(
        self,
        rag_session_id: str,
        query_embedding: list[float],
        *,
        query_text: str = "",
        limit: int = 5,
        layers: list[str] | None = None,
        path_prefixes: list[str] | None = None,
        prefer_non_tests: bool = False,
    ) -> list[dict]:
        """Rank and return session chunks for the query embedding.

        ``path_prefixes`` (keyword-only, default ``None``) restricts results
        to paths starting with any given prefix. The underlying
        RagQueryRepository.retrieve already supported this filter; the facade
        previously dropped it — exposing it here is backward-compatible since
        existing callers pass nothing and get the old behavior.
        """
        return self._query.retrieve(
            rag_session_id,
            query_embedding,
            query_text=query_text,
            limit=limit,
            layers=layers,
            path_prefixes=path_prefixes,
            prefer_non_tests=prefer_non_tests,
        )

    def fallback_chunks(self, rag_session_id: str, limit: int = 5, layers: list[str] | None = None) -> list[dict]:
        """Return the most recently inserted chunks (degraded retrieval path)."""
        return self._query.fallback_chunks(rag_session_id, limit=limit, layers=layers)

View File

@@ -0,0 +1,179 @@
from __future__ import annotations
from sqlalchemy import text
from app.modules.shared.db import get_engine
class RagSchemaRepository:
    """Idempotent DDL for the RAG schema (pgvector extension, tables, indexes)."""

    def ensure_tables(self) -> None:
        """Create every RAG table if missing, then apply additive migrations.

        Safe to call repeatedly: all statements use IF NOT EXISTS / ADD COLUMN
        IF NOT EXISTS, and everything commits as one transaction.
        """
        engine = get_engine()
        with engine.connect() as conn:
            # Required for the "vector" column type and <=> operator.
            conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector"))
            conn.execute(
                text(
                    """
                    CREATE TABLE IF NOT EXISTS rag_sessions (
                        rag_session_id VARCHAR(64) PRIMARY KEY,
                        project_id VARCHAR(512) NOT NULL,
                        created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
                    )
                    """
                )
            )
            conn.execute(
                text(
                    """
                    CREATE TABLE IF NOT EXISTS rag_index_jobs (
                        index_job_id VARCHAR(64) PRIMARY KEY,
                        rag_session_id VARCHAR(64) NOT NULL,
                        status VARCHAR(16) NOT NULL,
                        indexed_files INTEGER NOT NULL DEFAULT 0,
                        failed_files INTEGER NOT NULL DEFAULT 0,
                        cache_hit_files INTEGER NOT NULL DEFAULT 0,
                        cache_miss_files INTEGER NOT NULL DEFAULT 0,
                        error_code VARCHAR(128) NULL,
                        error_desc TEXT NULL,
                        error_module VARCHAR(64) NULL,
                        created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
                        updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
                    )
                    """
                )
            )
            conn.execute(
                text(
                    """
                    CREATE TABLE IF NOT EXISTS rag_chunks (
                        id BIGSERIAL PRIMARY KEY,
                        rag_session_id VARCHAR(64) NOT NULL,
                        path TEXT NOT NULL,
                        chunk_index INTEGER NOT NULL,
                        content TEXT NOT NULL,
                        embedding vector NULL,
                        created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
                        updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
                    )
                    """
                )
            )
            conn.execute(
                text(
                    """
                    CREATE TABLE IF NOT EXISTS rag_blob_cache (
                        id BIGSERIAL PRIMARY KEY,
                        repo_id VARCHAR(512) NOT NULL,
                        blob_sha VARCHAR(128) NOT NULL,
                        path TEXT NOT NULL,
                        created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
                        updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
                        CONSTRAINT uq_rag_blob_cache UNIQUE (repo_id, blob_sha, path)
                    )
                    """
                )
            )
            conn.execute(
                text(
                    """
                    CREATE TABLE IF NOT EXISTS rag_chunk_cache (
                        id BIGSERIAL PRIMARY KEY,
                        repo_id VARCHAR(512) NOT NULL,
                        blob_sha VARCHAR(128) NOT NULL,
                        chunk_index INTEGER NOT NULL,
                        content TEXT NOT NULL,
                        embedding vector NULL,
                        created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
                        updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
                        CONSTRAINT uq_rag_chunk_cache UNIQUE (repo_id, blob_sha, chunk_index)
                    )
                    """
                )
            )
            conn.execute(
                text(
                    """
                    CREATE TABLE IF NOT EXISTS rag_session_chunk_map (
                        id BIGSERIAL PRIMARY KEY,
                        rag_session_id VARCHAR(64) NOT NULL,
                        repo_id VARCHAR(512) NOT NULL,
                        blob_sha VARCHAR(128) NOT NULL,
                        chunk_index INTEGER NOT NULL,
                        path TEXT NOT NULL,
                        created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
                    )
                    """
                )
            )
            self._ensure_columns(conn)
            self._ensure_indexes(conn)
            conn.commit()

    def _ensure_columns(self, conn) -> None:
        """Additive column migrations (ADD COLUMN IF NOT EXISTS only)."""
        for statement in (
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS artifact_type VARCHAR(16) NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS section TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS doc_id TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS doc_version TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS owner TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS system_component TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS last_modified TIMESTAMPTZ NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS staleness_score DOUBLE PRECISION NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS rag_doc_id VARCHAR(128) NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS layer VARCHAR(64) NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS lang VARCHAR(32) NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS repo_id VARCHAR(512) NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS commit_sha VARCHAR(128) NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS title TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS metadata_json TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS links_json TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS span_start INTEGER NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS span_end INTEGER NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS symbol_id TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS qname TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS kind TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS framework TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS entrypoint_type TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS module_id TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS section_path TEXT NULL",
            "ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS doc_kind TEXT NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS artifact_type VARCHAR(16) NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS section TEXT NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS doc_id TEXT NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS doc_version TEXT NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS owner TEXT NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS system_component TEXT NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS last_modified TIMESTAMPTZ NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS staleness_score DOUBLE PRECISION NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS layer VARCHAR(64) NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS lang VARCHAR(32) NULL",
            "ALTER TABLE rag_blob_cache ADD COLUMN IF NOT EXISTS metadata_json TEXT NULL",
            "ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS section TEXT NULL",
            "ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS layer VARCHAR(64) NULL",
            "ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS lang VARCHAR(32) NULL",
            "ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS path TEXT NULL",
            "ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS title TEXT NULL",
            "ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS metadata_json TEXT NULL",
            "ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS links_json TEXT NULL",
            "ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS span_start INTEGER NULL",
            "ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS span_end INTEGER NULL",
            "ALTER TABLE rag_chunk_cache ADD COLUMN IF NOT EXISTS commit_sha VARCHAR(128) NULL",
            "ALTER TABLE rag_index_jobs ADD COLUMN IF NOT EXISTS cache_hit_files INTEGER NOT NULL DEFAULT 0",
            "ALTER TABLE rag_index_jobs ADD COLUMN IF NOT EXISTS cache_miss_files INTEGER NOT NULL DEFAULT 0",
        ):
            conn.execute(text(statement))

    def _ensure_indexes(self, conn) -> None:
        """Create retrieval/cache-lookup indexes (all IF NOT EXISTS)."""
        for statement in (
            "CREATE INDEX IF NOT EXISTS idx_rag_chunks_session ON rag_chunks (rag_session_id)",
            "CREATE INDEX IF NOT EXISTS idx_rag_chunks_layer ON rag_chunks (rag_session_id, layer)",
            "CREATE INDEX IF NOT EXISTS idx_rag_chunks_layer_path ON rag_chunks (rag_session_id, layer, path)",
            "CREATE INDEX IF NOT EXISTS idx_rag_chunks_qname ON rag_chunks (qname)",
            "CREATE INDEX IF NOT EXISTS idx_rag_chunks_symbol_id ON rag_chunks (symbol_id)",
            "CREATE INDEX IF NOT EXISTS idx_rag_chunks_module_id ON rag_chunks (module_id)",
            "CREATE INDEX IF NOT EXISTS idx_rag_chunks_doc_kind ON rag_chunks (doc_kind)",
            "CREATE INDEX IF NOT EXISTS idx_rag_chunks_entrypoint ON rag_chunks (entrypoint_type, framework)",
            "CREATE INDEX IF NOT EXISTS idx_rag_blob_cache_repo_blob ON rag_blob_cache (repo_id, blob_sha)",
            "CREATE INDEX IF NOT EXISTS idx_rag_chunk_cache_repo_blob ON rag_chunk_cache (repo_id, blob_sha, chunk_index)",
            "CREATE INDEX IF NOT EXISTS idx_rag_session_chunk_map_session ON rag_session_chunk_map (rag_session_id, created_at DESC)",
        ):
            conn.execute(text(statement))

View File

@@ -0,0 +1,37 @@
from __future__ import annotations
from sqlalchemy import text
from app.modules.shared.db import get_engine
class RagSessionRepository:
    """CRUD helpers for the rag_sessions table."""

    def upsert_session(self, rag_session_id: str, project_id: str) -> None:
        """Insert the session, or rebind an existing one to *project_id*."""
        statement = text(
            """
            INSERT INTO rag_sessions (rag_session_id, project_id)
            VALUES (:sid, :pid)
            ON CONFLICT (rag_session_id) DO UPDATE SET project_id = EXCLUDED.project_id
            """
        )
        with get_engine().connect() as conn:
            conn.execute(statement, {"sid": rag_session_id, "pid": project_id})
            conn.commit()

    def session_exists(self, rag_session_id: str) -> bool:
        """Return True when the session id is registered."""
        query = text("SELECT 1 FROM rag_sessions WHERE rag_session_id = :sid")
        with get_engine().connect() as conn:
            found = conn.execute(query, {"sid": rag_session_id}).fetchone()
        return found is not None

    def get_session(self, rag_session_id: str) -> dict | None:
        """Return the session row as a plain dict, or None when absent."""
        query = text("SELECT rag_session_id, project_id FROM rag_sessions WHERE rag_session_id = :sid")
        with get_engine().connect() as conn:
            record = conn.execute(query, {"sid": rag_session_id}).mappings().fetchone()
        return dict(record) if record else None

View File

@@ -0,0 +1,43 @@
from __future__ import annotations
from app.modules.rag.contracts import RagLayer, RetrievalMode
class RagQueryRouter:
    """Heuristic router: pick code-oriented or docs-oriented retrieval layers."""

    # Substrings (Russian and English) that signal a code-centric question.
    _CODE_HINTS = (
        "как работает код",
        "explain code",
        "explain the code",
        "по коду",
        "из кода",
        "построй документацию по коду",
        "документацию по коду",
        "where is implemented",
        "где реализовано",
        "endpoint",
        "handler",
        "symbol",
        "function",
        "class",
        "method",
    )
    _DOCS_LAYERS = [
        RagLayer.DOCS_MODULE_CATALOG,
        RagLayer.DOCS_FACT_INDEX,
        RagLayer.DOCS_SECTION_INDEX,
        RagLayer.DOCS_POLICY_INDEX,
    ]
    _CODE_LAYERS = [
        RagLayer.CODE_ENTRYPOINTS,
        RagLayer.CODE_SYMBOL_CATALOG,
        RagLayer.CODE_DEPENDENCY_GRAPH,
        RagLayer.CODE_SOURCE_CHUNKS,
    ]

    def resolve_mode(self, query: str) -> str:
        """Return RetrievalMode.CODE when any code hint occurs in the query."""
        normalized = query.lower()
        for hint in self._CODE_HINTS:
            if hint in normalized:
                return RetrievalMode.CODE
        return RetrievalMode.DOCS

    def layers_for_mode(self, mode: str) -> list[str]:
        """Return a fresh, ordered list of retrieval layers for *mode*."""
        if mode == RetrievalMode.CODE:
            return list(self._CODE_LAYERS)
        return list(self._DOCS_LAYERS)

View File

@@ -0,0 +1,45 @@
from __future__ import annotations
import re
def extract_query_terms(query_text: str) -> list[str]:
    """Extract up to six normalized search terms from a free-text query.

    Latin identifiers (3+ chars) are expanded into casing variants, then
    intent-based keywords are appended; first occurrence wins, order kept.
    """
    ordered: dict[str, None] = {}
    for token in re.findall(r"[A-Za-z_][A-Za-z0-9_]{2,}", query_text or ""):
        for variant in _identifier_variants(token):
            ordered.setdefault(variant, None)
    for variant in _intent_variants(query_text):
        ordered.setdefault(variant, None)
    return list(ordered)[:6]
def _identifier_variants(term: str) -> list[str]:
    """Return ordered, de-duplicated casing variants of one identifier.

    Yields: lowercase form, snake_case form, a ``*_manager`` split for
    ``...manager`` suffixed terms, and the underscore-free compact form.
    """
    lowered = term.lower()
    variants = [lowered]

    def _add(candidate: str) -> None:
        # Append only non-empty, unseen candidates, preserving order.
        if candidate and candidate not in variants:
            variants.append(candidate)

    snake = _camel_to_snake(term)
    _add(snake)
    suffix = "manager"
    if lowered.endswith(suffix) and len(lowered) > len(suffix):
        _add(lowered[: -len(suffix)] + "_manager")
    _add(snake.replace("_", "") if snake else "")
    return variants
def _camel_to_snake(term: str) -> str:
first = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", term)
return re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", first).lower()
def _intent_variants(query_text: str) -> list[str]:
lowered = (query_text or "").lower()
variants: list[str] = []
if any(token in lowered for token in ("управ", "control", "manage", "management")):
variants.extend(["control", "management", "start", "stop", "status"])
return variants

View File

@@ -0,0 +1,197 @@
from __future__ import annotations
import asyncio
import hashlib
import os
from collections.abc import Awaitable, Callable
from inspect import isawaitable
from app.modules.rag.contracts import RagDocument
from app.modules.rag.indexing.code.pipeline import CodeIndexingPipeline
from app.modules.rag.indexing.common.report import IndexReport
from app.modules.rag.indexing.docs.pipeline import DocsIndexingPipeline
from app.modules.rag.persistence.repository import RagRepository
from app.modules.rag.retrieval.query_router import RagQueryRouter
from app.modules.rag_session.embedding.gigachat_embedder import GigaChatEmbedder
class RagService:
    def __init__(
        self,
        embedder: GigaChatEmbedder,
        repository: RagRepository,
        chunker=None,
    ) -> None:
        """Wire the service with its embedder, repository and pipelines.

        ``chunker`` is accepted but never stored or used here — presumably
        kept for backward compatibility with older call sites; TODO confirm.
        """
        self._embedder = embedder
        self._repo = repository
        self._docs = DocsIndexingPipeline()
        self._code = CodeIndexingPipeline()
        self._queries = RagQueryRouter()
    async def index_snapshot(
        self,
        rag_session_id: str,
        files: list[dict],
        progress_cb: Callable[[int, int, str], Awaitable[None] | None] | None = None,
    ) -> tuple[int, int, int, int]:
        """Index a full snapshot, replacing all documents of the session.

        Returns the report as a 4-int tuple — exact field order is defined
        by the report's ``as_tuple`` implementation; confirm there.
        """
        report = await self._index_files(rag_session_id, files, progress_cb=progress_cb)
        self._repo.replace_documents(rag_session_id, report.documents_list)
        return report.as_tuple()
async def index_changes(
self,
rag_session_id: str,
changed_files: list[dict],
progress_cb: Callable[[int, int, str], Awaitable[None] | None] | None = None,
) -> tuple[int, int, int, int]:
delete_paths: list[str] = []
upserts: list[dict] = []
for item in changed_files:
if str(item.get("op")) == "delete":
delete_paths.append(str(item.get("path", "")))
else:
upserts.append(item)
report = await self._index_files(rag_session_id, upserts, progress_cb=progress_cb)
self._repo.apply_document_changes(rag_session_id, delete_paths, report.documents_list)
return report.as_tuple()
    async def retrieve(self, rag_session_id: str, query: str) -> list[dict]:
        """Retrieve context rows for *query* with layered fallbacks.

        Flow: route the query to code or docs layers, embed it and run
        vector+lexical retrieval; on any embedding/retrieval error degrade to
        the newest chunks of the same layers; if a code-mode query still found
        nothing, retry the fallback with the docs layers.
        """
        mode = self._queries.resolve_mode(query)
        layers = self._queries.layers_for_mode(mode)
        # NOTE(review): compares against the literals "code"/"docs" while
        # resolve_mode returns RetrievalMode members — equivalent only if
        # RetrievalMode is a str-valued enum with these exact values; confirm.
        prefer_non_tests = mode == "code" and "test" not in query.lower() and "тест" not in query.lower()
        try:
            query_embedding = self._embedder.embed([query])[0]
            rows = self._repo.retrieve(
                rag_session_id,
                query_embedding,
                query_text=query,
                limit=8,
                layers=layers,
                prefer_non_tests=prefer_non_tests,
            )
        except Exception:
            # Deliberate best-effort: any embedder/DB failure degrades to the
            # recency-ordered fallback instead of failing the caller.
            rows = self._repo.fallback_chunks(rag_session_id, limit=8, layers=layers)
        if not rows and mode != "docs":
            rows = self._repo.fallback_chunks(rag_session_id, limit=8, layers=self._queries.layers_for_mode("docs"))
        return [
            {
                "source": row["path"],
                "content": row["content"],
                "layer": row.get("layer"),
                "title": row.get("title"),
                "metadata": row.get("metadata", {}),
                # Vector distance (lower = closer); absent on fallback rows.
                "score": row.get("distance"),
            }
            for row in rows
        ]
    async def _index_files(
        self,
        rag_session_id: str,
        files: list[dict],
        progress_cb: Callable[[int, int, str], Awaitable[None] | None] | None = None,
    ) -> "_PipelineReport":
        """Build (or load from cache) documents for every file in *files*.

        Per-file failures are recorded as warnings and counted instead of
        aborting the run; progress is reported after every file, including
        failed ones. Blocking repo/embedding calls run off the event loop.
        """
        total_files = len(files)
        report = _PipelineReport()
        repo_id = self._resolve_repo_id(rag_session_id)
        for index, file in enumerate(files, start=1):
            path = str(file.get("path", ""))
            try:
                blob_sha = self._blob_sha(file)
                # Cache is keyed by (repo, blob): unchanged content skips embedding.
                cached = await asyncio.to_thread(self._repo.get_cached_documents, repo_id, blob_sha)
                if cached:
                    report.documents_list.extend(self._with_file_metadata(cached, file, repo_id, blob_sha))
                    report.cache_hit_files += 1
                else:
                    built = self._build_documents(repo_id, path, file)
                    # Embedding is heavy (network/CPU) — keep it off the loop thread.
                    embedded = await asyncio.to_thread(self._embed_documents, built, file, repo_id, blob_sha)
                    report.documents_list.extend(embedded)
                    await asyncio.to_thread(self._repo.cache_documents, repo_id, path, blob_sha, embedded)
                    report.cache_miss_files += 1
                report.indexed_files += 1
            except Exception as exc:
                report.failed_files += 1
                report.warnings.append(f"{path}: {exc}")
            await self._notify_progress(progress_cb, index, total_files, path)
        report.documents = len(report.documents_list)
        return report
def _build_documents(self, repo_id: str, path: str, file: dict) -> list[RagDocument]:
content = str(file.get("content") or "")
commit_sha = file.get("commit_sha")
docs: list[RagDocument] = []
if self._docs.supports(path):
docs.extend(self._docs.index_file(repo_id=repo_id, commit_sha=commit_sha, path=path, content=content))
if self._code.supports(path):
docs.extend(self._code.index_file(repo_id=repo_id, commit_sha=commit_sha, path=path, content=content))
if not docs:
docs.extend(self._docs.index_file(repo_id=repo_id, commit_sha=commit_sha, path=path, content=content))
return docs
def _embed_documents(self, docs: list[RagDocument], file: dict, repo_id: str, blob_sha: str) -> list[RagDocument]:
if not docs:
return []
batch_size = max(1, int(os.getenv("RAG_EMBED_BATCH_SIZE", "16")))
metadata = self._document_metadata(file, repo_id, blob_sha)
for doc in docs:
doc.metadata.update(metadata)
for start in range(0, len(docs), batch_size):
batch = docs[start : start + batch_size]
vectors = self._embedder.embed([doc.text for doc in batch])
for doc, vector in zip(batch, vectors):
doc.embedding = vector
return docs
def _with_file_metadata(self, docs: list[RagDocument], file: dict, repo_id: str, blob_sha: str) -> list[RagDocument]:
metadata = self._document_metadata(file, repo_id, blob_sha)
for doc in docs:
doc.metadata.update(metadata)
doc.source.repo_id = repo_id
doc.source.path = str(file.get("path", doc.source.path))
return docs
def _document_metadata(self, file: dict, repo_id: str, blob_sha: str) -> dict:
return {
"blob_sha": blob_sha,
"repo_id": repo_id,
"artifact_type": file.get("artifact_type"),
"section": file.get("section"),
"doc_id": file.get("doc_id"),
"doc_version": file.get("doc_version"),
"owner": file.get("owner"),
"system_component": file.get("system_component"),
"last_modified": file.get("last_modified"),
"staleness_score": file.get("staleness_score"),
}
def _resolve_repo_id(self, rag_session_id: str) -> str:
session = self._repo.get_session(rag_session_id)
if not session:
return rag_session_id
return str(session.get("project_id") or rag_session_id)
def _blob_sha(self, file: dict) -> str:
raw = str(file.get("content_hash") or "").strip()
if raw:
return raw
content = str(file.get("content") or "")
return hashlib.sha256(content.encode("utf-8")).hexdigest()
async def _notify_progress(
self,
progress_cb: Callable[[int, int, str], Awaitable[None] | None] | None,
current_file_index: int,
total_files: int,
current_file_name: str,
) -> None:
if not progress_cb:
return
result = progress_cb(current_file_index, total_files, current_file_name)
if isawaitable(result):
await result
class _PipelineReport(IndexReport):
    """IndexReport extended with the concrete documents gathered while indexing."""

    def __init__(self) -> None:
        super().__init__()
        # Collected documents; the numeric `documents` counter on the base
        # report is derived from len(documents_list) at the end of a run.
        self.documents_list: list[RagDocument] = []

View File

@@ -3,8 +3,8 @@ from __future__ import annotations
from fastapi import APIRouter, Request
from app.modules.agent.story_context_repository import StoryContextRepository
from app.modules.rag.persistence.repository import RagRepository
from app.modules.rag_repo.webhook_service import RepoWebhookService
from app.modules.rag_session.repository import RagRepository
class RagRepoModule:

View File

@@ -1,660 +1,4 @@
from __future__ import annotations from app.modules.rag.persistence.job_repository import RagJobRow
from app.modules.rag.persistence.repository import RagRepository
from dataclasses import dataclass __all__ = ["RagJobRow", "RagRepository"]
import hashlib
from sqlalchemy import text
from app.modules.shared.db import get_engine
@dataclass
class RagJobRow:
    """Row projection of a record in the rag_index_jobs table."""

    index_job_id: str
    rag_session_id: str
    status: str
    # Counters accumulated over the indexing run.
    indexed_files: int
    failed_files: int
    cache_hit_files: int
    cache_miss_files: int
    # Error details; populated only when the job failed.
    error_code: str | None
    error_desc: str | None
    error_module: str | None
class RagRepository:
def ensure_tables(self) -> None:
engine = get_engine()
with engine.connect() as conn:
conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector"))
conn.execute(
text(
"""
CREATE TABLE IF NOT EXISTS rag_sessions (
rag_session_id VARCHAR(64) PRIMARY KEY,
project_id VARCHAR(512) NOT NULL,
created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
)
"""
)
)
conn.execute(
text(
"""
CREATE TABLE IF NOT EXISTS rag_index_jobs (
index_job_id VARCHAR(64) PRIMARY KEY,
rag_session_id VARCHAR(64) NOT NULL,
status VARCHAR(16) NOT NULL,
indexed_files INTEGER NOT NULL DEFAULT 0,
failed_files INTEGER NOT NULL DEFAULT 0,
cache_hit_files INTEGER NOT NULL DEFAULT 0,
cache_miss_files INTEGER NOT NULL DEFAULT 0,
error_code VARCHAR(128) NULL,
error_desc TEXT NULL,
error_module VARCHAR(64) NULL,
created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
)
"""
)
)
conn.execute(
text(
"""
CREATE TABLE IF NOT EXISTS rag_chunks (
id BIGSERIAL PRIMARY KEY,
rag_session_id VARCHAR(64) NOT NULL,
path TEXT NOT NULL,
chunk_index INTEGER NOT NULL,
content TEXT NOT NULL,
embedding vector NULL,
created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
)
"""
)
)
conn.execute(text("ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS artifact_type VARCHAR(16) NULL"))
conn.execute(text("ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS section TEXT NULL"))
conn.execute(text("ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS doc_id TEXT NULL"))
conn.execute(text("ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS doc_version TEXT NULL"))
conn.execute(text("ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS owner TEXT NULL"))
conn.execute(text("ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS system_component TEXT NULL"))
conn.execute(text("ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS last_modified TIMESTAMPTZ NULL"))
conn.execute(text("ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS staleness_score DOUBLE PRECISION NULL"))
conn.execute(
text(
"""
ALTER TABLE rag_chunks
ADD COLUMN IF NOT EXISTS created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
"""
)
)
conn.execute(
text(
"""
ALTER TABLE rag_chunks
ADD COLUMN IF NOT EXISTS updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
"""
)
)
conn.execute(
text(
"""
CREATE TABLE IF NOT EXISTS rag_blob_cache (
id BIGSERIAL PRIMARY KEY,
repo_id VARCHAR(512) NOT NULL,
blob_sha VARCHAR(128) NOT NULL,
path TEXT NOT NULL,
artifact_type VARCHAR(16) NULL,
section TEXT NULL,
doc_id TEXT NULL,
doc_version TEXT NULL,
owner TEXT NULL,
system_component TEXT NULL,
last_modified TIMESTAMPTZ NULL,
staleness_score DOUBLE PRECISION NULL,
created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT uq_rag_blob_cache UNIQUE (repo_id, blob_sha, path)
)
"""
)
)
conn.execute(
text(
"""
CREATE TABLE IF NOT EXISTS rag_chunk_cache (
id BIGSERIAL PRIMARY KEY,
repo_id VARCHAR(512) NOT NULL,
blob_sha VARCHAR(128) NOT NULL,
chunk_index INTEGER NOT NULL,
content TEXT NOT NULL,
embedding vector NULL,
section TEXT NULL,
created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT uq_rag_chunk_cache UNIQUE (repo_id, blob_sha, chunk_index)
)
"""
)
)
conn.execute(
text(
"""
CREATE TABLE IF NOT EXISTS rag_session_chunk_map (
id BIGSERIAL PRIMARY KEY,
rag_session_id VARCHAR(64) NOT NULL,
repo_id VARCHAR(512) NOT NULL,
blob_sha VARCHAR(128) NOT NULL,
chunk_index INTEGER NOT NULL,
path TEXT NOT NULL,
created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
)
"""
)
)
conn.execute(text("CREATE INDEX IF NOT EXISTS idx_rag_chunks_session ON rag_chunks (rag_session_id)"))
conn.execute(text("CREATE INDEX IF NOT EXISTS idx_rag_chunks_artifact_type ON rag_chunks (artifact_type)"))
conn.execute(text("CREATE INDEX IF NOT EXISTS idx_rag_chunks_doc ON rag_chunks (doc_id, doc_version)"))
conn.execute(text("CREATE INDEX IF NOT EXISTS idx_rag_chunks_component ON rag_chunks (system_component)"))
conn.execute(text("CREATE INDEX IF NOT EXISTS idx_rag_chunks_path ON rag_chunks (path)"))
conn.execute(text("CREATE INDEX IF NOT EXISTS idx_rag_blob_cache_repo_blob ON rag_blob_cache (repo_id, blob_sha)"))
conn.execute(text("CREATE INDEX IF NOT EXISTS idx_rag_chunk_cache_repo_blob ON rag_chunk_cache (repo_id, blob_sha, chunk_index)"))
conn.execute(text("CREATE INDEX IF NOT EXISTS idx_rag_session_chunk_map_session ON rag_session_chunk_map (rag_session_id, created_at DESC)"))
conn.execute(text("ALTER TABLE rag_index_jobs ADD COLUMN IF NOT EXISTS cache_hit_files INTEGER NOT NULL DEFAULT 0"))
conn.execute(text("ALTER TABLE rag_index_jobs ADD COLUMN IF NOT EXISTS cache_miss_files INTEGER NOT NULL DEFAULT 0"))
conn.commit()
    def upsert_session(self, rag_session_id: str, project_id: str) -> None:
        """Create the session row, or re-point an existing one at *project_id*."""
        with get_engine().connect() as conn:
            conn.execute(
                text(
                    """
                    INSERT INTO rag_sessions (rag_session_id, project_id)
                    VALUES (:sid, :pid)
                    ON CONFLICT (rag_session_id) DO UPDATE SET project_id = EXCLUDED.project_id
                    """
                ),
                {"sid": rag_session_id, "pid": project_id},
            )
            conn.commit()
    def session_exists(self, rag_session_id: str) -> bool:
        """Return True when a rag_sessions row exists for *rag_session_id*."""
        with get_engine().connect() as conn:
            row = conn.execute(
                text("SELECT 1 FROM rag_sessions WHERE rag_session_id = :sid"),
                {"sid": rag_session_id},
            ).fetchone()
            return bool(row)
    def get_session(self, rag_session_id: str) -> dict | None:
        """Fetch the session row as a plain dict, or None when absent."""
        with get_engine().connect() as conn:
            row = conn.execute(
                text("SELECT rag_session_id, project_id FROM rag_sessions WHERE rag_session_id = :sid"),
                {"sid": rag_session_id},
            ).mappings().fetchone()
            return dict(row) if row else None
    def create_job(self, index_job_id: str, rag_session_id: str, status: str) -> None:
        """Insert a new rag_index_jobs row; counters start at their column defaults."""
        with get_engine().connect() as conn:
            conn.execute(
                text(
                    """
                    INSERT INTO rag_index_jobs (index_job_id, rag_session_id, status)
                    VALUES (:jid, :sid, :status)
                    """
                ),
                {"jid": index_job_id, "sid": rag_session_id, "status": status},
            )
            conn.commit()
    def update_job(
        self,
        index_job_id: str,
        *,
        status: str,
        indexed_files: int,
        failed_files: int,
        cache_hit_files: int = 0,
        cache_miss_files: int = 0,
        error_code: str | None = None,
        error_desc: str | None = None,
        error_module: str | None = None,
    ) -> None:
        """Overwrite all mutable columns of a job row (keyword-only arguments).

        Note: every column is written on each call — omitted error fields
        reset to NULL, and updated_at is bumped to the current timestamp.
        """
        with get_engine().connect() as conn:
            conn.execute(
                text(
                    """
                    UPDATE rag_index_jobs
                    SET status = :status,
                        indexed_files = :indexed,
                        failed_files = :failed,
                        cache_hit_files = :cache_hit_files,
                        cache_miss_files = :cache_miss_files,
                        error_code = :ecode,
                        error_desc = :edesc,
                        error_module = :emodule,
                        updated_at = CURRENT_TIMESTAMP
                    WHERE index_job_id = :jid
                    """
                ),
                {
                    "jid": index_job_id,
                    "status": status,
                    "indexed": indexed_files,
                    "failed": failed_files,
                    "cache_hit_files": cache_hit_files,
                    "cache_miss_files": cache_miss_files,
                    "ecode": error_code,
                    "edesc": error_desc,
                    "emodule": error_module,
                },
            )
            conn.commit()
    def get_job(self, index_job_id: str) -> RagJobRow | None:
        """Load a job row into a RagJobRow, or None when the id is unknown."""
        with get_engine().connect() as conn:
            row = conn.execute(
                text(
                    """
                    SELECT index_job_id, rag_session_id, status, indexed_files, failed_files,
                           cache_hit_files, cache_miss_files, error_code, error_desc, error_module
                    FROM rag_index_jobs
                    WHERE index_job_id = :jid
                    """
                ),
                {"jid": index_job_id},
            ).mappings().fetchone()
            if not row:
                return None
            # Selected column names match the dataclass fields one-to-one.
            return RagJobRow(**dict(row))
    def replace_chunks(self, rag_session_id: str, items: list[dict]) -> None:
        """Replace all chunks (and chunk-map rows) for a session in one transaction."""
        with get_engine().connect() as conn:
            # Wipe previous state before inserting the new snapshot.
            conn.execute(text("DELETE FROM rag_chunks WHERE rag_session_id = :sid"), {"sid": rag_session_id})
            conn.execute(text("DELETE FROM rag_session_chunk_map WHERE rag_session_id = :sid"), {"sid": rag_session_id})
            self._insert_chunks(conn, rag_session_id, items)
            conn.commit()
    def apply_changes(self, rag_session_id: str, delete_paths: list[str], upserts: list[dict]) -> None:
        """Incrementally update a session: delete by path, then re-insert upserted paths.

        Upserts are implemented as delete-then-insert over the distinct set of
        affected paths, so stale chunk rows for a re-indexed file cannot survive.
        """
        with get_engine().connect() as conn:
            if delete_paths:
                conn.execute(
                    text("DELETE FROM rag_chunks WHERE rag_session_id = :sid AND path = ANY(:paths)"),
                    {"sid": rag_session_id, "paths": delete_paths},
                )
                conn.execute(
                    text("DELETE FROM rag_session_chunk_map WHERE rag_session_id = :sid AND path = ANY(:paths)"),
                    {"sid": rag_session_id, "paths": delete_paths},
                )
            if upserts:
                paths = sorted({str(x["path"]) for x in upserts})
                conn.execute(
                    text("DELETE FROM rag_chunks WHERE rag_session_id = :sid AND path = ANY(:paths)"),
                    {"sid": rag_session_id, "paths": paths},
                )
                conn.execute(
                    text("DELETE FROM rag_session_chunk_map WHERE rag_session_id = :sid AND path = ANY(:paths)"),
                    {"sid": rag_session_id, "paths": paths},
                )
                self._insert_chunks(conn, rag_session_id, upserts)
            conn.commit()
    def get_cached_chunks(self, repo_id: str, blob_sha: str) -> list[dict]:
        """Load cached chunks for a blob, decoding the pgvector text form back to floats."""
        with get_engine().connect() as conn:
            rows = conn.execute(
                text(
                    """
                    SELECT chunk_index, content, embedding::text AS embedding_txt, section
                    FROM rag_chunk_cache
                    WHERE repo_id = :repo_id AND blob_sha = :blob_sha
                    ORDER BY chunk_index ASC
                    """
                ),
                {"repo_id": repo_id, "blob_sha": blob_sha},
            ).mappings().fetchall()
            output: list[dict] = []
            for row in rows:
                output.append(
                    {
                        "chunk_index": int(row["chunk_index"]),
                        "content": str(row["content"] or ""),
                        # embedding was cast to text in SQL; parse it back here.
                        "embedding": self._parse_vector(str(row["embedding_txt"] or "")),
                        "section": row.get("section"),
                    }
                )
            return output
def record_repo_cache(
self,
*,
project_id: str,
commit_sha: str | None,
changed_files: list[str],
summary: str,
) -> None:
repo_session_id = f"repo:{project_id}"
with get_engine().connect() as conn:
for path in changed_files:
key = f"{commit_sha or 'no-commit'}:{path}"
blob_sha = hashlib.sha256(key.encode("utf-8")).hexdigest()
conn.execute(
text(
"""
INSERT INTO rag_blob_cache (
repo_id,
blob_sha,
path,
artifact_type,
section
)
VALUES (
:repo_id,
:blob_sha,
:path,
:artifact_type,
:section
)
ON CONFLICT (repo_id, blob_sha, path) DO UPDATE SET
updated_at = CURRENT_TIMESTAMP
"""
),
{
"repo_id": project_id,
"blob_sha": blob_sha,
"path": path,
"artifact_type": "CODE",
"section": "repo_webhook",
},
)
conn.execute(
text(
"""
INSERT INTO rag_chunk_cache (
repo_id,
blob_sha,
chunk_index,
content,
embedding,
section
)
VALUES (
:repo_id,
:blob_sha,
0,
:content,
NULL,
:section
)
ON CONFLICT (repo_id, blob_sha, chunk_index) DO UPDATE SET
content = EXCLUDED.content,
section = EXCLUDED.section,
updated_at = CURRENT_TIMESTAMP
"""
),
{
"repo_id": project_id,
"blob_sha": blob_sha,
"content": f"repo_webhook:{path}:{summary[:300]}",
"section": "repo_webhook",
},
)
conn.execute(
text(
"""
INSERT INTO rag_session_chunk_map (
rag_session_id,
repo_id,
blob_sha,
chunk_index,
path
)
VALUES (
:rag_session_id,
:repo_id,
:blob_sha,
0,
:path
)
"""
),
{
"rag_session_id": repo_session_id,
"repo_id": project_id,
"blob_sha": blob_sha,
"path": path,
},
)
conn.commit()
def cache_file_chunks(self, repo_id: str, path: str, blob_sha: str, items: list[dict]) -> None:
if not items:
return
meta = items[0]
with get_engine().connect() as conn:
conn.execute(
text(
"""
INSERT INTO rag_blob_cache (
repo_id,
blob_sha,
path,
artifact_type,
section,
doc_id,
doc_version,
owner,
system_component,
last_modified,
staleness_score
)
VALUES (
:repo_id,
:blob_sha,
:path,
:artifact_type,
:section,
:doc_id,
:doc_version,
:owner,
:system_component,
:last_modified,
:staleness_score
)
ON CONFLICT (repo_id, blob_sha, path) DO UPDATE SET
artifact_type = EXCLUDED.artifact_type,
section = EXCLUDED.section,
doc_id = EXCLUDED.doc_id,
doc_version = EXCLUDED.doc_version,
owner = EXCLUDED.owner,
system_component = EXCLUDED.system_component,
last_modified = EXCLUDED.last_modified,
staleness_score = EXCLUDED.staleness_score,
updated_at = CURRENT_TIMESTAMP
"""
),
{
"repo_id": repo_id,
"blob_sha": blob_sha,
"path": path,
"artifact_type": meta.get("artifact_type"),
"section": meta.get("section"),
"doc_id": meta.get("doc_id"),
"doc_version": meta.get("doc_version"),
"owner": meta.get("owner"),
"system_component": meta.get("system_component"),
"last_modified": meta.get("last_modified"),
"staleness_score": meta.get("staleness_score"),
},
)
for item in items:
emb = item.get("embedding") or []
emb_str = "[" + ",".join(str(x) for x in emb) + "]" if emb else None
conn.execute(
text(
"""
INSERT INTO rag_chunk_cache (
repo_id,
blob_sha,
chunk_index,
content,
embedding,
section
)
VALUES (
:repo_id,
:blob_sha,
:chunk_index,
:content,
CAST(:embedding AS vector),
:section
)
ON CONFLICT (repo_id, blob_sha, chunk_index) DO UPDATE SET
content = EXCLUDED.content,
embedding = EXCLUDED.embedding,
section = EXCLUDED.section,
updated_at = CURRENT_TIMESTAMP
"""
),
{
"repo_id": repo_id,
"blob_sha": blob_sha,
"chunk_index": int(item["chunk_index"]),
"content": item["content"],
"embedding": emb_str,
"section": item.get("section"),
},
)
conn.commit()
    def retrieve(self, rag_session_id: str, query_embedding: list[float], limit: int = 5) -> list[dict]:
        """Nearest-neighbour search over the session's chunks using pgvector.

        Rows are ordered by the `<=>` distance operator between the stored
        embedding and *query_embedding*; smallest distances come first.
        """
        # Serialize the embedding into pgvector's textual input format "[x,y,...]".
        emb = "[" + ",".join(str(x) for x in query_embedding) + "]"
        with get_engine().connect() as conn:
            rows = conn.execute(
                text(
                    """
                    SELECT path, content
                    FROM rag_chunks
                    WHERE rag_session_id = :sid
                    ORDER BY embedding <=> CAST(:emb AS vector)
                    LIMIT :lim
                    """
                ),
                {"sid": rag_session_id, "emb": emb, "lim": limit},
            ).mappings().fetchall()
            return [dict(x) for x in rows]
    def fallback_chunks(self, rag_session_id: str, limit: int = 5) -> list[dict]:
        """Most recently inserted chunks — used when vector search is unavailable."""
        with get_engine().connect() as conn:
            rows = conn.execute(
                text(
                    """
                    SELECT path, content
                    FROM rag_chunks
                    WHERE rag_session_id = :sid
                    ORDER BY id DESC
                    LIMIT :lim
                    """
                ),
                {"sid": rag_session_id, "lim": limit},
            ).mappings().fetchall()
            return [dict(x) for x in rows]
def _insert_chunks(self, conn, rag_session_id: str, items: list[dict]) -> None:
for item in items:
emb = item.get("embedding") or []
emb_str = "[" + ",".join(str(x) for x in emb) + "]" if emb else None
conn.execute(
text(
"""
INSERT INTO rag_chunks (
rag_session_id,
path,
chunk_index,
content,
embedding,
artifact_type,
section,
doc_id,
doc_version,
owner,
system_component,
last_modified,
staleness_score,
created_at,
updated_at
)
VALUES (
:sid,
:path,
:idx,
:content,
CAST(:emb AS vector),
:artifact_type,
:section,
:doc_id,
:doc_version,
:owner,
:system_component,
:last_modified,
:staleness_score,
CURRENT_TIMESTAMP,
CURRENT_TIMESTAMP
)
"""
),
{
"sid": rag_session_id,
"path": item["path"],
"idx": int(item["chunk_index"]),
"content": item["content"],
"emb": emb_str,
"artifact_type": item.get("artifact_type"),
"section": item.get("section"),
"doc_id": item.get("doc_id"),
"doc_version": item.get("doc_version"),
"owner": item.get("owner"),
"system_component": item.get("system_component"),
"last_modified": item.get("last_modified"),
"staleness_score": item.get("staleness_score"),
},
)
repo_id = str(item.get("repo_id") or "").strip()
blob_sha = str(item.get("blob_sha") or "").strip()
if repo_id and blob_sha:
conn.execute(
text(
"""
INSERT INTO rag_session_chunk_map (
rag_session_id,
repo_id,
blob_sha,
chunk_index,
path
) VALUES (
:sid,
:repo_id,
:blob_sha,
:chunk_index,
:path
)
"""
),
{
"sid": rag_session_id,
"repo_id": repo_id,
"blob_sha": blob_sha,
"chunk_index": int(item["chunk_index"]),
"path": item["path"],
},
)
def _parse_vector(self, value: str) -> list[float]:
text_value = value.strip()
if not text_value:
return []
if text_value.startswith("[") and text_value.endswith("]"):
text_value = text_value[1:-1]
if not text_value:
return []
return [float(part.strip()) for part in text_value.split(",") if part.strip()]

View File

@@ -1,211 +1,3 @@
import asyncio from app.modules.rag.services.rag_service import RagService
import hashlib
import os
from collections.abc import Awaitable, Callable
from inspect import isawaitable
from app.modules.rag_session.embedding.gigachat_embedder import GigaChatEmbedder __all__ = ["RagService"]
from app.modules.rag_session.repository import RagRepository
from app.modules.rag_session.retrieval.chunker import TextChunker
class RagService:
def __init__(
self,
embedder: GigaChatEmbedder,
repository: RagRepository,
chunker: TextChunker | None = None,
) -> None:
self._embedder = embedder
self._repo = repository
self._chunker = chunker or TextChunker()
async def index_snapshot(
self,
rag_session_id: str,
files: list[dict],
progress_cb: Callable[[int, int, str], Awaitable[None] | None] | None = None,
) -> tuple[int, int, int, int]:
total_files = len(files)
indexed_files = 0
failed_files = 0
cache_hit_files = 0
cache_miss_files = 0
all_chunks: list[dict] = []
repo_id = self._resolve_repo_id(rag_session_id)
for index, file in enumerate(files, start=1):
path = str(file.get("path", ""))
try:
blob_sha = self._blob_sha(file)
cached = await asyncio.to_thread(self._repo.get_cached_chunks, repo_id, blob_sha)
if cached:
all_chunks.extend(self._build_cached_items(path, file, repo_id, blob_sha, cached))
cache_hit_files += 1
else:
chunks = self._build_chunks_for_file(file)
embedded_chunks = await asyncio.to_thread(self._embed_chunks, chunks, file, repo_id, blob_sha)
all_chunks.extend(embedded_chunks)
await asyncio.to_thread(self._repo.cache_file_chunks, repo_id, path, blob_sha, embedded_chunks)
cache_miss_files += 1
indexed_files += 1
except Exception:
failed_files += 1
await self._notify_progress(progress_cb, index, total_files, path)
await asyncio.to_thread(self._repo.replace_chunks, rag_session_id, all_chunks)
return indexed_files, failed_files, cache_hit_files, cache_miss_files
async def index_changes(
self,
rag_session_id: str,
changed_files: list[dict],
progress_cb: Callable[[int, int, str], Awaitable[None] | None] | None = None,
) -> tuple[int, int, int, int]:
total_files = len(changed_files)
indexed_files = 0
failed_files = 0
cache_hit_files = 0
cache_miss_files = 0
delete_paths: list[str] = []
upsert_chunks: list[dict] = []
repo_id = self._resolve_repo_id(rag_session_id)
for index, file in enumerate(changed_files, start=1):
path = str(file.get("path", ""))
op = str(file.get("op", ""))
try:
if op == "delete":
delete_paths.append(path)
indexed_files += 1
await self._notify_progress(progress_cb, index, total_files, path)
continue
if op == "upsert" and file.get("content") is not None:
blob_sha = self._blob_sha(file)
cached = await asyncio.to_thread(self._repo.get_cached_chunks, repo_id, blob_sha)
if cached:
upsert_chunks.extend(self._build_cached_items(path, file, repo_id, blob_sha, cached))
cache_hit_files += 1
else:
chunks = self._build_chunks_for_file(file)
embedded_chunks = await asyncio.to_thread(self._embed_chunks, chunks, file, repo_id, blob_sha)
upsert_chunks.extend(embedded_chunks)
await asyncio.to_thread(self._repo.cache_file_chunks, repo_id, path, blob_sha, embedded_chunks)
cache_miss_files += 1
indexed_files += 1
await self._notify_progress(progress_cb, index, total_files, path)
continue
failed_files += 1
except Exception:
failed_files += 1
await self._notify_progress(progress_cb, index, total_files, path)
await asyncio.to_thread(
self._repo.apply_changes,
rag_session_id,
delete_paths,
upsert_chunks,
)
return indexed_files, failed_files, cache_hit_files, cache_miss_files
async def retrieve(self, rag_session_id: str, query: str) -> list[dict]:
try:
query_embedding = self._embedder.embed([query])[0]
rows = self._repo.retrieve(rag_session_id, query_embedding, limit=5)
except Exception:
rows = self._repo.fallback_chunks(rag_session_id, limit=5)
return [{"source": row["path"], "content": row["content"]} for row in rows]
def _build_chunks_for_file(self, file: dict) -> list[tuple[str, int, str]]:
path = str(file.get("path", ""))
content = str(file.get("content", ""))
output: list[tuple[str, int, str]] = []
for idx, chunk in enumerate(self._chunker.chunk(content)):
output.append((path, idx, chunk))
return output
def _embed_chunks(self, raw_chunks: list[tuple[str, int, str]], file: dict, repo_id: str, blob_sha: str) -> list[dict]:
if not raw_chunks:
return []
batch_size = max(1, int(os.getenv("RAG_EMBED_BATCH_SIZE", "16")))
metadata = self._chunk_metadata(file)
indexed: list[dict] = []
for i in range(0, len(raw_chunks), batch_size):
batch = raw_chunks[i : i + batch_size]
texts = [x[2] for x in batch]
vectors = self._embedder.embed(texts)
for (path, chunk_index, content), vector in zip(batch, vectors):
indexed.append(
{
"path": path,
"chunk_index": chunk_index,
"content": content,
"embedding": vector,
"repo_id": repo_id,
"blob_sha": blob_sha,
**metadata,
}
)
return indexed
def _build_cached_items(
self,
path: str,
file: dict,
repo_id: str,
blob_sha: str,
cached: list[dict],
) -> list[dict]:
metadata = self._chunk_metadata(file)
output: list[dict] = []
for item in cached:
output.append(
{
"path": path,
"chunk_index": int(item["chunk_index"]),
"content": str(item["content"]),
"embedding": item.get("embedding") or [],
"repo_id": repo_id,
"blob_sha": blob_sha,
**metadata,
"section": item.get("section") or metadata.get("section"),
}
)
return output
def _resolve_repo_id(self, rag_session_id: str) -> str:
session = self._repo.get_session(rag_session_id)
if not session:
return rag_session_id
return str(session.get("project_id") or rag_session_id)
def _blob_sha(self, file: dict) -> str:
raw = str(file.get("content_hash") or "").strip()
if raw:
return raw
content = str(file.get("content") or "")
return hashlib.sha256(content.encode("utf-8")).hexdigest()
def _chunk_metadata(self, file: dict) -> dict:
return {
"artifact_type": file.get("artifact_type"),
"section": file.get("section"),
"doc_id": file.get("doc_id"),
"doc_version": file.get("doc_version"),
"owner": file.get("owner"),
"system_component": file.get("system_component"),
"last_modified": file.get("last_modified"),
"staleness_score": file.get("staleness_score"),
}
async def _notify_progress(
self,
progress_cb: Callable[[int, int, str], Awaitable[None] | None] | None,
current_file_index: int,
total_files: int,
current_file_name: str,
) -> None:
if not progress_cb:
return
result = progress_cb(current_file_index, total_files, current_file_name)
if isawaitable(result):
await result

View File

@@ -0,0 +1,131 @@
from app.modules.agent.engine.orchestrator.actions.explain_actions import ExplainActions
from app.modules.agent.engine.orchestrator.execution_context import ExecutionContext
from app.modules.agent.engine.orchestrator.models import (
ExecutionPlan,
OutputContract,
RoutingMeta,
Scenario,
TaskConstraints,
TaskSpec,
)
def _ctx(rag_items: list[dict]) -> ExecutionContext:
task = TaskSpec(
task_id="task-1",
dialog_session_id="dialog-1",
rag_session_id="rag-1",
user_message="Объясни по коду как работает task_processor",
scenario=Scenario.EXPLAIN_PART,
routing=RoutingMeta(domain_id="project", process_id="qa", confidence=0.9, reason="test"),
constraints=TaskConstraints(),
output_contract=OutputContract(result_type="answer"),
metadata={
"rag_items": rag_items,
"rag_context": "",
"confluence_context": "",
"files_map": {},
},
)
plan = ExecutionPlan(
plan_id="plan-1",
task_id="task-1",
scenario=Scenario.EXPLAIN_PART,
template_id="tpl",
template_version="1",
steps=[],
)
return ExecutionContext(task=task, plan=plan, graph_resolver=lambda *_: None, graph_invoker=lambda *_: {})
def test_explain_actions_switch_to_code_profile_when_code_layers_present() -> None:
ctx = _ctx(
[
{
"source": "app/task_processor.py",
"layer": "C1_SYMBOL_CATALOG",
"title": "task_processor.process_task",
"content": "function task_processor.process_task(task)",
"metadata": {"qname": "task_processor.process_task", "kind": "function"},
},
{
"source": "app/task_processor.py",
"layer": "C2_DEPENDENCY_GRAPH",
"title": "task_processor.process_task:calls",
"content": "task_processor.process_task calls queue.publish",
"metadata": {"edge_type": "calls"},
},
]
)
actions = ExplainActions()
actions.collect_sources(ctx)
actions.extract_logic(ctx)
actions.summarize(ctx)
sources = ctx.artifacts.get_content("sources", {})
assert sources["source_profile"] == "code"
answer = str(ctx.artifacts.get_content("final_answer", ""))
assert "кодовых слоев индекса" not in answer
assert "CodeRAG" not in answer
assert "app/task_processor.py" in answer
assert "requirements/docs context" not in answer
def test_explain_actions_add_code_details_block() -> None:
ctx = _ctx(
[
{
"source": "src/config_manager/__init__.py",
"layer": "C1_SYMBOL_CATALOG",
"title": "ConfigManager",
"content": "const ConfigManager\nConfigManager = config_manager.v2.ConfigManagerV2",
"metadata": {
"qname": "ConfigManager",
"kind": "const",
"lang_payload": {"imported_from": "v2.ConfigManagerV2", "import_alias": True},
},
},
{
"source": "src/config_manager/v2/control/base.py",
"layer": "C1_SYMBOL_CATALOG",
"title": "ControlChannel",
"content": "class ControlChannel\nControlChannel(ABC)",
"metadata": {"qname": "ControlChannel", "kind": "class"},
},
{
"source": "src/config_manager/v2/core/control_bridge.py",
"layer": "C1_SYMBOL_CATALOG",
"title": "ControlChannelBridge",
"content": "class ControlChannelBridge\nПредоставляет halt и status как обработчики start/stop/status",
"metadata": {"qname": "ControlChannelBridge", "kind": "class"},
},
{
"source": "src/config_manager/v2/core/control_bridge.py",
"layer": "C2_DEPENDENCY_GRAPH",
"title": "ControlChannelBridge.on_start:calls",
"content": "ControlChannelBridge.on_start calls self._start_runtime",
"metadata": {"src_qname": "ControlChannelBridge.on_start", "dst_ref": "self._start_runtime"},
},
{
"source": "src/config_manager/v2/__init__.py",
"layer": "C0_SOURCE_CHUNKS",
"title": "src/config_manager/v2/__init__.py:1-6",
"content": '"""Контракт: управление через API (config.yaml, секция management)."""',
"metadata": {},
},
]
)
actions = ExplainActions()
actions.collect_sources(ctx)
actions.extract_logic(ctx)
actions.summarize(ctx)
answer = str(ctx.artifacts.get_content("final_answer", ""))
assert "### Что видно по коду" in answer
assert "ConfigManager` в проекте доступен как alias" in answer
assert "ControlChannelBridge.on_start" in answer
assert "### Где смотреть в проекте" in answer
assert "В индексе нет точного символа" not in answer
assert "отдельный интерфейс управления" in answer

View File

@@ -0,0 +1,57 @@
from app.modules.rag.contracts.enums import RagLayer
from app.modules.rag.indexing.code.pipeline import CodeIndexingPipeline
def test_code_pipeline_builds_source_symbols_edges_and_entrypoints() -> None:
pipeline = CodeIndexingPipeline()
content = """
from fastapi import APIRouter
router = APIRouter()
class UserService:
def get_user(self, user_id):
return user_id
@router.get("/users/{user_id}")
async def get_user(user_id: str):
service = UserService()
return service.get_user(user_id)
"""
docs = pipeline.index_file(
repo_id="acme/proj",
commit_sha="abc123",
path="app/api/users.py",
content=content,
)
layers = {doc.layer for doc in docs}
assert RagLayer.CODE_SOURCE_CHUNKS in layers
assert RagLayer.CODE_SYMBOL_CATALOG in layers
assert RagLayer.CODE_DEPENDENCY_GRAPH in layers
assert RagLayer.CODE_ENTRYPOINTS in layers
symbol_doc = next(doc for doc in docs if doc.layer == RagLayer.CODE_SYMBOL_CATALOG and doc.metadata["kind"] == "function")
assert "get_user" in symbol_doc.metadata["qname"]
edge_doc = next(doc for doc in docs if doc.layer == RagLayer.CODE_DEPENDENCY_GRAPH)
assert edge_doc.metadata["edge_type"] in {"calls", "imports", "inherits"}
entry_doc = next(doc for doc in docs if doc.layer == RagLayer.CODE_ENTRYPOINTS)
assert entry_doc.metadata["framework"] == "fastapi"
def test_code_pipeline_indexes_import_alias_as_symbol() -> None:
    """A re-exported import alias must land in the symbol catalog.

    ``ConfigManager`` is only an alias for ``ConfigManagerV2``; the pipeline
    should record it as a ``const`` symbol flagged with ``import_alias``.
    """
    docs = CodeIndexingPipeline().index_file(
        repo_id="acme/proj",
        commit_sha="abc123",
        path="src/config_manager/__init__.py",
        content="from .v2 import ConfigManagerV2 as ConfigManager\n",
    )
    alias_doc = next(
        doc
        for doc in docs
        if doc.layer == RagLayer.CODE_SYMBOL_CATALOG
        and doc.metadata["qname"] == "ConfigManager"
    )
    assert alias_doc.metadata["kind"] == "const"
    assert alias_doc.metadata["lang_payload"]["import_alias"] is True

View File

@@ -0,0 +1,63 @@
from app.modules.rag.contracts.enums import RagLayer
from app.modules.rag.indexing.docs.pipeline import DocsIndexingPipeline
def test_docs_pipeline_builds_catalog_facts_sections_and_policy() -> None:
    """Indexing a policy markdown doc must fill all four docs RAG layers.

    The fixture carries YAML frontmatter (module id, type, links) plus
    contract / error / rules sections, so catalog, fact, section and policy
    layers should all produce documents with the expected metadata.
    """
    content = """---
id: api.billing.create_invoice
type: policy
domain: billing
links:
  calls_api:
    - api.billing.validate_invoice
tags: [billing]
status: active
---
# Create Invoice
## Spec Summary
Creates an invoice in billing.
## Request Contract
| field | type | required | validation |
| --- | --- | --- | --- |
| amount | decimal | yes | > 0 |
## Error Matrix
| status | error | client action |
| --- | --- | --- |
| 400 | invalid_amount | fix request |
## Rules
- metric: billing.invoice.created
- rule: amount must be positive
"""
    docs = DocsIndexingPipeline().index_file(
        repo_id="acme/proj",
        commit_sha="abc123",
        path="docs/billing/create_invoice.md",
        content=content,
    )

    produced_layers = {doc.layer for doc in docs}
    assert {
        RagLayer.DOCS_MODULE_CATALOG,
        RagLayer.DOCS_FACT_INDEX,
        RagLayer.DOCS_SECTION_INDEX,
        RagLayer.DOCS_POLICY_INDEX,
    } <= produced_layers

    # Frontmatter fields should flow into the module catalog entry.
    module_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_MODULE_CATALOG)
    assert module_doc.metadata["module_id"] == "api.billing.create_invoice"
    assert module_doc.metadata["type"] == "policy"

    # Facts extracted from links, the request table and the error matrix.
    fact_texts = [doc.text for doc in docs if doc.layer == RagLayer.DOCS_FACT_INDEX]
    for marker in ("calls_api", "has_field", "returns_error"):
        assert any(marker in text for text in fact_texts)

    # Section documents must keep their heading path.
    section_doc = next(doc for doc in docs if doc.layer == RagLayer.DOCS_SECTION_INDEX)
    assert section_doc.metadata["section_path"]

View File

@@ -0,0 +1,12 @@
from app.modules.rag.contracts.enums import RetrievalMode
from app.modules.rag.retrieval.query_router import RagQueryRouter
def test_query_router_uses_docs_by_default() -> None:
    """A plain requirements question should be routed to the DOCS corpus."""
    mode = RagQueryRouter().resolve_mode("Какие есть требования по биллингу?")
    assert mode == RetrievalMode.DOCS
def test_query_router_switches_to_code_on_explicit_code_requests() -> None:
    """An explicit mention of code/endpoints should flip routing to CODE."""
    mode = RagQueryRouter().resolve_mode(
        "Объясни как работает код endpoint create invoice"
    )
    assert mode == RetrievalMode.CODE

View File

@@ -0,0 +1,9 @@
from app.modules.rag.retrieval.query_terms import extract_query_terms
def test_extract_query_terms_from_code_question() -> None:
    """Term extraction should normalise identifiers and add variants.

    From a mixed-case mention of ``COnfigmanager`` in a Russian question we
    expect the lowercased form, the snake_case form, and a semantic variant
    ("control") to all appear in the extracted terms.
    """
    terms = extract_query_terms("Объясни по коду как можно управлять COnfigmanager?")
    for expected in ("configmanager", "config_manager", "control"):
        assert expected in terms