Фиксация изменений

2026-03-05 11:03:17 +03:00
parent 1ef0b4d68c
commit 417b8b6f72
261 changed files with 8215 additions and 332 deletions
--- a/docs/architecture/contracts_retrieval.json
+++ b/docs/architecture/contracts_retrieval.json
@@ -0,0 +1,380 @@
+{
+  "layers": {
+    "C0_SOURCE_CHUNKS": {
+      "retriever": {
+        "class": "RagService",
+        "file": "app/modules/rag/services/rag_service.py",
+        "method": "retrieve"
+      },
+      "indexer": {
+        "class": "CodeTextDocumentBuilder",
+        "file": "app/modules/rag/indexing/code/code_text/document_builder.py",
+        "method": "build"
+      },
+      "input": {
+        "type": "observed shape",
+        "fields": {
+          "rag_session_id": {
+            "type": "string",
+            "required": true
+          },
+          "query": {
+            "type": "string",
+            "required": true
+          },
+          "layers": {
+            "type": "implicit list[string]",
+            "required": false,
+            "source": "RagQueryRouter.layers_for_mode('code')"
+          }
+        }
+      },
+      "output": {
+        "type": "list[dict]",
+        "fields": {
+          "source": "string",
+          "content": "string",
+          "layer": "\"C0_SOURCE_CHUNKS\"",
+          "title": "string",
+          "metadata": {
+            "chunk_index": "int",
+            "chunk_type": "\"symbol_block\" | \"window\"",
+            "module_or_unit": "string",
+            "artifact_type": "\"CODE\""
+          },
+          "score": "float | null"
+        }
+      },
+      "examples": {
+        "input": {
+          "rag_session_id": "rag-123",
+          "query": "where is implemented get_user"
+        },
+        "output": {
+          "source": "app/api/users.py",
+          "content": "async def get_user(user_id: str):\n    service = UserService()\n    return service.get_user(user_id)",
+          "layer": "C0_SOURCE_CHUNKS",
+          "title": "app/api/users.py:get_user",
+          "metadata": {
+            "chunk_index": 0,
+            "chunk_type": "symbol_block",
+            "module_or_unit": "app.api.users",
+            "artifact_type": "CODE"
+          },
+          "score": 0.07
+        }
+      },
+      "defaults": {
+        "retrieve_limit": 8,
+        "embed_batch_size_env": "RAG_EMBED_BATCH_SIZE",
+        "embed_batch_size_default": 16,
+        "window_chunk_size_lines": 80,
+        "window_overlap_lines": 15
+      },
+      "limitations": [
+        "Line spans are stored but not returned in the public retrieval item shape.",
+        "No direct path or namespace filter is exposed through the retrieval endpoint."
+      ]
+    },
+    "C1_SYMBOL_CATALOG": {
+      "retriever": {
+        "class": "RagService",
+        "file": "app/modules/rag/services/rag_service.py",
+        "method": "retrieve"
+      },
+      "indexer": {
+        "class": "SymbolDocumentBuilder",
+        "file": "app/modules/rag/indexing/code/symbols/document_builder.py",
+        "method": "build"
+      },
+      "input": {
+        "type": "observed shape",
+        "fields": {
+          "rag_session_id": {
+            "type": "string",
+            "required": true
+          },
+          "query": {
+            "type": "string",
+            "required": true
+          },
+          "query_term_expansion": {
+            "type": "list[string]",
+            "required": false,
+            "source": "extract_query_terms(query_text)",
+            "max_items": 6
+          }
+        }
+      },
+      "output": {
+        "type": "list[dict]",
+        "fields": {
+          "source": "string",
+          "content": "string",
+          "layer": "\"C1_SYMBOL_CATALOG\"",
+          "title": "string",
+          "metadata": {
+            "symbol_id": "string",
+            "qname": "string",
+            "kind": "\"class\" | \"function\" | \"method\" | \"const\"",
+            "signature": "string",
+            "decorators_or_annotations": "list[string]",
+            "docstring_or_javadoc": "string | null",
+            "parent_symbol_id": "string | null",
+            "package_or_module": "string",
+            "is_entry_candidate": "bool",
+            "lang_payload": "object",
+            "artifact_type": "\"CODE\""
+          },
+          "score": "float | null"
+        }
+      },
+      "examples": {
+        "input": {
+          "rag_session_id": "rag-123",
+          "query": "where is implemented get_user"
+        },
+        "output": {
+          "source": "app/api/users.py",
+          "content": "function get_user\nget_user(user_id)",
+          "layer": "C1_SYMBOL_CATALOG",
+          "title": "get_user",
+          "metadata": {
+            "symbol_id": "sha256(...)",
+            "qname": "get_user",
+            "kind": "function",
+            "signature": "get_user(user_id)",
+            "decorators_or_annotations": [
+              "router.get"
+            ],
+            "docstring_or_javadoc": null,
+            "parent_symbol_id": null,
+            "package_or_module": "app.api.users",
+            "is_entry_candidate": true,
+            "lang_payload": {
+              "async": true
+            },
+            "artifact_type": "CODE"
+          },
+          "score": 0.07
+        }
+      },
+      "defaults": {
+        "retrieve_limit": 8,
+        "layer_rank": 1
+      },
+      "limitations": [
+        "Only Python AST symbols are indexed.",
+        "Cross-file resolution is not implemented.",
+        "parent_symbol_id is an observed qname-like value, not guaranteed to be a symbol hash."
+      ]
+    },
+    "C2_DEPENDENCY_GRAPH": {
+      "retriever": {
+        "class": "RagService",
+        "file": "app/modules/rag/services/rag_service.py",
+        "method": "retrieve"
+      },
+      "indexer": {
+        "class": "EdgeDocumentBuilder",
+        "file": "app/modules/rag/indexing/code/edges/document_builder.py",
+        "method": "build"
+      },
+      "input": {
+        "type": "observed shape",
+        "fields": {
+          "rag_session_id": {
+            "type": "string",
+            "required": true
+          },
+          "query": {
+            "type": "string",
+            "required": true
+          }
+        }
+      },
+      "output": {
+        "type": "list[dict]",
+        "fields": {
+          "source": "string",
+          "content": "string",
+          "layer": "\"C2_DEPENDENCY_GRAPH\"",
+          "title": "string",
+          "metadata": {
+            "edge_id": "string",
+            "edge_type": "\"calls\" | \"imports\" | \"inherits\"",
+            "src_symbol_id": "string",
+            "src_qname": "string",
+            "dst_symbol_id": "string | null",
+            "dst_ref": "string | null",
+            "resolution": "\"resolved\" | \"partial\"",
+            "lang_payload": "object",
+            "artifact_type": "\"CODE\""
+          },
+          "score": "float | null"
+        }
+      },
+      "examples": {
+        "input": {
+          "rag_session_id": "rag-123",
+          "query": "how get_user calls service"
+        },
+        "output": {
+          "source": "app/api/users.py",
+          "content": "get_user calls UserService",
+          "layer": "C2_DEPENDENCY_GRAPH",
+          "title": "get_user:calls",
+          "metadata": {
+            "edge_id": "sha256(...)",
+            "edge_type": "calls",
+            "src_symbol_id": "sha256(...)",
+            "src_qname": "get_user",
+            "dst_symbol_id": null,
+            "dst_ref": "UserService",
+            "resolution": "partial",
+            "lang_payload": {
+              "callsite_kind": "function_call"
+            },
+            "artifact_type": "CODE"
+          },
+          "score": 0.11
+        }
+      },
+      "defaults": {
+        "retrieve_limit": 8,
+        "layer_rank": 2,
+        "graph_build_mode": "static_python_ast"
+      },
+      "limitations": [
+        "No traversal API exists.",
+        "Edges are stored as retrievable rows, not as a graph-native store.",
+        "Destination resolution is local to one indexed file."
+      ]
+    },
+    "C3_ENTRYPOINTS": {
+      "retriever": {
+        "class": "RagService",
+        "file": "app/modules/rag/services/rag_service.py",
+        "method": "retrieve"
+      },
+      "indexer": {
+        "class": "EntrypointDocumentBuilder",
+        "file": "app/modules/rag/indexing/code/entrypoints/document_builder.py",
+        "method": "build"
+      },
+      "input": {
+        "type": "observed shape",
+        "fields": {
+          "rag_session_id": {
+            "type": "string",
+            "required": true
+          },
+          "query": {
+            "type": "string",
+            "required": true
+          }
+        }
+      },
+      "output": {
+        "type": "list[dict]",
+        "fields": {
+          "source": "string",
+          "content": "string",
+          "layer": "\"C3_ENTRYPOINTS\"",
+          "title": "string",
+          "metadata": {
+            "entry_id": "string",
+            "entry_type": "\"http\" | \"cli\"",
+            "framework": "\"fastapi\" | \"flask\" | \"typer\" | \"click\"",
+            "route_or_command": "string",
+            "handler_symbol_id": "string",
+            "lang_payload": "object",
+            "artifact_type": "\"CODE\""
+          },
+          "score": "float | null"
+        }
+      },
+      "examples": {
+        "input": {
+          "rag_session_id": "rag-123",
+          "query": "which endpoint handles get user"
+        },
+        "output": {
+          "source": "app/api/users.py",
+          "content": "fastapi http \"/users/{user_id}\"",
+          "layer": "C3_ENTRYPOINTS",
+          "title": "\"/users/{user_id}\"",
+          "metadata": {
+            "entry_id": "sha256(...)",
+            "entry_type": "http",
+            "framework": "fastapi",
+            "route_or_command": "\"/users/{user_id}\"",
+            "handler_symbol_id": "sha256(...)",
+            "lang_payload": {
+              "methods": [
+                "GET"
+              ]
+            },
+            "artifact_type": "CODE"
+          },
+          "score": 0.05
+        }
+      },
+      "defaults": {
+        "retrieve_limit": 8,
+        "layer_rank": 0
+      },
+      "limitations": [
+        "Detection is decorator-string based.",
+        "Django, Celery, RQ, and cron entrypoints are not detected.",
+        "Returned payload does not expose line spans."
+      ]
+    }
+  },
+  "retrieval_endpoint": {
+    "entrypoint": {
+      "file": "app/modules/rag_session/module.py",
+      "method": "internal_router.retrieve"
+    },
+    "request": {
+      "type": "dict",
+      "fields": {
+        "rag_session_id": "string | optional if project_id provided",
+        "project_id": "string | optional fallback for rag_session_id",
+        "query": "string"
+      }
+    },
+    "response": {
+      "type": "dict",
+      "fields": {
+        "items": "list[retrieval item]"
+      }
+    },
+    "defaults": {
+      "mode": "docs unless RagQueryRouter detects code hints",
+      "limit": 8,
+      "embedding_provider": "GigaChat embeddings",
+      "fallback_after_embedding_error": true,
+      "fallback_to_docs_when_code_empty": true
+    }
+  },
+  "ranking": {
+    "storage": "PostgreSQL rag_chunks + pgvector",
+    "query_repository": {
+      "class": "RagQueryRepository",
+      "file": "app/modules/rag/persistence/query_repository.py",
+      "method": "retrieve"
+    },
+    "order_by": [
+      "lexical_rank ASC",
+      "test_penalty ASC",
+      "layer_rank ASC",
+      "embedding <=> query_embedding ASC"
+    ],
+    "notes": [
+      "lexical_rank is derived from qname/symbol_id/title/path/content matching extracted query terms",
+      "test_penalty is applied only when prefer_non_tests=true",
+      "layer priority is C3 > C1 > C2 > C0 for code retrieval"
+    ]
+  }
+}
--- a/docs/architecture/llm_inventory.md
+++ b/docs/architecture/llm_inventory.md
@@ -0,0 +1,270 @@
+# LLM Inventory
+
+## Provider and SDK
+
+- Provider in code: GigaChat / Sber
+- Local SDK style: custom thin HTTP client over `requests`
+- Core files:
+  - `app/modules/shared/gigachat/client.py`
+  - `app/modules/shared/gigachat/settings.py`
+  - `app/modules/shared/gigachat/token_provider.py`
+  - `app/modules/agent/llm/service.py`
+
+There is no OpenAI SDK, Azure SDK, or local model runtime in the current implementation.
+
+## Configuration
+
+Model and endpoint configuration are read from environment in `GigaChatSettings.from_env()`:
+
+- `GIGACHAT_AUTH_URL`
+  - default: `https://ngw.devices.sberbank.ru:9443/api/v2/oauth`
+- `GIGACHAT_API_URL`
+  - default: `https://gigachat.devices.sberbank.ru/api/v1`
+- `GIGACHAT_SCOPE`
+  - default: `GIGACHAT_API_PERS`
+- `GIGACHAT_TOKEN`
+  - required for auth
+- `GIGACHAT_SSL_VERIFY`
+  - default: `true`
+- `GIGACHAT_MODEL`
+  - default: `GigaChat`
+- `GIGACHAT_EMBEDDING_MODEL`
+  - default: `Embeddings`
+- `AGENT_PROMPTS_DIR`
+  - optional prompt directory override
+
+PostgreSQL config for retrieval storage is separate:
+
+- `DATABASE_URL`
+  - default: `postgresql+psycopg://agent:agent@db:5432/agent`
+
+## Default models
+
+- Chat/completions model default: `GigaChat`
+- Embedding model default: `Embeddings`
+
+## Completion payload
+
+Observed payload sent by `GigaChatClient.complete(...)`:
+
+```json
+{
+  "model": "GigaChat",
+  "messages": [
+    {"role": "system", "content": "<prompt template text>"},
+    {"role": "user", "content": "<runtime user input>"}
+  ]
+}
+```
+
+Endpoint:
+
+- `POST {GIGACHAT_API_URL}/chat/completions`
+
+Observed response handling:
+
+- reads `choices[0].message.content`
+- if no choices: returns empty string
+
+## Embeddings payload
+
+Observed payload sent by `GigaChatClient.embed(...)`:
+
+```json
+{
+  "model": "Embeddings",
+  "input": [
+    "<text1>",
+    "<text2>"
+  ]
+}
+```
+
+Endpoint:
+
+- `POST {GIGACHAT_API_URL}/embeddings`
+
+Observed response handling:
+
+- expects `data` list
+- maps each `item.embedding` to `list[float]`
+
+## Parameters
+
+### Explicitly implemented
+
+- `model`
+- `messages`
+- `input`
+- HTTP timeout:
+  - completions: `90s`
+  - embeddings: `90s`
+  - auth: `30s`
+- TLS verification flag:
+  - `verify=settings.ssl_verify`
+
+### Not implemented in payload
+
+- `temperature`
+- `top_p`
+- `max_tokens`
+- `response_format`
+- tools/function calling
+- streaming
+- seed
+- stop sequences
+
+`ASSUMPTION:` the service uses provider defaults for sampling and output length because these fields are not sent in the request payload.
+
+## Context and budget limits
+
+There is no centralized token budget manager in the current code.
+
+Observed practical limits instead:
+
+- prompt file text is loaded as-is from disk
+- user input is passed as-is
+- RAG context shaping happens outside the LLM client
+- docs indexing summary truncation:
+  - docs module catalog summary: `4000` chars
+  - docs policy text: `4000` chars
+- project QA source bundle caps:
+  - top `12` rag items
+  - top `10` file candidates
+- logging truncation only:
+  - LLM input/output log entries are capped at `1500` chars
+
+`ASSUMPTION:` there is no explicit max-context enforcement before chat completion requests. The current system relies on upstream graph logic to keep inputs small enough.
+
+## Retry, backoff, timeout
+
+### Timeouts
+
+- auth: `30s`
+- chat completion: `90s`
+- embeddings: `90s`
+
+### Retry
+
+- Generic async retry wrapper exists in `app/modules/shared/retry_executor.py`
+- It retries only:
+  - `TimeoutError`
+  - `ConnectionError`
+  - `OSError`
+- Retry constants:
+  - `MAX_RETRIES = 5`
+  - backoff: `0.1 * attempt` seconds
+
+### Important current limitation
+
+- `GigaChatClient` raises `GigaChatError` on HTTP and request failures.
+- `RetryExecutor` does not catch `GigaChatError`.
+- Result: LLM and embedding calls are effectively not retried by this generic retry helper unless errors are converted upstream.
+
+## Prompt formation
+
+Prompt loading is handled by `PromptLoader`:
+
+- base dir: `app/modules/agent/prompts`
+- override: `AGENT_PROMPTS_DIR`
+- file naming convention: `<prompt_name>.txt`
+
+Prompt composition model today:
+
+- system prompt:
+  - full contents of selected prompt file
+- user prompt:
+  - raw runtime input string passed by the caller
+- no separate developer prompt layer in the application payload
+
+If a prompt file is missing:
+
+- fallback system prompt: `You are a helpful assistant.`
+
+## Prompt templates present
+
+- `router_intent`
+- `general_answer`
+- `project_answer`
+- `docs_detect`
+- `docs_strategy`
+- `docs_plan_sections`
+- `docs_generation`
+- `docs_self_check`
+- `docs_execution_summary`
+- `project_edits_plan`
+- `project_edits_hunks`
+- `project_edits_self_check`
+
+## Key LLM call entrypoints
+
+### Composition roots
+
+- `app/modules/agent/module.py`
+  - builds `GigaChatSettings`
+  - builds `GigaChatTokenProvider`
+  - builds `GigaChatClient`
+  - builds `PromptLoader`
+  - builds `AgentLlmService`
+- `app/modules/rag_session/module.py`
+  - builds the same provider stack for embeddings used by RAG
+
+### Main abstraction
+
+- `AgentLlmService.generate(prompt_name, user_input, log_context=None)`
+
+### Current generate callsites
+
+- `app/modules/agent/engine/router/intent_classifier.py`
+  - `router_intent`
+- `app/modules/agent/engine/graphs/base_graph.py`
+  - `general_answer`
+- `app/modules/agent/engine/graphs/project_qa_graph.py`
+  - `project_answer`
+- `app/modules/agent/engine/graphs/docs_graph_logic.py`
+  - `docs_detect`
+  - `docs_strategy`
+  - `docs_plan_sections`
+  - `docs_generation`
+  - `docs_self_check`
+  - `docs_execution_summary`-like usage via summary step
+- `app/modules/agent/engine/graphs/project_edits_logic.py`
+  - `project_edits_plan`
+  - `project_edits_self_check`
+  - `project_edits_hunks`
+
+## Logging and observability
+
+`AgentLlmService` logs:
+
+- input:
+  - `graph llm input: context=... prompt=... user_input=...`
+- output:
+  - `graph llm output: context=... prompt=... output=...`
+
+Log truncation:
+
+- 1500 chars
+
+RAG retrieval is logged separately in `RagService`; embedding vectors are not included in those logs.
+
+## Integration with retrieval
+
+There are two distinct GigaChat usages:
+
+1. Chat/completion path for agent reasoning and generation
+2. Embedding path for RAG indexing and retrieval
+
+The embedding adapter is `GigaChatEmbedder`, used by:
+
+- `app/modules/rag/services/rag_service.py`
+
+## Notable limitations
+
+- Single provider coupling: chat and embeddings both depend on GigaChat-specific endpoints.
+- No model routing by scenario.
+- No tool/function calling.
+- No centralized prompt token budgeting.
+- No explicit retry for `GigaChatError`.
+- No streaming completions.
+- No structured response mode beyond prompt conventions and downstream parsing.
--- a/docs/architecture/rag_chunks_column_audit.md
+++ b/docs/architecture/rag_chunks_column_audit.md
@@ -0,0 +1,13 @@
+| column | used_by | safe_to_drop | notes |
+| --- | --- | --- | --- |
+| `layer` | `USED_BY_CODE_V2`, `USED_BY_DOCS_INDEXING` | no | Core selector for C0-C3 and D1-D4 queries. |
+| `title` | `USED_BY_CODE_V2`, `USED_BY_DOCS_INDEXING` | no | Used in lexical ranking and prompt evidence labels. |
+| `metadata_json` | `USED_BY_CODE_V2`, `USED_BY_DOCS_INDEXING` | no | C2/C0 graph lookups and docs metadata depend on it. |
+| `span_start`, `span_end` | `USED_BY_CODE_V2` | no | Needed for symbol-to-chunk resolution and locations. |
+| `symbol_id`, `qname`, `kind`, `lang` | `USED_BY_CODE_V2` | no | Used by code indexing, ranking, trace building, and diagnostics. |
+| `repo_id`, `commit_sha` | `USED_BY_CODE_V2`, `USED_BY_DOCS_INDEXING` | no | Used by indexing/cache and retained for provenance. |
+| `entrypoint_type`, `framework` | `USED_BY_CODE_V2` | no | Used by C3 filtering and entrypoint diagnostics. |
+| `doc_kind`, `module_id`, `section_path` | `USED_BY_DOCS_INDEXING` | no | Still written by docs indexing and covered by docs tests. |
+| `artifact_type`, `section`, `doc_version`, `owner`, `system_component`, `last_modified`, `staleness_score` | `USED_BY_DOCS_INDEXING` | no | File metadata still flows through indexing/cache; left intact for now. |
+| `rag_doc_id` | `UNUSED` | yes | Written into `rag_chunks` only; no reads in runtime/indexing code. |
+| `links_json` | `UNUSED` | yes | Stored in `rag_chunks` only; reads exist for `rag_chunk_cache`, not `rag_chunks`. |
--- a/docs/architecture/retrieval_callgraph.mmd
+++ b/docs/architecture/retrieval_callgraph.mmd
@@ -0,0 +1,31 @@
+flowchart TD
+    A["HTTP: POST /internal/rag/retrieve"] --> B["RagModule.internal_router.retrieve(payload)"]
+    B --> C["RagService.retrieve(rag_session_id, query)"]
+    C --> D["RagQueryRouter.resolve_mode(query)"]
+    D --> E["RagQueryRouter.layers_for_mode(mode)"]
+    C --> F["GigaChatEmbedder.embed([query])"]
+    F --> G["GigaChatClient.embed(payload)"]
+    G --> H["POST /embeddings"]
+    C --> I["RagRepository.retrieve(...)"]
+    I --> J["RagQueryRepository.retrieve(...)"]
+    J --> K["PostgreSQL rag_chunks + pgvector"]
+    K --> L["ORDER BY lexical_rank, test_penalty, layer_rank, vector distance"]
+    L --> M["rows: path/content/layer/title/metadata/span/distance"]
+    M --> N["normalize to {source, content, layer, title, metadata, score}"]
+    N --> O["response: {items: [...]}"]
+
+    C --> P["embedding error?"]
+    P -->|yes| Q["RagRepository.fallback_chunks(...)"]
+    Q --> R["latest rows by id DESC"]
+    R --> N
+
+    C --> S["no rows and mode != docs?"]
+    S -->|yes| T["fallback to docs layers"]
+    T --> I
+
+    U["GraphAgentRuntime for project/qa"] --> V["ProjectQaRetrievalGraphFactory._retrieve_context"]
+    V --> C
+    V --> W["ProjectQaSupport.build_source_bundle(...)"]
+    W --> X["source_bundle"]
+    X --> Y["context_analysis"]
+    Y --> Z["answer_composition"]
--- a/docs/architecture/retrieval_inventory.md
+++ b/docs/architecture/retrieval_inventory.md
@@ -0,0 +1,457 @@
+# Retrieval Inventory
+
+## Scope and method
+
+This document describes the retrieval and indexing pipeline as implemented in code today. The inventory is based primarily on:
+
+- `app/modules/rag/services/rag_service.py`
+- `app/modules/rag/persistence/*.py`
+- `app/modules/rag/indexing/code/**/*.py`
+- `app/modules/rag/indexing/docs/**/*.py`
+- `app/modules/rag_session/module.py`
+- `app/modules/agent/engine/graphs/project_qa_step_graphs.py`
+- `app/modules/agent/engine/orchestrator/*.py`
+
+`ASSUMPTION:` the intended layer semantics are the ones implied by code and tests, not by future architecture plans. This matters because only `C0` through `C3` are materially implemented today; `C4+` exist only as enum constants.
+
+## Current retrieval pipeline
+
+1. Retrieval entrypoint is `POST /internal/rag/retrieve` in `app/modules/rag_session/module.py`.
+2. The endpoint calls `RagService.retrieve(rag_session_id, query)`.
+3. `RagQueryRouter` chooses `docs` or `code` mode from the raw query text.
+4. `RagService` computes a single embedding for the full query via `GigaChatEmbedder`.
+5. `RagQueryRepository.retrieve(...)` runs one SQL query against `rag_chunks` in PostgreSQL with `pgvector`.
+6. Ranking order is:
+   - lexical rank
+   - test-file penalty
+   - layer rank
+   - vector distance `embedding <=> query_embedding`
+7. Response items are normalized to `{source, content, layer, title, metadata, score}`.
+8. If the embedding call fails, retrieval falls back to the latest chunks (ordered by `id DESC`) from the same layers.
+9. If code retrieval returns nothing, service falls back to docs layers.
+
+## Storage and indices
+
+- Primary store: PostgreSQL from `DATABASE_URL`, configured in `app/modules/shared/db.py`.
+- Vector extension: `CREATE EXTENSION IF NOT EXISTS vector` in `app/modules/rag/persistence/schema_repository.py`.
+- Primary table: `rag_chunks`.
+- Cache tables:
+  - `rag_blob_cache`
+  - `rag_chunk_cache`
+  - `rag_session_chunk_map`
+- SQL indexes currently created:
+  - `(rag_session_id)`
+  - `(rag_session_id, layer)`
+  - `(rag_session_id, layer, path)`
+  - `(qname)`
+  - `(symbol_id)`
+  - `(module_id)`
+  - `(doc_kind)`
+  - `(entrypoint_type, framework)`
+
+`ASSUMPTION:` there is no explicit ANN index for the vector column in schema code. The code creates general SQL indexes, but no `ivfflat`/`hnsw` index is defined here.
+
+## Layer: C0_SOURCE_CHUNKS
+
+### Implementation
+
+- Produced by `CodeIndexingPipeline.index_file(...)` in `app/modules/rag/indexing/code/pipeline.py`.
+- Chunking logic: `CodeTextChunker.chunk(...)` in `app/modules/rag/indexing/code/code_text/chunker.py`.
+- Document builder: `CodeTextDocumentBuilder.build(...)` in `app/modules/rag/indexing/code/code_text/document_builder.py`.
+- Persisted via `RagDocumentRepository.insert_documents(...)` into `rag_chunks`.
+
+### Input contract
+
+This is an indexing layer, not a direct public retriever. The observed upstream indexing input is a file dict with at least:
+
+- required:
+  - `path: str`
+  - `content: str`
+- optional:
+  - `commit_sha: str | None`
+  - `content_hash: str`
+  - metadata fields copied through by `RagService._document_metadata(...)`
+
+For retrieval, the layer is queried only indirectly through:
+
+- `rag_session_id: str`
+- `query: str`
+- inferred mode/layers from `RagQueryRouter`
+- fixed `limit=8`
+
+### Output contract
+
+Stored document shape:
+
+- top-level:
+  - `layer = "C0_SOURCE_CHUNKS"`
+  - `lang = "python"`
+  - `source.repo_id`
+  - `source.commit_sha`
+  - `source.path`
+  - `title`
+  - `text`
+  - `span.start_line`
+  - `span.end_line`
+  - `embedding`
+- metadata:
+  - `chunk_index`
+  - `chunk_type`: `symbol_block` or `window`
+  - `module_or_unit`
+  - `artifact_type = "CODE"`
+  - plus file-level metadata injected by `RagService`
+
+Returned retrieval item shape:
+
+- `source`
+- `content`
+- `layer`
+- `title`
+- `metadata`
+- `score`
+
+No `line_start` / `line_end` fields are returned to the caller directly; the values remain in DB columns `span_start` / `span_end` and are only used in logs.
+
+### Defaults & limits
+
+- AST chunking prefers one chunk per top-level class/function/async function.
+- Fallback window chunking:
+  - `size = 80` lines
+  - `overlap = 15` lines
+- Global retrieval limit from `RagService.retrieve(...)`: `8`
+- Embedding batch size from env:
+  - `RAG_EMBED_BATCH_SIZE`
+  - default `16`
+
+### Known issues
+
+- Nested methods/functions are not emitted as separate C0 chunks; they appear only as part of the chunk for their enclosing top-level block.
+- Returned API payload omits line spans even though storage has them.
+- No direct filter by path, namespace, symbol, or `top_k` is exposed through the current endpoint.
+
+## Layer: C1_SYMBOL_CATALOG
+
+### Implementation
+
+- Symbol extraction: `SymbolExtractor.extract(...)` in `app/modules/rag/indexing/code/symbols/extractor.py`.
+- AST parsing: `PythonAstParser.parse_module(...)`.
+- Document builder: `SymbolDocumentBuilder.build(...)`.
+- Retrieval reads rows from `rag_chunks`; there is no dedicated symbol table.
+
+### Input contract
+
+Indexing input is the same per-file payload as C0.
+
+Observed symbol extraction source:
+
+- Python AST only
+- supported symbol kinds:
+  - `class`
+  - `function`
+  - `method`
+  - `const` for top-level imports/import aliases
+
+Retrieval input is still the generic text query endpoint. Query terms are enriched by `extract_query_terms(...)`:
+
+- extracts identifier-like tokens from query text
+- normalizes camelCase/PascalCase to snake_case
+- adds special intent terms for management/control-related queries
+- max observed query terms: `6`
+
+### Output contract
+
+Stored document shape:
+
+- top-level:
+  - `layer = "C1_SYMBOL_CATALOG"`
+  - `title = qname`
+  - `text = "<kind> <qname>\n<signature>\n<docstring?>"`
+  - `span.start_line`
+  - `span.end_line`
+- metadata:
+  - `symbol_id`
+  - `qname`
+  - `kind`
+  - `signature`
+  - `decorators_or_annotations`
+  - `docstring_or_javadoc`
+  - `parent_symbol_id`
+  - `package_or_module`
+  - `is_entry_candidate`
+  - `lang_payload`
+  - `artifact_type = "CODE"`
+
+Observed `lang_payload` variants:
+
+- class:
+  - `bases`
+- function/method:
+  - `async`
+- import alias:
+  - `imported_from`
+  - `import_alias`
+
+### Defaults & limits
+
+- Only Python source files are indexed into C-layers.
+- Import and import-from declarations are materialized as `const` symbols only at module top level.
+- Retrieval ranking gives C1 priority rank `1`, after C3 and before C2/C0.
+
+### Known issues
+
+- No explicit visibility/public-private model.
+- `parent_symbol_id` currently stores the parent qname string from the stack, not the parent symbol hash. This is an observed implementation detail.
+- Cross-file symbol resolution is not implemented; `dst_symbol_id` in edges resolves only against symbols extracted from the same file.
+
+## Layer: C2_DEPENDENCY_GRAPH
+
+### Implementation
+
+- Edge extraction: `EdgeExtractor.extract(...)` in `app/modules/rag/indexing/code/edges/extractor.py`.
+- Document builder: `EdgeDocumentBuilder.build(...)`.
+- Built during `CodeIndexingPipeline.index_file(...)`.
+
+### Input contract
+
+Indexing input is the same per-file source payload as C0/C1.
+
+Graph construction method:
+
+- static analysis only
+- Python AST walk only
+- no runtime tracing
+- no tree-sitter
+
+Observed edge types:
+
+- `calls`
+- `imports`
+- `inherits`
+
+### Output contract
+
+Stored document shape:
+
+- top-level:
+  - `layer = "C2_DEPENDENCY_GRAPH"`
+  - `title = "<src_qname>:<edge_type>"`
+  - `text = "<src_qname> <edge_type> <dst>"`
+  - `span.start_line`
+  - `span.end_line`
+  - `links` contains one evidence link of type `EDGE`
+- metadata:
+  - `edge_id`
+  - `edge_type`
+  - `src_symbol_id`
+  - `src_qname`
+  - `dst_symbol_id`
+  - `dst_ref`
+  - `resolution`: `resolved` or `partial`
+  - `lang_payload`
+  - `artifact_type = "CODE"`
+
+Observed `lang_payload` usage:
+
+- for calls: may include `callsite_kind = "function_call"`
+
+### Defaults & limits
+
+- Edge extraction is per-file only.
+- `imports` edges are emitted only while visiting a class/function scope; top-level imports do not become C2 edges.
+- Layer rank in retrieval SQL: `2`
+
+### Known issues
+
+- There is no traversal API, graph repository, or query language over C2. Retrieval only treats edges as text/vector rows in `rag_chunks`.
+- Destination resolution is local to the file-level qname map.
+- Top-level module import relationships are incompletely represented because `visit_Import` / `visit_ImportFrom` skip when there is no current scope.
+
+## Layer: C3_ENTRYPOINTS
+
+### Implementation
+
+- Detection registry: `EntrypointDetectorRegistry.detect_all(...)`.
+- Detectors:
+  - `FastApiEntrypointDetector`
+  - `FlaskEntrypointDetector`
+  - `TyperClickEntrypointDetector`
+- Document builder: `EntrypointDocumentBuilder.build(...)`.
+
+### Input contract
+
+Indexing input is the same per-file source payload as other C-layers.
+
+Detected entrypoint families today:
+
+- HTTP:
+  - FastAPI decorators such as `.get`, `.post`, `.put`, `.patch`, `.delete`, `.route`
+  - Flask `.route`
+- CLI:
+  - Typer/Click `.command`
+  - Typer/Click `.callback`
+
+Not detected:
+
+- Django routes
+- Celery tasks
+- RQ jobs
+- cron jobs / scheduler entries
+
+### Output contract
+
+Stored document shape:
+
+- top-level:
+  - `layer = "C3_ENTRYPOINTS"`
+  - `title = route_or_command`
+  - `text = "<framework> <entry_type> <route_or_command>"`
+  - `span.start_line`
+  - `span.end_line`
+  - `links` contains one evidence link of type `CODE_SPAN`
+- metadata:
+  - `entry_id`
+  - `entry_type`: observed `http` or `cli`
+  - `framework`: observed `fastapi`, `flask`, `typer`, `click`
+  - `route_or_command`
+  - `handler_symbol_id`
+  - `lang_payload`
+  - `artifact_type = "CODE"`
+
+FastAPI-specific observed payload:
+
+- `lang_payload.methods = [HTTP_METHOD]` for `.get/.post/...`
+
+### Defaults & limits
+
+- Retrieval layer rank: `0`, the highest among code layers.
+- Entrypoint mapping is handler-symbol centric:
+  - decorator match -> symbol -> `handler_symbol_id`
+  - physical location comes from symbol span
+
+### Known issues
+
+- Route parsing is string-based from decorator text, not semantic AST argument parsing.
+- No dedicated entrypoint tags beyond `entry_type`, `framework`, and raw decorator-derived payload.
+- Background jobs and non-decorator entrypoints are not indexed.
+
+## Dependency graph / trace current state
+
+### Exists or stub?
+
+- C2 exists and is populated.
+- It is not a stub.
+- It is also not a full-project dependency graph service; it is a set of per-edge documents stored in `rag_chunks`.
+
+### How the graph is built
+
+- static Python AST analysis
+- no runtime instrumentation
+- no import graph resolver across modules
+- no tree-sitter
+
+### Edge types in data
+
+- `calls`
+- `imports`
+- `inherits`
+
+### Traversal API
+
+- No traversal API was found in `app/modules/rag/*` or `app/modules/agent/*`.
+- No method accepts graph traversal parameters such as depth, start node, edge filters, or BFS/DFS strategy.
+- Current access path is only retrieval over indexed edge documents.
+
+## Entrypoints current state
+
+### Implemented extraction
+
+- HTTP routes:
+  - FastAPI
+  - Flask
+- CLI:
+  - Typer
+  - Click
+
+### Mapping model
+
+- `entrypoint -> handler_symbol_id -> symbol span/path`
+- The entrypoint record itself stores:
+  - framework
+  - entry type
+  - raw route/command string
+  - handler symbol id
+
+### Tags/types
+
+- `entry_type` is the main normalized tag.
+- Observed values: `http`, `cli`.
+- `framework` is the second discriminator.
+- There are no richer entrypoint taxonomies such as `job`, `worker`, `webhook`, `scheduler`.
+
+## Defaults and operational limits
+
+- Query mode default: `docs`
+- Code mode is enabled by keyword heuristics in `RagQueryRouter`
+- Retrieval hard limit: `8`
+- Fallback limit: `8`
+- Query term extraction limit: `6`
+- Ranked source bundle for project QA:
+  - top `12` RAG items
+  - top `10` file candidates
+- The public/internal retrieval endpoint does not expose `namespace`, `path_prefixes`, `top_k`, `max_chars`, `max_chunks`, or `max_depth` controls.
+
+`ASSUMPTION:` the absence of these controls in endpoint and service signatures means they are not part of the current supported contract, even though `RagQueryRepository.retrieve(...)` has an internal `path_prefixes` parameter.
+
+## Known cross-cutting issues
+
+- Retrieval contract is effectively text-only at API level; structured retrieval exists only as internal SQL parameters.
+- Response payload drops explicit line spans even though spans are stored.
+- Vector retrieval is coupled to a single provider-specific embedder.
+- Docs mode is the default, so code retrieval depends on heuristic query phrasing unless the project/qa graph prepends the Russian phrase `по коду` ("by the code") to the query.
+- There is no separate retrieval contract per layer exposed over API; all layer selection is implicit.
+
+## Where to plug ExplainPack pipeline
+
+### Option 1: replace or extend `project_qa/context_analysis`
+
+- Code location:
+  - `app/modules/agent/engine/graphs/project_qa_step_graphs.py`
+- Why:
+  - retrieval is already complete at this step
+  - input bundle already contains ranked `rag_items` and `file_candidates`
+  - output is already a structured `analysis_brief`
+- Risk:
+  - low
+  - minimally invasive if ExplainPack consumes `source_bundle` and emits the same `analysis_brief` shape
+
+### Option 2: insert a new orchestrator step between `context_retrieval` and `context_analysis`
+
+- Code location:
+  - `app/modules/agent/engine/orchestrator/template_registry.py`
+  - `app/modules/agent/engine/orchestrator/step_registry.py`
+- Why:
+  - preserves current retrieval behavior
+  - makes ExplainPack an explicit pipeline stage with its own artifact
+  - cleanest for observability and future A/B migration
+- Risk:
+  - low to medium
+  - requires one new artifact contract and one extra orchestration step, but no change to retrieval storage
+
+### Option 3: introduce ExplainPack inside `ExplainActions.extract_logic`
+
+- Code location:
+  - `app/modules/agent/engine/orchestrator/actions/explain_actions.py`
+- Why:
+  - useful if ExplainPack is meant only for explain-style scenarios
+  - keeps general project QA untouched
+- Risk:
+  - medium
+  - narrower integration point; may create duplicate reasoning logic separate from the project QA analysis path
+
+## Bottom line
+
+- C0-C3 are implemented and persisted in one physical store: `rag_chunks`.
+- Retrieval is a hybrid SQL ranking over lexical heuristics plus pgvector distance.
+- C2 exists, but only as retrievable edge documents, not as a traversable graph subsystem.
+- C3 covers FastAPI/Flask/Typer/Click only.
+- The least invasive ExplainPack integration point is after retrieval and before answer composition, preferably as a new explicit orchestrator artifact or as a replacement for `context_analysis`.