Фиксация изменений

This commit is contained in:
2026-03-05 11:03:17 +03:00
parent 1ef0b4d68c
commit 417b8b6f72
261 changed files with 8215 additions and 332 deletions

View File

@@ -0,0 +1,380 @@
{
"layers": {
"C0_SOURCE_CHUNKS": {
"retriever": {
"class": "RagService",
"file": "app/modules/rag/services/rag_service.py",
"method": "retrieve"
},
"indexer": {
"class": "CodeTextDocumentBuilder",
"file": "app/modules/rag/indexing/code/code_text/document_builder.py",
"method": "build"
},
"input": {
"type": "observed shape",
"fields": {
"rag_session_id": {
"type": "string",
"required": true
},
"query": {
"type": "string",
"required": true
},
"layers": {
"type": "implicit list[string]",
"required": false,
"source": "RagQueryRouter.layers_for_mode('code')"
}
}
},
"output": {
"type": "list[dict]",
"fields": {
"source": "string",
"content": "string",
"layer": "\"C0_SOURCE_CHUNKS\"",
"title": "string",
"metadata": {
"chunk_index": "int",
"chunk_type": "\"symbol_block\" | \"window\"",
"module_or_unit": "string",
"artifact_type": "\"CODE\""
},
"score": "float | null"
}
},
"examples": {
"input": {
"rag_session_id": "rag-123",
"query": "where is implemented get_user"
},
"output": {
"source": "app/api/users.py",
"content": "async def get_user(user_id: str):\n service = UserService()\n return service.get_user(user_id)",
"layer": "C0_SOURCE_CHUNKS",
"title": "app/api/users.py:get_user",
"metadata": {
"chunk_index": 0,
"chunk_type": "symbol_block",
"module_or_unit": "app.api.users",
"artifact_type": "CODE"
},
"score": 0.07
}
},
"defaults": {
"retrieve_limit": 8,
"embed_batch_size_env": "RAG_EMBED_BATCH_SIZE",
"embed_batch_size_default": 16,
"window_chunk_size_lines": 80,
"window_overlap_lines": 15
},
"limitations": [
"Line spans are stored but not returned in the public retrieval item shape.",
"No direct path or namespace filter is exposed through the retrieval endpoint."
]
},
"C1_SYMBOL_CATALOG": {
"retriever": {
"class": "RagService",
"file": "app/modules/rag/services/rag_service.py",
"method": "retrieve"
},
"indexer": {
"class": "SymbolDocumentBuilder",
"file": "app/modules/rag/indexing/code/symbols/document_builder.py",
"method": "build"
},
"input": {
"type": "observed shape",
"fields": {
"rag_session_id": {
"type": "string",
"required": true
},
"query": {
"type": "string",
"required": true
},
"query_term_expansion": {
"type": "list[string]",
"required": false,
"source": "extract_query_terms(query_text)",
"max_items": 6
}
}
},
"output": {
"type": "list[dict]",
"fields": {
"source": "string",
"content": "string",
"layer": "\"C1_SYMBOL_CATALOG\"",
"title": "string",
"metadata": {
"symbol_id": "string",
"qname": "string",
"kind": "\"class\" | \"function\" | \"method\" | \"const\"",
"signature": "string",
"decorators_or_annotations": "list[string]",
"docstring_or_javadoc": "string | null",
"parent_symbol_id": "string | null",
"package_or_module": "string",
"is_entry_candidate": "bool",
"lang_payload": "object",
"artifact_type": "\"CODE\""
},
"score": "float | null"
}
},
"examples": {
"input": {
"rag_session_id": "rag-123",
"query": "where is implemented get_user"
},
"output": {
"source": "app/api/users.py",
"content": "function get_user\nget_user(user_id)",
"layer": "C1_SYMBOL_CATALOG",
"title": "get_user",
"metadata": {
"symbol_id": "sha256(...)",
"qname": "get_user",
"kind": "function",
"signature": "get_user(user_id)",
"decorators_or_annotations": [
"router.get"
],
"docstring_or_javadoc": null,
"parent_symbol_id": null,
"package_or_module": "app.api.users",
"is_entry_candidate": true,
"lang_payload": {
"async": true
},
"artifact_type": "CODE"
},
"score": 0.07
}
},
"defaults": {
"retrieve_limit": 8,
"layer_rank": 1
},
"limitations": [
"Only Python AST symbols are indexed.",
"Cross-file resolution is not implemented.",
"parent_symbol_id is an observed qname-like value, not guaranteed to be a symbol hash."
]
},
"C2_DEPENDENCY_GRAPH": {
"retriever": {
"class": "RagService",
"file": "app/modules/rag/services/rag_service.py",
"method": "retrieve"
},
"indexer": {
"class": "EdgeDocumentBuilder",
"file": "app/modules/rag/indexing/code/edges/document_builder.py",
"method": "build"
},
"input": {
"type": "observed shape",
"fields": {
"rag_session_id": {
"type": "string",
"required": true
},
"query": {
"type": "string",
"required": true
}
}
},
"output": {
"type": "list[dict]",
"fields": {
"source": "string",
"content": "string",
"layer": "\"C2_DEPENDENCY_GRAPH\"",
"title": "string",
"metadata": {
"edge_id": "string",
"edge_type": "\"calls\" | \"imports\" | \"inherits\"",
"src_symbol_id": "string",
"src_qname": "string",
"dst_symbol_id": "string | null",
"dst_ref": "string | null",
"resolution": "\"resolved\" | \"partial\"",
"lang_payload": "object",
"artifact_type": "\"CODE\""
},
"score": "float | null"
}
},
"examples": {
"input": {
"rag_session_id": "rag-123",
"query": "how get_user calls service"
},
"output": {
"source": "app/api/users.py",
"content": "get_user calls UserService",
"layer": "C2_DEPENDENCY_GRAPH",
"title": "get_user:calls",
"metadata": {
"edge_id": "sha256(...)",
"edge_type": "calls",
"src_symbol_id": "sha256(...)",
"src_qname": "get_user",
"dst_symbol_id": null,
"dst_ref": "UserService",
"resolution": "partial",
"lang_payload": {
"callsite_kind": "function_call"
},
"artifact_type": "CODE"
},
"score": 0.11
}
},
"defaults": {
"retrieve_limit": 8,
"layer_rank": 2,
"graph_build_mode": "static_python_ast"
},
"limitations": [
"No traversal API exists.",
"Edges are stored as retrievable rows, not as a graph-native store.",
"Destination resolution is local to one indexed file."
]
},
"C3_ENTRYPOINTS": {
"retriever": {
"class": "RagService",
"file": "app/modules/rag/services/rag_service.py",
"method": "retrieve"
},
"indexer": {
"class": "EntrypointDocumentBuilder",
"file": "app/modules/rag/indexing/code/entrypoints/document_builder.py",
"method": "build"
},
"input": {
"type": "observed shape",
"fields": {
"rag_session_id": {
"type": "string",
"required": true
},
"query": {
"type": "string",
"required": true
}
}
},
"output": {
"type": "list[dict]",
"fields": {
"source": "string",
"content": "string",
"layer": "\"C3_ENTRYPOINTS\"",
"title": "string",
"metadata": {
"entry_id": "string",
"entry_type": "\"http\" | \"cli\"",
"framework": "\"fastapi\" | \"flask\" | \"typer\" | \"click\"",
"route_or_command": "string",
"handler_symbol_id": "string",
"lang_payload": "object",
"artifact_type": "\"CODE\""
},
"score": "float | null"
}
},
"examples": {
"input": {
"rag_session_id": "rag-123",
"query": "which endpoint handles get user"
},
"output": {
"source": "app/api/users.py",
"content": "fastapi http \"/users/{user_id}\"",
"layer": "C3_ENTRYPOINTS",
"title": "\"/users/{user_id}\"",
"metadata": {
"entry_id": "sha256(...)",
"entry_type": "http",
"framework": "fastapi",
"route_or_command": "\"/users/{user_id}\"",
"handler_symbol_id": "sha256(...)",
"lang_payload": {
"methods": [
"GET"
]
},
"artifact_type": "CODE"
},
"score": 0.05
}
},
"defaults": {
"retrieve_limit": 8,
"layer_rank": 0
},
"limitations": [
"Detection is decorator-string based.",
"No Django, Celery, RQ, or cron entrypoints were found.",
"Returned payload does not expose line spans."
]
}
},
"retrieval_endpoint": {
"entrypoint": {
"file": "app/modules/rag_session/module.py",
"method": "internal_router.retrieve"
},
"request": {
"type": "dict",
"fields": {
"rag_session_id": "string | optional if project_id provided",
"project_id": "string | optional fallback for rag_session_id",
"query": "string"
}
},
"response": {
"type": "dict",
"fields": {
"items": "list[retrieval item]"
}
},
"defaults": {
"mode": "docs unless RagQueryRouter detects code hints",
"limit": 8,
"embedding_provider": "GigaChat embeddings",
"fallback_after_embedding_error": true,
"fallback_to_docs_when_code_empty": true
}
},
"ranking": {
"storage": "PostgreSQL rag_chunks + pgvector",
"query_repository": {
"class": "RagQueryRepository",
"file": "app/modules/rag/persistence/query_repository.py",
"method": "retrieve"
},
"order_by": [
"lexical_rank ASC",
"test_penalty ASC",
"layer_rank ASC",
"embedding <=> query_embedding ASC"
],
"notes": [
"lexical_rank is derived from qname/symbol_id/title/path/content matching extracted query terms",
"test_penalty is applied only when prefer_non_tests=true",
"layer priority is C3 > C1 > C2 > C0 for code retrieval"
]
}
}

View File

@@ -0,0 +1,270 @@
# LLM Inventory
## Provider and SDK
- Provider in code: GigaChat / Sber
- Local SDK style: custom thin HTTP client over `requests`
- Core files:
- `app/modules/shared/gigachat/client.py`
- `app/modules/shared/gigachat/settings.py`
- `app/modules/shared/gigachat/token_provider.py`
- `app/modules/agent/llm/service.py`
There is no OpenAI SDK, Azure SDK, or local model runtime in the current implementation.
## Configuration
Model and endpoint configuration are read from environment in `GigaChatSettings.from_env()`:
- `GIGACHAT_AUTH_URL`
- default: `https://ngw.devices.sberbank.ru:9443/api/v2/oauth`
- `GIGACHAT_API_URL`
- default: `https://gigachat.devices.sberbank.ru/api/v1`
- `GIGACHAT_SCOPE`
- default: `GIGACHAT_API_PERS`
- `GIGACHAT_TOKEN`
- required for auth
- `GIGACHAT_SSL_VERIFY`
- default: `true`
- `GIGACHAT_MODEL`
- default: `GigaChat`
- `GIGACHAT_EMBEDDING_MODEL`
- default: `Embeddings`
- `AGENT_PROMPTS_DIR`
- optional prompt directory override
PostgreSQL config for retrieval storage is separate:
- `DATABASE_URL`
- default: `postgresql+psycopg://agent:agent@db:5432/agent`
## Default models
- Chat/completions model default: `GigaChat`
- Embedding model default: `Embeddings`
## Completion payload
Observed payload sent by `GigaChatClient.complete(...)`:
```json
{
"model": "GigaChat",
"messages": [
{"role": "system", "content": "<prompt template text>"},
{"role": "user", "content": "<runtime user input>"}
]
}
```
Endpoint:
- `POST {GIGACHAT_API_URL}/chat/completions`
Observed response handling:
- reads `choices[0].message.content`
- if no choices: returns empty string
## Embeddings payload
Observed payload sent by `GigaChatClient.embed(...)`:
```json
{
"model": "Embeddings",
"input": [
"<text1>",
"<text2>"
]
}
```
Endpoint:
- `POST {GIGACHAT_API_URL}/embeddings`
Observed response handling:
- expects `data` list
- maps each `item.embedding` to `list[float]`
## Parameters
### Explicitly implemented
- `model`
- `messages`
- `input`
- HTTP timeout:
- completions: `90s`
- embeddings: `90s`
- auth: `30s`
- TLS verification flag:
- `verify=settings.ssl_verify`
### Not implemented in payload
- `temperature`
- `top_p`
- `max_tokens`
- `response_format`
- tools/function calling
- streaming
- seed
- stop sequences
`ASSUMPTION:` the service uses provider defaults for sampling and output length because these fields are not sent in the request payload.
## Context and budget limits
There is no centralized token budget manager in the current code.
Observed practical limits instead:
- prompt file text is loaded as-is from disk
- user input is passed as-is
- RAG context shaping happens outside the LLM client
- docs indexing summary truncation:
- docs module catalog summary: `4000` chars
- docs policy text: `4000` chars
- project QA source bundle caps:
- top `12` rag items
- top `10` file candidates
- logging truncation only:
- LLM input/output is truncated to `1500` chars in log messages
`ASSUMPTION:` there is no explicit max-context enforcement before chat completion requests. The current system relies on upstream graph logic to keep inputs small enough.
## Retry, backoff, timeout
### Timeouts
- auth: `30s`
- chat completion: `90s`
- embeddings: `90s`
### Retry
- Generic async retry wrapper exists in `app/modules/shared/retry_executor.py`
- It retries only:
- `TimeoutError`
- `ConnectionError`
- `OSError`
- Retry constants:
- `MAX_RETRIES = 5`
- backoff: `0.1 * attempt` seconds
### Important current limitation
- `GigaChatClient` raises `GigaChatError` on HTTP and request failures.
- `RetryExecutor` does not catch `GigaChatError`.
- Result: LLM and embeddings calls are effectively not retried by this generic retry helper unless errors are converted upstream.
## Prompt formation
Prompt loading is handled by `PromptLoader`:
- base dir: `app/modules/agent/prompts`
- override: `AGENT_PROMPTS_DIR`
- file naming convention: `<prompt_name>.txt`
Prompt composition model today:
- system prompt:
- full contents of selected prompt file
- user prompt:
- raw runtime input string passed by the caller
- no separate developer prompt layer in the application payload
If a prompt file is missing:
- fallback system prompt: `You are a helpful assistant.`
## Prompt templates present
- `router_intent`
- `general_answer`
- `project_answer`
- `docs_detect`
- `docs_strategy`
- `docs_plan_sections`
- `docs_generation`
- `docs_self_check`
- `docs_execution_summary`
- `project_edits_plan`
- `project_edits_hunks`
- `project_edits_self_check`
## Key LLM call entrypoints
### Composition roots
- `app/modules/agent/module.py`
- builds `GigaChatSettings`
- builds `GigaChatTokenProvider`
- builds `GigaChatClient`
- builds `PromptLoader`
- builds `AgentLlmService`
- `app/modules/rag_session/module.py`
- builds the same provider stack for embeddings used by RAG
### Main abstraction
- `AgentLlmService.generate(prompt_name, user_input, log_context=None)`
### Current generate callsites
- `app/modules/agent/engine/router/intent_classifier.py`
- `router_intent`
- `app/modules/agent/engine/graphs/base_graph.py`
- `general_answer`
- `app/modules/agent/engine/graphs/project_qa_graph.py`
- `project_answer`
- `app/modules/agent/engine/graphs/docs_graph_logic.py`
- `docs_detect`
- `docs_strategy`
- `docs_plan_sections`
- `docs_generation`
- `docs_self_check`
- `docs_execution_summary`-like usage via summary step
- `app/modules/agent/engine/graphs/project_edits_logic.py`
- `project_edits_plan`
- `project_edits_self_check`
- `project_edits_hunks`
## Logging and observability
`AgentLlmService` logs:
- input:
- `graph llm input: context=... prompt=... user_input=...`
- output:
- `graph llm output: context=... prompt=... output=...`
Log truncation:
- 1500 chars
RAG retrieval is logged separately in `RagService`; embedding vectors are not included in those logs.
## Integration with retrieval
There are two distinct GigaChat usages:
1. Chat/completion path for agent reasoning and generation
2. Embedding path for RAG indexing and retrieval
The embedding adapter is `GigaChatEmbedder`, used by:
- `app/modules/rag/services/rag_service.py`
## Notable limitations
- Single provider coupling: chat and embeddings both depend on GigaChat-specific endpoints.
- No model routing by scenario.
- No tool/function calling.
- No centralized prompt token budgeting.
- No explicit retry for `GigaChatError`.
- No streaming completions.
- No structured response mode beyond prompt conventions and downstream parsing.

View File

@@ -0,0 +1,13 @@
| column | used_by | safe_to_drop | notes |
| --- | --- | --- | --- |
| `layer` | `USED_BY_CODE_V2`, `USED_BY_DOCS_INDEXING` | no | Core selector for C0-C3 and D1-D4 queries. |
| `title` | `USED_BY_CODE_V2`, `USED_BY_DOCS_INDEXING` | no | Used in lexical ranking and prompt evidence labels. |
| `metadata_json` | `USED_BY_CODE_V2`, `USED_BY_DOCS_INDEXING` | no | C2/C0 graph lookups and docs metadata depend on it. |
| `span_start`, `span_end` | `USED_BY_CODE_V2` | no | Needed for symbol-to-chunk resolution and locations. |
| `symbol_id`, `qname`, `kind`, `lang` | `USED_BY_CODE_V2` | no | Used by code indexing, ranking, trace building, and diagnostics. |
| `repo_id`, `commit_sha` | `USED_BY_CODE_V2`, `USED_BY_DOCS_INDEXING` | no | Used by indexing/cache and retained for provenance. |
| `entrypoint_type`, `framework` | `USED_BY_CODE_V2` | no | Used by C3 filtering and entrypoint diagnostics. |
| `doc_kind`, `module_id`, `section_path` | `USED_BY_DOCS_INDEXING` | no | Still written by docs indexing and covered by docs tests. |
| `artifact_type`, `section`, `doc_version`, `owner`, `system_component`, `last_modified`, `staleness_score` | `USED_BY_DOCS_INDEXING` | no | File metadata still flows through indexing/cache; left intact for now. |
| `rag_doc_id` | `UNUSED` | yes | Written into `rag_chunks` only; no reads in runtime/indexing code. |
| `links_json` | `UNUSED` | yes | Stored in `rag_chunks` only; reads exist for `rag_chunk_cache`, not `rag_chunks`. |

View File

@@ -0,0 +1,31 @@
flowchart TD
A["HTTP: POST /internal/rag/retrieve"] --> B["RagModule.internal_router.retrieve(payload)"]
B --> C["RagService.retrieve(rag_session_id, query)"]
C --> D["RagQueryRouter.resolve_mode(query)"]
D --> E["RagQueryRouter.layers_for_mode(mode)"]
C --> F["GigaChatEmbedder.embed([query])"]
F --> G["GigaChatClient.embed(payload)"]
G --> H["POST /embeddings"]
C --> I["RagRepository.retrieve(...)"]
I --> J["RagQueryRepository.retrieve(...)"]
J --> K["PostgreSQL rag_chunks + pgvector"]
K --> L["ORDER BY lexical_rank, test_penalty, layer_rank, vector distance"]
L --> M["rows: path/content/layer/title/metadata/span/distance"]
M --> N["normalize to {source, content, layer, title, metadata, score}"]
N --> O["response: {items: [...]}"]
C --> P["embedding error?"]
P -->|yes| Q["RagRepository.fallback_chunks(...)"]
Q --> R["latest rows by id DESC"]
R --> N
C --> S["no rows and mode != docs?"]
S -->|yes| T["fallback to docs layers"]
T --> I
U["GraphAgentRuntime for project/qa"] --> V["ProjectQaRetrievalGraphFactory._retrieve_context"]
V --> C
V --> W["ProjectQaSupport.build_source_bundle(...)"]
W --> X["source_bundle"]
X --> Y["context_analysis"]
Y --> Z["answer_composition"]

View File

@@ -0,0 +1,457 @@
# Retrieval Inventory
## Scope and method
This document describes the retrieval and indexing pipeline as implemented in code today. The inventory is based primarily on:
- `app/modules/rag/services/rag_service.py`
- `app/modules/rag/persistence/*.py`
- `app/modules/rag/indexing/code/**/*.py`
- `app/modules/rag/indexing/docs/**/*.py`
- `app/modules/rag_session/module.py`
- `app/modules/agent/engine/graphs/project_qa_step_graphs.py`
- `app/modules/agent/engine/orchestrator/*.py`
`ASSUMPTION:` the intended layer semantics are the ones implied by code and tests, not by future architecture plans. This matters because only `C0` through `C3` are materially implemented today; `C4+` exist only as enum constants.
## Current retrieval pipeline
1. Retrieval entrypoint is `POST /internal/rag/retrieve` in `app/modules/rag_session/module.py`.
2. The endpoint calls `RagService.retrieve(rag_session_id, query)`.
3. `RagQueryRouter` chooses `docs` or `code` mode from the raw query text.
4. `RagService` computes a single embedding for the full query via `GigaChatEmbedder`.
5. `RagQueryRepository.retrieve(...)` runs one SQL query against `rag_chunks` in PostgreSQL with `pgvector`.
6. Ranking order is:
- lexical rank
- test-file penalty
- layer rank
- vector distance `embedding <=> query_embedding`
7. Response items are normalized to `{source, content, layer, title, metadata, score}`.
8. If embeddings fail, retrieval falls back to the latest chunks from the same layers.
9. If code retrieval returns nothing, the service falls back to docs layers.
## Storage and indices
- Primary store: PostgreSQL from `DATABASE_URL`, configured in `app/modules/shared/db.py`.
- Vector extension: `CREATE EXTENSION IF NOT EXISTS vector` in `app/modules/rag/persistence/schema_repository.py`.
- Primary table: `rag_chunks`.
- Cache tables:
- `rag_blob_cache`
- `rag_chunk_cache`
- `rag_session_chunk_map`
- SQL indexes currently created:
- `(rag_session_id)`
- `(rag_session_id, layer)`
- `(rag_session_id, layer, path)`
- `(qname)`
- `(symbol_id)`
- `(module_id)`
- `(doc_kind)`
- `(entrypoint_type, framework)`
`ASSUMPTION:` there is no explicit ANN index for the vector column in schema code. The code creates general SQL indexes, but no `ivfflat`/`hnsw` index is defined here.
## Layer: C0_SOURCE_CHUNKS
### Implementation
- Produced by `CodeIndexingPipeline.index_file(...)` in `app/modules/rag/indexing/code/pipeline.py`.
- Chunking logic: `CodeTextChunker.chunk(...)` in `app/modules/rag/indexing/code/code_text/chunker.py`.
- Document builder: `CodeTextDocumentBuilder.build(...)` in `app/modules/rag/indexing/code/code_text/document_builder.py`.
- Persisted via `RagDocumentRepository.insert_documents(...)` into `rag_chunks`.
### Input contract
This is an indexing layer, not a direct public retriever. The observed upstream indexing input is a file dict with at least:
- required:
- `path: str`
- `content: str`
- optional:
- `commit_sha: str | None`
- `content_hash: str`
- metadata fields copied through by `RagService._document_metadata(...)`
For retrieval, the layer is queried only indirectly through:
- `rag_session_id: str`
- `query: str`
- inferred mode/layers from `RagQueryRouter`
- fixed `limit=8`
### Output contract
Stored document shape:
- top-level:
- `layer = "C0_SOURCE_CHUNKS"`
- `lang = "python"`
- `source.repo_id`
- `source.commit_sha`
- `source.path`
- `title`
- `text`
- `span.start_line`
- `span.end_line`
- `embedding`
- metadata:
- `chunk_index`
- `chunk_type`: `symbol_block` or `window`
- `module_or_unit`
- `artifact_type = "CODE"`
- plus file-level metadata injected by `RagService`
Returned retrieval item shape:
- `source`
- `content`
- `layer`
- `title`
- `metadata`
- `score`
Neither `line_start` nor `line_end` is returned to the caller directly; the values remain in DB columns `span_start` / `span_end` and are only used in logs.
### Defaults & limits
- AST chunking prefers one chunk per top-level class/function/async function.
- Fallback window chunking:
- `size = 80` lines
- `overlap = 15` lines
- Global retrieval limit from `RagService.retrieve(...)`: `8`
- Embedding batch size from env:
- `RAG_EMBED_BATCH_SIZE`
- default `16`
### Known issues
- Nested methods/functions do not get their own C0 chunks; they are covered only as part of the chunk for their enclosing top-level block.
- Returned API payload omits line spans even though storage has them.
- No direct filter by path, namespace, symbol, or `top_k` is exposed through the current endpoint.
## Layer: C1_SYMBOL_CATALOG
### Implementation
- Symbol extraction: `SymbolExtractor.extract(...)` in `app/modules/rag/indexing/code/symbols/extractor.py`.
- AST parsing: `PythonAstParser.parse_module(...)`.
- Document builder: `SymbolDocumentBuilder.build(...)`.
- Retrieval reads rows from `rag_chunks`; there is no dedicated symbol table.
### Input contract
Indexing input is the same per-file payload as C0.
Observed symbol extraction source:
- Python AST only
- supported symbol kinds:
- `class`
- `function`
- `method`
- `const` for top-level imports/import aliases
Retrieval input is still the generic text query endpoint. Query terms are enriched by `extract_query_terms(...)`:
- extracts identifier-like tokens from query text
- normalizes camelCase/PascalCase to snake_case
- adds special intent terms for management/control-related queries
- max observed query terms: `6`
### Output contract
Stored document shape:
- top-level:
- `layer = "C1_SYMBOL_CATALOG"`
- `title = qname`
- `text = "<kind> <qname>\n<signature>\n<docstring?>"`
- `span.start_line`
- `span.end_line`
- metadata:
- `symbol_id`
- `qname`
- `kind`
- `signature`
- `decorators_or_annotations`
- `docstring_or_javadoc`
- `parent_symbol_id`
- `package_or_module`
- `is_entry_candidate`
- `lang_payload`
- `artifact_type = "CODE"`
Observed `lang_payload` variants:
- class:
- `bases`
- function/method:
- `async`
- import alias:
- `imported_from`
- `import_alias`
### Defaults & limits
- Only Python source files are indexed into C-layers.
- Import and import-from declarations are materialized as `const` symbols only at module top level.
- Retrieval ranking gives C1 priority rank `1`, after C3 and before C2/C0.
### Known issues
- No explicit visibility/public-private model.
- `parent_symbol_id` currently stores the parent qname string from the stack, not the parent symbol hash. This is an observed implementation detail.
- Cross-file symbol resolution is not implemented; `dst_symbol_id` in edges resolves only against symbols extracted from the same file.
## Layer: C2_DEPENDENCY_GRAPH
### Implementation
- Edge extraction: `EdgeExtractor.extract(...)` in `app/modules/rag/indexing/code/edges/extractor.py`.
- Document builder: `EdgeDocumentBuilder.build(...)`.
- Built during `CodeIndexingPipeline.index_file(...)`.
### Input contract
Indexing input is the same per-file source payload as C0/C1.
Graph construction method:
- static analysis only
- Python AST walk only
- no runtime tracing
- no tree-sitter
Observed edge types:
- `calls`
- `imports`
- `inherits`
### Output contract
Stored document shape:
- top-level:
- `layer = "C2_DEPENDENCY_GRAPH"`
- `title = "<src_qname>:<edge_type>"`
- `text = "<src_qname> <edge_type> <dst>"`
- `span.start_line`
- `span.end_line`
- `links` contains one evidence link of type `EDGE`
- metadata:
- `edge_id`
- `edge_type`
- `src_symbol_id`
- `src_qname`
- `dst_symbol_id`
- `dst_ref`
- `resolution`: `resolved` or `partial`
- `lang_payload`
- `artifact_type = "CODE"`
Observed `lang_payload` usage:
- for calls: may include `callsite_kind = "function_call"`
### Defaults & limits
- Edge extraction is per-file only.
- `imports` edges are emitted only while visiting a class/function scope; top-level imports do not become C2 edges.
- Layer rank in retrieval SQL: `2`
### Known issues
- There is no traversal API, graph repository, or query language over C2. Retrieval only treats edges as text/vector rows in `rag_chunks`.
- Destination resolution is local to the file-level qname map.
- Top-level module import relationships are incompletely represented because `visit_Import` / `visit_ImportFrom` skip when there is no current scope.
## Layer: C3_ENTRYPOINTS
### Implementation
- Detection registry: `EntrypointDetectorRegistry.detect_all(...)`.
- Detectors:
- `FastApiEntrypointDetector`
- `FlaskEntrypointDetector`
- `TyperClickEntrypointDetector`
- Document builder: `EntrypointDocumentBuilder.build(...)`.
### Input contract
Indexing input is the same per-file source payload as other C-layers.
Detected entrypoint families today:
- HTTP:
- FastAPI decorators such as `.get`, `.post`, `.put`, `.patch`, `.delete`, `.route`
- Flask `.route`
- CLI:
- Typer/Click `.command`
- Typer/Click `.callback`
Not detected:
- Django routes
- Celery tasks
- RQ jobs
- cron jobs / scheduler entries
### Output contract
Stored document shape:
- top-level:
- `layer = "C3_ENTRYPOINTS"`
- `title = route_or_command`
- `text = "<framework> <entry_type> <route_or_command>"`
- `span.start_line`
- `span.end_line`
- `links` contains one evidence link of type `CODE_SPAN`
- metadata:
- `entry_id`
- `entry_type`: observed `http` or `cli`
- `framework`: observed `fastapi`, `flask`, `typer`, `click`
- `route_or_command`
- `handler_symbol_id`
- `lang_payload`
- `artifact_type = "CODE"`
FastAPI-specific observed payload:
- `lang_payload.methods = [HTTP_METHOD]` for `.get/.post/...`
### Defaults & limits
- Retrieval layer rank: `0` (highest priority among code layers).
- Entrypoint mapping is handler-symbol centric:
- decorator match -> symbol -> `handler_symbol_id`
- physical location comes from symbol span
### Known issues
- Route parsing is string-based from decorator text, not semantic AST argument parsing.
- No dedicated entrypoint tags beyond `entry_type`, `framework`, and raw decorator-derived payload.
- Background jobs and non-decorator entrypoints are not indexed.
## Dependency graph / trace current state
### Exists or stub?
- C2 exists and is populated.
- It is not a stub.
- It is also not a full-project dependency graph service; it is a set of per-edge documents stored in `rag_chunks`.
### How the graph is built
- static Python AST analysis
- no runtime instrumentation
- no import graph resolver across modules
- no tree-sitter
### Edge types in data
- `calls`
- `imports`
- `inherits`
### Traversal API
- No traversal API was found in `app/modules/rag/*` or `app/modules/agent/*`.
- No method accepts graph traversal parameters such as depth, start node, edge filters, or BFS/DFS strategy.
- Current access path is only retrieval over indexed edge documents.
## Entrypoints current state
### Implemented extraction
- HTTP routes:
- FastAPI
- Flask
- CLI:
- Typer
- Click
### Mapping model
- `entrypoint -> handler_symbol_id -> symbol span/path`
- The entrypoint record itself stores:
- framework
- entry type
- raw route/command string
- handler symbol id
### Tags/types
- `entry_type` is the main normalized tag.
- Observed values: `http`, `cli`.
- `framework` is the second discriminator.
- There are no richer endpoint taxonomies such as `job`, `worker`, `webhook`, `scheduler`.
## Defaults and operational limits
- Query mode default: `docs`
- Code mode is enabled by keyword heuristics in `RagQueryRouter`
- Retrieval hard limit: `8`
- Fallback limit: `8`
- Query term extraction limit: `6`
- Ranked source bundle for project QA:
- top `12` RAG items
- top `10` file candidates
- No exposed `namespace`, `path_prefixes`, `top_k`, `max_chars`, `max_chunks`, `max_depth` in the public/internal retrieval endpoint
`ASSUMPTION:` the absence of these controls in endpoint and service signatures means they are not part of the current supported contract, even though `RagQueryRepository.retrieve(...)` has an internal `path_prefixes` parameter.
## Known cross-cutting issues
- Retrieval contract is effectively text-only at API level; structured retrieval exists only as internal SQL parameters.
- Response payload drops explicit line spans even though spans are stored.
- Vector retrieval is coupled to a single provider-specific embedder.
- Docs mode is the default, so code retrieval depends on heuristic query phrasing unless the project/qa graph prepends the Russian hint `по коду` ("by code") to the query.
- There is no separate retrieval contract per layer exposed over API; all layer selection is implicit.
## Where to plug ExplainPack pipeline
### Option 1: replace or extend `project_qa/context_analysis`
- Code location:
- `app/modules/agent/engine/graphs/project_qa_step_graphs.py`
- Why:
- retrieval is already complete at this step
- input bundle already contains ranked `rag_items` and `file_candidates`
- output is already a structured `analysis_brief`
- Risk:
- low
- minimally invasive if ExplainPack consumes `source_bundle` and emits the same `analysis_brief` shape
### Option 2: insert a new orchestrator step between `context_retrieval` and `context_analysis`
- Code location:
- `app/modules/agent/engine/orchestrator/template_registry.py`
- `app/modules/agent/engine/orchestrator/step_registry.py`
- Why:
- preserves current retrieval behavior
- makes ExplainPack an explicit pipeline stage with its own artifact
- cleanest for observability and future A/B migration
- Risk:
- low to medium
- requires one new artifact contract and one extra orchestration step, but no change to retrieval storage
### Option 3: introduce ExplainPack inside `ExplainActions.extract_logic`
- Code location:
- `app/modules/agent/engine/orchestrator/actions/explain_actions.py`
- Why:
- useful if ExplainPack is meant only for explain-style scenarios
- keeps general project QA untouched
- Risk:
- medium
- narrower integration point; may create duplicate reasoning logic separate from project QA analysis path
## Bottom line
- C0-C3 are implemented and persisted in one physical store: `rag_chunks`.
- Retrieval is a hybrid SQL ranking over lexical heuristics plus pgvector distance.
- C2 exists, but only as retrievable edge documents, not as a traversable graph subsystem.
- C3 covers FastAPI/Flask/Typer/Click only.
- The least invasive ExplainPack integration point is after retrieval and before answer composition, preferably as a new explicit orchestrator artifact or as a replacement for `context_analysis`.