Фиксация изменений
This commit is contained in:
380
docs/architecture/contracts_retrieval.json
Normal file
380
docs/architecture/contracts_retrieval.json
Normal file
@@ -0,0 +1,380 @@
|
||||
{
|
||||
"layers": {
|
||||
"C0_SOURCE_CHUNKS": {
|
||||
"retriever": {
|
||||
"class": "RagService",
|
||||
"file": "app/modules/rag/services/rag_service.py",
|
||||
"method": "retrieve"
|
||||
},
|
||||
"indexer": {
|
||||
"class": "CodeTextDocumentBuilder",
|
||||
"file": "app/modules/rag/indexing/code/code_text/document_builder.py",
|
||||
"method": "build"
|
||||
},
|
||||
"input": {
|
||||
"type": "observed shape",
|
||||
"fields": {
|
||||
"rag_session_id": {
|
||||
"type": "string",
|
||||
"required": true
|
||||
},
|
||||
"query": {
|
||||
"type": "string",
|
||||
"required": true
|
||||
},
|
||||
"layers": {
|
||||
"type": "implicit list[string]",
|
||||
"required": false,
|
||||
"source": "RagQueryRouter.layers_for_mode('code')"
|
||||
}
|
||||
}
|
||||
},
|
||||
"output": {
|
||||
"type": "list[dict]",
|
||||
"fields": {
|
||||
"source": "string",
|
||||
"content": "string",
|
||||
"layer": "\"C0_SOURCE_CHUNKS\"",
|
||||
"title": "string",
|
||||
"metadata": {
|
||||
"chunk_index": "int",
|
||||
"chunk_type": "\"symbol_block\" | \"window\"",
|
||||
"module_or_unit": "string",
|
||||
"artifact_type": "\"CODE\""
|
||||
},
|
||||
"score": "float | null"
|
||||
}
|
||||
},
|
||||
"examples": {
|
||||
"input": {
|
||||
"rag_session_id": "rag-123",
|
||||
"query": "where is implemented get_user"
|
||||
},
|
||||
"output": {
|
||||
"source": "app/api/users.py",
|
||||
"content": "async def get_user(user_id: str):\n service = UserService()\n return service.get_user(user_id)",
|
||||
"layer": "C0_SOURCE_CHUNKS",
|
||||
"title": "app/api/users.py:get_user",
|
||||
"metadata": {
|
||||
"chunk_index": 0,
|
||||
"chunk_type": "symbol_block",
|
||||
"module_or_unit": "app.api.users",
|
||||
"artifact_type": "CODE"
|
||||
},
|
||||
"score": 0.07
|
||||
}
|
||||
},
|
||||
"defaults": {
|
||||
"retrieve_limit": 8,
|
||||
"embed_batch_size_env": "RAG_EMBED_BATCH_SIZE",
|
||||
"embed_batch_size_default": 16,
|
||||
"window_chunk_size_lines": 80,
|
||||
"window_overlap_lines": 15
|
||||
},
|
||||
"limitations": [
|
||||
"Line spans are stored but not returned in the public retrieval item shape.",
|
||||
"No direct path or namespace filter is exposed through the retrieval endpoint."
|
||||
]
|
||||
},
|
||||
"C1_SYMBOL_CATALOG": {
|
||||
"retriever": {
|
||||
"class": "RagService",
|
||||
"file": "app/modules/rag/services/rag_service.py",
|
||||
"method": "retrieve"
|
||||
},
|
||||
"indexer": {
|
||||
"class": "SymbolDocumentBuilder",
|
||||
"file": "app/modules/rag/indexing/code/symbols/document_builder.py",
|
||||
"method": "build"
|
||||
},
|
||||
"input": {
|
||||
"type": "observed shape",
|
||||
"fields": {
|
||||
"rag_session_id": {
|
||||
"type": "string",
|
||||
"required": true
|
||||
},
|
||||
"query": {
|
||||
"type": "string",
|
||||
"required": true
|
||||
},
|
||||
"query_term_expansion": {
|
||||
"type": "list[string]",
|
||||
"required": false,
|
||||
"source": "extract_query_terms(query_text)",
|
||||
"max_items": 6
|
||||
}
|
||||
}
|
||||
},
|
||||
"output": {
|
||||
"type": "list[dict]",
|
||||
"fields": {
|
||||
"source": "string",
|
||||
"content": "string",
|
||||
"layer": "\"C1_SYMBOL_CATALOG\"",
|
||||
"title": "string",
|
||||
"metadata": {
|
||||
"symbol_id": "string",
|
||||
"qname": "string",
|
||||
"kind": "\"class\" | \"function\" | \"method\" | \"const\"",
|
||||
"signature": "string",
|
||||
"decorators_or_annotations": "list[string]",
|
||||
"docstring_or_javadoc": "string | null",
|
||||
"parent_symbol_id": "string | null",
|
||||
"package_or_module": "string",
|
||||
"is_entry_candidate": "bool",
|
||||
"lang_payload": "object",
|
||||
"artifact_type": "\"CODE\""
|
||||
},
|
||||
"score": "float | null"
|
||||
}
|
||||
},
|
||||
"examples": {
|
||||
"input": {
|
||||
"rag_session_id": "rag-123",
|
||||
"query": "where is implemented get_user"
|
||||
},
|
||||
"output": {
|
||||
"source": "app/api/users.py",
|
||||
"content": "function get_user\nget_user(user_id)",
|
||||
"layer": "C1_SYMBOL_CATALOG",
|
||||
"title": "get_user",
|
||||
"metadata": {
|
||||
"symbol_id": "sha256(...)",
|
||||
"qname": "get_user",
|
||||
"kind": "function",
|
||||
"signature": "get_user(user_id)",
|
||||
"decorators_or_annotations": [
|
||||
"router.get"
|
||||
],
|
||||
"docstring_or_javadoc": null,
|
||||
"parent_symbol_id": null,
|
||||
"package_or_module": "app.api.users",
|
||||
"is_entry_candidate": true,
|
||||
"lang_payload": {
|
||||
"async": true
|
||||
},
|
||||
"artifact_type": "CODE"
|
||||
},
|
||||
"score": 0.07
|
||||
}
|
||||
},
|
||||
"defaults": {
|
||||
"retrieve_limit": 8,
|
||||
"layer_rank": 1
|
||||
},
|
||||
"limitations": [
|
||||
"Only Python AST symbols are indexed.",
|
||||
"Cross-file resolution is not implemented.",
|
||||
"parent_symbol_id is an observed qname-like value, not guaranteed to be a symbol hash."
|
||||
]
|
||||
},
|
||||
"C2_DEPENDENCY_GRAPH": {
|
||||
"retriever": {
|
||||
"class": "RagService",
|
||||
"file": "app/modules/rag/services/rag_service.py",
|
||||
"method": "retrieve"
|
||||
},
|
||||
"indexer": {
|
||||
"class": "EdgeDocumentBuilder",
|
||||
"file": "app/modules/rag/indexing/code/edges/document_builder.py",
|
||||
"method": "build"
|
||||
},
|
||||
"input": {
|
||||
"type": "observed shape",
|
||||
"fields": {
|
||||
"rag_session_id": {
|
||||
"type": "string",
|
||||
"required": true
|
||||
},
|
||||
"query": {
|
||||
"type": "string",
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"output": {
|
||||
"type": "list[dict]",
|
||||
"fields": {
|
||||
"source": "string",
|
||||
"content": "string",
|
||||
"layer": "\"C2_DEPENDENCY_GRAPH\"",
|
||||
"title": "string",
|
||||
"metadata": {
|
||||
"edge_id": "string",
|
||||
"edge_type": "\"calls\" | \"imports\" | \"inherits\"",
|
||||
"src_symbol_id": "string",
|
||||
"src_qname": "string",
|
||||
"dst_symbol_id": "string | null",
|
||||
"dst_ref": "string | null",
|
||||
"resolution": "\"resolved\" | \"partial\"",
|
||||
"lang_payload": "object",
|
||||
"artifact_type": "\"CODE\""
|
||||
},
|
||||
"score": "float | null"
|
||||
}
|
||||
},
|
||||
"examples": {
|
||||
"input": {
|
||||
"rag_session_id": "rag-123",
|
||||
"query": "how get_user calls service"
|
||||
},
|
||||
"output": {
|
||||
"source": "app/api/users.py",
|
||||
"content": "get_user calls UserService",
|
||||
"layer": "C2_DEPENDENCY_GRAPH",
|
||||
"title": "get_user:calls",
|
||||
"metadata": {
|
||||
"edge_id": "sha256(...)",
|
||||
"edge_type": "calls",
|
||||
"src_symbol_id": "sha256(...)",
|
||||
"src_qname": "get_user",
|
||||
"dst_symbol_id": null,
|
||||
"dst_ref": "UserService",
|
||||
"resolution": "partial",
|
||||
"lang_payload": {
|
||||
"callsite_kind": "function_call"
|
||||
},
|
||||
"artifact_type": "CODE"
|
||||
},
|
||||
"score": 0.11
|
||||
}
|
||||
},
|
||||
"defaults": {
|
||||
"retrieve_limit": 8,
|
||||
"layer_rank": 2,
|
||||
"graph_build_mode": "static_python_ast"
|
||||
},
|
||||
"limitations": [
|
||||
"No traversal API exists.",
|
||||
"Edges are stored as retrievable rows, not as a graph-native store.",
|
||||
"Destination resolution is local to one indexed file."
|
||||
]
|
||||
},
|
||||
"C3_ENTRYPOINTS": {
|
||||
"retriever": {
|
||||
"class": "RagService",
|
||||
"file": "app/modules/rag/services/rag_service.py",
|
||||
"method": "retrieve"
|
||||
},
|
||||
"indexer": {
|
||||
"class": "EntrypointDocumentBuilder",
|
||||
"file": "app/modules/rag/indexing/code/entrypoints/document_builder.py",
|
||||
"method": "build"
|
||||
},
|
||||
"input": {
|
||||
"type": "observed shape",
|
||||
"fields": {
|
||||
"rag_session_id": {
|
||||
"type": "string",
|
||||
"required": true
|
||||
},
|
||||
"query": {
|
||||
"type": "string",
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"output": {
|
||||
"type": "list[dict]",
|
||||
"fields": {
|
||||
"source": "string",
|
||||
"content": "string",
|
||||
"layer": "\"C3_ENTRYPOINTS\"",
|
||||
"title": "string",
|
||||
"metadata": {
|
||||
"entry_id": "string",
|
||||
"entry_type": "\"http\" | \"cli\"",
|
||||
"framework": "\"fastapi\" | \"flask\" | \"typer\" | \"click\"",
|
||||
"route_or_command": "string",
|
||||
"handler_symbol_id": "string",
|
||||
"lang_payload": "object",
|
||||
"artifact_type": "\"CODE\""
|
||||
},
|
||||
"score": "float | null"
|
||||
}
|
||||
},
|
||||
"examples": {
|
||||
"input": {
|
||||
"rag_session_id": "rag-123",
|
||||
"query": "which endpoint handles get user"
|
||||
},
|
||||
"output": {
|
||||
"source": "app/api/users.py",
|
||||
"content": "fastapi http \"/users/{user_id}\"",
|
||||
"layer": "C3_ENTRYPOINTS",
|
||||
"title": "\"/users/{user_id}\"",
|
||||
"metadata": {
|
||||
"entry_id": "sha256(...)",
|
||||
"entry_type": "http",
|
||||
"framework": "fastapi",
|
||||
"route_or_command": "\"/users/{user_id}\"",
|
||||
"handler_symbol_id": "sha256(...)",
|
||||
"lang_payload": {
|
||||
"methods": [
|
||||
"GET"
|
||||
]
|
||||
},
|
||||
"artifact_type": "CODE"
|
||||
},
|
||||
"score": 0.05
|
||||
}
|
||||
},
|
||||
"defaults": {
|
||||
"retrieve_limit": 8,
|
||||
"layer_rank": 0
|
||||
},
|
||||
"limitations": [
|
||||
"Detection is decorator-string based.",
|
||||
"No detection logic for Django, Celery, RQ, or cron entrypoints was found; metadata.framework covers only fastapi, flask, typer, and click.",
|
||||
"Returned payload does not expose line spans."
|
||||
]
|
||||
}
|
||||
},
|
||||
"retrieval_endpoint": {
|
||||
"entrypoint": {
|
||||
"file": "app/modules/rag_session/module.py",
|
||||
"method": "internal_router.retrieve"
|
||||
},
|
||||
"request": {
|
||||
"type": "dict",
|
||||
"fields": {
|
||||
"rag_session_id": "string (optional if project_id is provided)",
|
||||
"project_id": "string (optional; fallback used when rag_session_id is not provided)",
|
||||
"query": "string"
|
||||
}
|
||||
},
|
||||
"response": {
|
||||
"type": "dict",
|
||||
"fields": {
|
||||
"items": "list[retrieval item]"
|
||||
}
|
||||
},
|
||||
"defaults": {
|
||||
"mode": "docs unless RagQueryRouter detects code hints",
|
||||
"limit": 8,
|
||||
"embedding_provider": "GigaChat embeddings",
|
||||
"fallback_after_embedding_error": true,
|
||||
"fallback_to_docs_when_code_empty": true
|
||||
}
|
||||
},
|
||||
"ranking": {
|
||||
"storage": "PostgreSQL rag_chunks + pgvector",
|
||||
"query_repository": {
|
||||
"class": "RagQueryRepository",
|
||||
"file": "app/modules/rag/persistence/query_repository.py",
|
||||
"method": "retrieve"
|
||||
},
|
||||
"order_by": [
|
||||
"lexical_rank ASC",
|
||||
"test_penalty ASC",
|
||||
"layer_rank ASC",
|
||||
"embedding <=> query_embedding ASC"
|
||||
],
|
||||
"notes": [
|
||||
"lexical_rank is derived from matching the extracted query terms against qname, symbol_id, title, path, and content",
|
||||
"test_penalty is applied only when prefer_non_tests=true",
|
||||
"layer priority is C3 > C1 > C2 > C0 for code retrieval (a lower layer_rank sorts first)"
|
||||
]
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user