Новый раг

This commit is contained in:
2026-03-01 14:21:33 +03:00
parent 2728c07ba9
commit 1ef0b4d68c
95 changed files with 3145 additions and 927 deletions

View File

@@ -0,0 +1,52 @@
from __future__ import annotations
from app.modules.rag.contracts import RagDocument, RagSource
from app.modules.rag.indexing.code.code_text.chunker import CodeTextChunker
from app.modules.rag.indexing.code.code_text.document_builder import CodeTextDocumentBuilder
from app.modules.rag.indexing.code.edges.document_builder import EdgeDocumentBuilder
from app.modules.rag.indexing.code.edges.extractor import EdgeExtractor
from app.modules.rag.indexing.code.entrypoints.document_builder import EntrypointDocumentBuilder
from app.modules.rag.indexing.code.entrypoints.fastapi_detector import FastApiEntrypointDetector
from app.modules.rag.indexing.code.entrypoints.flask_detector import FlaskEntrypointDetector
from app.modules.rag.indexing.code.entrypoints.registry import EntrypointDetectorRegistry
from app.modules.rag.indexing.code.entrypoints.typer_click_detector import TyperClickEntrypointDetector
from app.modules.rag.indexing.code.file_filter import PythonFileFilter
from app.modules.rag.indexing.code.symbols.ast_parser import PythonAstParser
from app.modules.rag.indexing.code.symbols.document_builder import SymbolDocumentBuilder
from app.modules.rag.indexing.code.symbols.extractor import SymbolExtractor
class CodeIndexingPipeline:
    """Turn one source file into a flat list of RAG documents.

    The pipeline wires together a file filter, a code-text chunker, an AST
    parser, symbol and edge extractors, a registry of entrypoint detectors
    (FastAPI, Flask, Typer/Click) and one document builder per document
    kind. Judging by the collaborator names it targets Python sources; the
    actual filtering rule lives in ``PythonFileFilter``.
    """

    def __init__(self) -> None:
        """Instantiate every collaborator with its default configuration."""
        self._filter = PythonFileFilter()
        self._chunker = CodeTextChunker()
        self._code_builder = CodeTextDocumentBuilder()
        self._parser = PythonAstParser()
        self._symbols = SymbolExtractor()
        self._symbol_builder = SymbolDocumentBuilder()
        self._edges = EdgeExtractor()
        self._edge_builder = EdgeDocumentBuilder()
        detectors = [
            FastApiEntrypointDetector(),
            FlaskEntrypointDetector(),
            TyperClickEntrypointDetector(),
        ]
        self._entrypoints = EntrypointDetectorRegistry(detectors)
        self._entrypoint_builder = EntrypointDocumentBuilder()

    def supports(self, path: str) -> bool:
        """Return the file filter's verdict on whether *path* should be indexed."""
        verdict = self._filter.should_index(path)
        return verdict

    def index_file(
        self,
        *,
        repo_id: str,
        commit_sha: str | None,
        path: str,
        content: str,
    ) -> list[RagDocument]:
        """Build all RAG documents for a single file.

        Args:
            repo_id: Repository identifier stored on the document source.
            commit_sha: Commit hash stored on the document source; may be None.
            path: Path of the file, passed to the chunker, extractors and
                entrypoint detectors.
            content: Full text of the file.

        Returns:
            Documents in a fixed order: code-text chunks (numbered from 0 via
            ``chunk_index``), then symbols, then edges, then entrypoints.

        Exceptions raised by any collaborator propagate unchanged; nothing is
        caught here.
        """
        origin = RagSource(repo_id=repo_id, commit_sha=commit_sha, path=path)
        collected: list[RagDocument] = []

        # 1. Raw code text, split into chunks.
        pieces = self._chunker.chunk(path, content)
        collected.extend(
            self._code_builder.build(origin, piece, chunk_index=position)
            for position, piece in enumerate(pieces)
        )

        # 2. Symbols extracted from the parsed module.
        module_tree = self._parser.parse_module(content)
        found_symbols = self._symbols.extract(path, content, module_tree)
        collected.extend(
            self._symbol_builder.build(origin, item) for item in found_symbols
        )

        # 3. Edges, derived from the same tree and the symbols found above.
        found_edges = self._edges.extract(path, module_tree, found_symbols)
        collected.extend(
            self._edge_builder.build(origin, link) for link in found_edges
        )

        # 4. Entrypoints reported by the registered detectors.
        detected = self._entrypoints.detect_all(path=path, symbols=found_symbols)
        collected.extend(
            self._entrypoint_builder.build(origin, entry) for entry in detected
        )

        return collected