Новый раг
This commit is contained in:
52
app/modules/rag/indexing/code/pipeline.py
Normal file
52
app/modules/rag/indexing/code/pipeline.py
Normal file
@@ -0,0 +1,52 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.modules.rag.contracts import RagDocument, RagSource
|
||||
from app.modules.rag.indexing.code.code_text.chunker import CodeTextChunker
|
||||
from app.modules.rag.indexing.code.code_text.document_builder import CodeTextDocumentBuilder
|
||||
from app.modules.rag.indexing.code.edges.document_builder import EdgeDocumentBuilder
|
||||
from app.modules.rag.indexing.code.edges.extractor import EdgeExtractor
|
||||
from app.modules.rag.indexing.code.entrypoints.document_builder import EntrypointDocumentBuilder
|
||||
from app.modules.rag.indexing.code.entrypoints.fastapi_detector import FastApiEntrypointDetector
|
||||
from app.modules.rag.indexing.code.entrypoints.flask_detector import FlaskEntrypointDetector
|
||||
from app.modules.rag.indexing.code.entrypoints.registry import EntrypointDetectorRegistry
|
||||
from app.modules.rag.indexing.code.entrypoints.typer_click_detector import TyperClickEntrypointDetector
|
||||
from app.modules.rag.indexing.code.file_filter import PythonFileFilter
|
||||
from app.modules.rag.indexing.code.symbols.ast_parser import PythonAstParser
|
||||
from app.modules.rag.indexing.code.symbols.document_builder import SymbolDocumentBuilder
|
||||
from app.modules.rag.indexing.code.symbols.extractor import SymbolExtractor
|
||||
|
||||
|
||||
class CodeIndexingPipeline:
    """Turn the content of one source file into RAG documents.

    Four families of documents are produced, always in this order:
    code-text chunks, symbols, edges, and entrypoints. Each family has its
    own extractor (or detector registry) and its own document builder; all
    of them are created once in ``__init__`` and reused for every file.
    """

    def __init__(self) -> None:
        """Instantiate the filter, parser, extractors, and document builders."""
        # Decides which paths this pipeline accepts (see ``supports``).
        self._filter = PythonFileFilter()

        # Code-text stage: chunk the content, then wrap every chunk.
        self._chunker = CodeTextChunker()
        self._code_builder = CodeTextDocumentBuilder()

        # Symbol stage: parse the module, then extract and wrap symbols.
        self._parser = PythonAstParser()
        self._symbols = SymbolExtractor()
        self._symbol_builder = SymbolDocumentBuilder()

        # Edge stage: works on the parsed tree plus the extracted symbols.
        self._edges = EdgeExtractor()
        self._edge_builder = EdgeDocumentBuilder()

        # Entrypoint stage: one registry fanning out to each detector.
        detectors = [
            FastApiEntrypointDetector(),
            FlaskEntrypointDetector(),
            TyperClickEntrypointDetector(),
        ]
        self._entrypoints = EntrypointDetectorRegistry(detectors)
        self._entrypoint_builder = EntrypointDocumentBuilder()

    def supports(self, path: str) -> bool:
        """Return whether the file filter wants ``path`` to be indexed."""
        return self._filter.should_index(path)

    def index_file(self, *, repo_id: str, commit_sha: str | None, path: str, content: str) -> list[RagDocument]:
        """Build every RAG document derived from a single file.

        Args:
            repo_id: Repository identifier recorded in the document source.
            commit_sha: Commit recorded in the document source; may be ``None``.
            path: Path of the file, passed to the chunker, the extractors,
                and the entrypoint detectors.
            content: Full text of the file.

        Returns:
            Code-text documents first (one per chunk, numbered from 0 via
            ``chunk_index``), followed by symbol, edge, and entrypoint
            documents.
        """
        origin = RagSource(repo_id=repo_id, commit_sha=commit_sha, path=path)
        collected: list[RagDocument] = []

        # Stage 1: raw code text, chunked.
        pieces = self._chunker.chunk(path, content)
        collected.extend(
            self._code_builder.build(origin, piece, chunk_index=position)
            for position, piece in enumerate(pieces)
        )

        # Stage 2: symbols taken from the parsed module.
        module_tree = self._parser.parse_module(content)
        found_symbols = self._symbols.extract(path, content, module_tree)
        collected.extend(
            self._symbol_builder.build(origin, item) for item in found_symbols
        )

        # Stage 3: edges, derived from the same tree and the symbols above.
        found_edges = self._edges.extract(path, module_tree, found_symbols)
        collected.extend(
            self._edge_builder.build(origin, link) for link in found_edges
        )

        # Stage 4: entrypoints reported by the registered detectors.
        detected = self._entrypoints.detect_all(path=path, symbols=found_symbols)
        collected.extend(
            self._entrypoint_builder.build(origin, hit) for hit in detected
        )

        return collected
|
||||
Reference in New Issue
Block a user