Новый раг
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
11
app/modules/rag/indexing/code/symbols/ast_parser.py
Normal file
11
app/modules/rag/indexing/code/symbols/ast_parser.py
Normal file
@@ -0,0 +1,11 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import ast
|
||||
|
||||
|
||||
class PythonAstParser:
|
||||
def parse_module(self, text: str) -> ast.AST | None:
|
||||
try:
|
||||
return ast.parse(text)
|
||||
except SyntaxError:
|
||||
return None
|
||||
32
app/modules/rag/indexing/code/symbols/document_builder.py
Normal file
32
app/modules/rag/indexing/code/symbols/document_builder.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.modules.rag.contracts import RagDocument, RagLayer, RagSource, RagSpan
|
||||
from app.modules.rag.indexing.code.symbols.extractor import PySymbol
|
||||
|
||||
|
||||
class SymbolDocumentBuilder:
|
||||
def build(self, source: RagSource, symbol: PySymbol) -> RagDocument:
|
||||
body = [f"{symbol.kind} {symbol.qname}", symbol.signature]
|
||||
if symbol.docstring:
|
||||
body.append(symbol.docstring.strip())
|
||||
return RagDocument(
|
||||
layer=RagLayer.CODE_SYMBOL_CATALOG,
|
||||
lang="python",
|
||||
source=source,
|
||||
title=symbol.qname,
|
||||
text="\n".join(part for part in body if part),
|
||||
span=RagSpan(symbol.start_line, symbol.end_line),
|
||||
metadata={
|
||||
"symbol_id": symbol.symbol_id,
|
||||
"qname": symbol.qname,
|
||||
"kind": symbol.kind,
|
||||
"signature": symbol.signature,
|
||||
"decorators_or_annotations": symbol.decorators,
|
||||
"docstring_or_javadoc": symbol.docstring,
|
||||
"parent_symbol_id": symbol.parent_symbol_id,
|
||||
"package_or_module": source.path.replace("/", ".").removesuffix(".py"),
|
||||
"is_entry_candidate": bool(symbol.decorators),
|
||||
"lang_payload": symbol.lang_payload,
|
||||
"artifact_type": "CODE",
|
||||
},
|
||||
)
|
||||
130
app/modules/rag/indexing/code/symbols/extractor.py
Normal file
130
app/modules/rag/indexing/code/symbols/extractor.py
Normal file
@@ -0,0 +1,130 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import ast
|
||||
from dataclasses import dataclass, field
|
||||
from hashlib import sha256
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class PySymbol:
|
||||
symbol_id: str
|
||||
qname: str
|
||||
kind: str
|
||||
path: str
|
||||
start_line: int
|
||||
end_line: int
|
||||
signature: str
|
||||
decorators: list[str] = field(default_factory=list)
|
||||
docstring: str | None = None
|
||||
parent_symbol_id: str | None = None
|
||||
lang_payload: dict = field(default_factory=dict)
|
||||
|
||||
|
||||
class SymbolExtractor:
|
||||
def extract(self, path: str, text: str, ast_tree: ast.AST | None) -> list[PySymbol]:
|
||||
if ast_tree is None:
|
||||
return []
|
||||
collector = _SymbolVisitor(path)
|
||||
collector.visit(ast_tree)
|
||||
return collector.symbols
|
||||
|
||||
|
||||
class _SymbolVisitor(ast.NodeVisitor):
|
||||
def __init__(self, path: str) -> None:
|
||||
self._path = path
|
||||
self._stack: list[tuple[str, str]] = []
|
||||
self.symbols: list[PySymbol] = []
|
||||
|
||||
def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
|
||||
if self._stack:
|
||||
return
|
||||
module = node.module or ""
|
||||
for item in node.names:
|
||||
local_name = item.asname or item.name
|
||||
imported_name = f"{module}.{item.name}".strip(".")
|
||||
self.symbols.append(
|
||||
PySymbol(
|
||||
symbol_id=sha256(f"{self._path}|{local_name}|import_alias".encode("utf-8")).hexdigest(),
|
||||
qname=local_name,
|
||||
kind="const",
|
||||
path=self._path,
|
||||
start_line=int(getattr(node, "lineno", 1)),
|
||||
end_line=int(getattr(node, "end_lineno", getattr(node, "lineno", 1))),
|
||||
signature=f"{local_name} = {imported_name}",
|
||||
lang_payload={"imported_from": imported_name, "import_alias": True},
|
||||
)
|
||||
)
|
||||
self.generic_visit(node)
|
||||
|
||||
def visit_Import(self, node: ast.Import) -> None:
|
||||
if self._stack:
|
||||
return
|
||||
for item in node.names:
|
||||
local_name = item.asname or item.name
|
||||
self.symbols.append(
|
||||
PySymbol(
|
||||
symbol_id=sha256(f"{self._path}|{local_name}|import".encode("utf-8")).hexdigest(),
|
||||
qname=local_name,
|
||||
kind="const",
|
||||
path=self._path,
|
||||
start_line=int(getattr(node, "lineno", 1)),
|
||||
end_line=int(getattr(node, "end_lineno", getattr(node, "lineno", 1))),
|
||||
signature=f"import {item.name}",
|
||||
lang_payload={"imported_from": item.name, "import_alias": bool(item.asname)},
|
||||
)
|
||||
)
|
||||
self.generic_visit(node)
|
||||
|
||||
def visit_ClassDef(self, node: ast.ClassDef) -> None:
|
||||
self._add_symbol(node, "class", {"bases": [self._expr_name(base) for base in node.bases]})
|
||||
self.generic_visit(node)
|
||||
self._stack.pop()
|
||||
|
||||
def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
|
||||
self._add_function(node, is_async=False)
|
||||
|
||||
def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
|
||||
self._add_function(node, is_async=True)
|
||||
|
||||
def _add_function(self, node, *, is_async: bool) -> None:
|
||||
kind = "method" if self._stack and self._stack[-1][0] == "class" else "function"
|
||||
self._add_symbol(node, kind, {"async": is_async})
|
||||
self.generic_visit(node)
|
||||
self._stack.pop()
|
||||
|
||||
def _add_symbol(self, node, kind: str, lang_payload: dict) -> None:
|
||||
names = [name for _, name in self._stack] + [node.name]
|
||||
qname = ".".join(names)
|
||||
symbol_id = sha256(f"{self._path}|{qname}|{kind}".encode("utf-8")).hexdigest()
|
||||
signature = self._signature(node)
|
||||
symbol = PySymbol(
|
||||
symbol_id=symbol_id,
|
||||
qname=qname,
|
||||
kind=kind,
|
||||
path=self._path,
|
||||
start_line=int(getattr(node, "lineno", 1)),
|
||||
end_line=int(getattr(node, "end_lineno", getattr(node, "lineno", 1))),
|
||||
signature=signature,
|
||||
decorators=[self._expr_name(item) for item in getattr(node, "decorator_list", [])],
|
||||
docstring=ast.get_docstring(node),
|
||||
parent_symbol_id=self._stack[-1][1] if self._stack else None,
|
||||
lang_payload=lang_payload,
|
||||
)
|
||||
self.symbols.append(symbol)
|
||||
self._stack.append((kind, qname))
|
||||
|
||||
def _signature(self, node) -> str:
|
||||
if isinstance(node, ast.ClassDef):
|
||||
bases = ", ".join(self._expr_name(base) for base in node.bases)
|
||||
return f"{node.name}({bases})" if bases else node.name
|
||||
args = [arg.arg for arg in getattr(node.args, "args", [])]
|
||||
return f"{node.name}({', '.join(args)})"
|
||||
|
||||
def _expr_name(self, node) -> str:
|
||||
if isinstance(node, ast.Name):
|
||||
return node.id
|
||||
if isinstance(node, ast.Attribute):
|
||||
return f"{self._expr_name(node.value)}.{node.attr}"
|
||||
if isinstance(node, ast.Call):
|
||||
return self._expr_name(node.func)
|
||||
return ast.dump(node, include_attributes=False)
|
||||
Reference in New Issue
Block a user