Новый раг

This commit is contained in:
2026-03-01 14:21:33 +03:00
parent 2728c07ba9
commit 1ef0b4d68c
95 changed files with 3145 additions and 927 deletions

View File

@@ -0,0 +1,43 @@
from __future__ import annotations
from app.modules.rag.contracts import RagLayer, RetrievalMode
class RagQueryRouter:
    """Pick a retrieval mode for a free-text query and list the layers for it.

    A query is routed to ``RetrievalMode.CODE`` when its lowercased text
    contains any entry of ``_CODE_HINTS``; every other query is routed to
    ``RetrievalMode.DOCS``.
    """

    # Lowercase substrings whose presence in the lowercased query selects
    # CODE mode.
    # NOTE(review): matching is a plain substring test, so short hints such
    # as "class" also match inside longer words.
    _CODE_HINTS = (
        "как работает код",
        "explain code",
        "explain the code",
        "по коду",
        "из кода",
        "построй документацию по коду",
        "документацию по коду",
        "where is implemented",
        "где реализовано",
        "endpoint",
        "handler",
        "symbol",
        "function",
        "class",
        "method",
    )

    # Layers handed out for every mode other than RetrievalMode.CODE.
    _DOCS_LAYERS = [
        RagLayer.DOCS_MODULE_CATALOG,
        RagLayer.DOCS_FACT_INDEX,
        RagLayer.DOCS_SECTION_INDEX,
        RagLayer.DOCS_POLICY_INDEX,
    ]

    # Layers handed out for RetrievalMode.CODE.
    _CODE_LAYERS = [
        RagLayer.CODE_ENTRYPOINTS,
        RagLayer.CODE_SYMBOL_CATALOG,
        RagLayer.CODE_DEPENDENCY_GRAPH,
        RagLayer.CODE_SOURCE_CHUNKS,
    ]

    def resolve_mode(self, query: str) -> str:
        """Return the retrieval mode implied by ``query``.

        Args:
            query: User query text; compared case-insensitively.

        Returns:
            ``RetrievalMode.CODE`` if any code hint occurs in the query,
            otherwise ``RetrievalMode.DOCS``.
        """
        haystack = query.lower()
        for hint in self._CODE_HINTS:
            if hint in haystack:
                return RetrievalMode.CODE
        return RetrievalMode.DOCS

    def layers_for_mode(self, mode: str) -> list[str]:
        """Return a fresh list of layers for ``mode``.

        Args:
            mode: Retrieval mode, compared against ``RetrievalMode.CODE``.

        Returns:
            A copy of the code layers for CODE mode, or a copy of the docs
            layers for any other value. A copy is returned so callers cannot
            mutate the class-level lists.
        """
        if mode == RetrievalMode.CODE:
            return list(self._CODE_LAYERS)
        return list(self._DOCS_LAYERS)

View File

@@ -0,0 +1,45 @@
from __future__ import annotations
import re
def extract_query_terms(query_text: str) -> list[str]:
    """Derive up to six de-duplicated search terms from a query.

    ASCII identifier-like tokens (a letter or underscore followed by at
    least two letters, digits or underscores) are pulled from the query and
    expanded through ``_identifier_variants``. Terms from
    ``_intent_variants`` are appended after them.

    Args:
        query_text: Raw query; a falsy value is scanned as an empty string.

    Returns:
        At most six terms, in order of first appearance.
    """
    collected: list[str] = []
    for identifier in re.findall(r"[A-Za-z_][A-Za-z0-9_]{2,}", query_text or ""):
        collected.extend(_identifier_variants(identifier))
    collected.extend(_intent_variants(query_text))
    # dict.fromkeys drops repeats while keeping first-seen order.
    unique_terms = list(dict.fromkeys(collected))
    return unique_terms[:6]
def _identifier_variants(term: str) -> list[str]:
    """Return alternative lowercase spellings of an identifier.

    The result holds, in this order and without repeats:

    1. the lowercased term;
    2. its snake_case form from ``_camel_to_snake``;
    3. for terms ending in "manager", the form "<stem>_manager";
    4. the snake_case form with all underscores removed.

    Args:
        term: Identifier-like token taken from a query.

    Returns:
        The distinct variants; the lowercased term is always first.
    """
    suffix = "manager"
    lowered = term.lower()
    snake = _camel_to_snake(term)

    candidates = [lowered]
    if snake:
        candidates.append(snake)

    if lowered.endswith(suffix):
        # Strip underscores already present before the suffix so that
        # "task_manager" does not turn into "task__manager"; an empty stem
        # (term is just "manager" or "_manager") yields no extra variant.
        stem = lowered[: -len(suffix)].rstrip("_")
        if stem:
            candidates.append(f"{stem}_{suffix}")

    compact = snake.replace("_", "")
    if compact:
        candidates.append(compact)

    # Order-preserving de-duplication.
    return list(dict.fromkeys(candidates))
def _camel_to_snake(term: str) -> str:
first = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", term)
return re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", first).lower()
def _intent_variants(query_text: str) -> list[str]:
lowered = (query_text or "").lower()
variants: list[str] = []
if any(token in lowered for token in ("управ", "control", "manage", "management")):
variants.extend(["control", "management", "start", "stop", "status"])
return variants