Новый раг
This commit is contained in:
Binary file not shown.
Binary file not shown.
43
app/modules/rag/retrieval/query_router.py
Normal file
43
app/modules/rag/retrieval/query_router.py
Normal file
@@ -0,0 +1,43 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.modules.rag.contracts import RagLayer, RetrievalMode
|
||||
|
||||
|
||||
class RagQueryRouter:
    """Pick a retrieval mode for a free-text query and list the RAG layers for it.

    Routing is a keyword heuristic: if the lower-cased query contains any of
    the ``_CODE_HINTS`` substrings, the query is routed to
    ``RetrievalMode.CODE``; otherwise to ``RetrievalMode.DOCS``.
    """

    # Lower-case substrings (Russian and English). Matching is plain substring
    # containment, so short hints such as "class" also match inside longer
    # words (e.g. "classification").
    _CODE_HINTS = (
        "как работает код",
        "explain code",
        "explain the code",
        "по коду",
        "из кода",
        "построй документацию по коду",
        "документацию по коду",
        "where is implemented",
        "где реализовано",
        "endpoint",
        "handler",
        "symbol",
        "function",
        "class",
        "method",
    )

    # Layers returned for any mode other than RetrievalMode.CODE.
    _DOCS_LAYERS = [
        RagLayer.DOCS_MODULE_CATALOG,
        RagLayer.DOCS_FACT_INDEX,
        RagLayer.DOCS_SECTION_INDEX,
        RagLayer.DOCS_POLICY_INDEX,
    ]
    # Layers returned for RetrievalMode.CODE.
    _CODE_LAYERS = [
        RagLayer.CODE_ENTRYPOINTS,
        RagLayer.CODE_SYMBOL_CATALOG,
        RagLayer.CODE_DEPENDENCY_GRAPH,
        RagLayer.CODE_SOURCE_CHUNKS,
    ]

    def resolve_mode(self, query: str) -> str:
        """Return the retrieval mode for ``query``.

        Args:
            query: Raw user query. ``None`` or an empty string is treated as
                a query without code hints.

        Returns:
            ``RetrievalMode.CODE`` if any code hint occurs in the lower-cased
            query, otherwise ``RetrievalMode.DOCS``.
        """
        # Guard against None so a missing query falls back to DOCS instead of
        # raising AttributeError on .lower().
        lowered = (query or "").lower()
        if any(hint in lowered for hint in self._CODE_HINTS):
            return RetrievalMode.CODE
        return RetrievalMode.DOCS

    def layers_for_mode(self, mode: str) -> list[str]:
        """Return a fresh list of layers for ``mode``.

        Any mode other than ``RetrievalMode.CODE`` yields the docs layers.
        A copy is returned so callers cannot mutate the class-level lists.
        """
        layers = self._CODE_LAYERS if mode == RetrievalMode.CODE else self._DOCS_LAYERS
        return list(layers)
|
||||
45
app/modules/rag/retrieval/query_terms.py
Normal file
45
app/modules/rag/retrieval/query_terms.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
|
||||
def extract_query_terms(query_text: str, *, max_terms: int = 6) -> list[str]:
    """Extract de-duplicated search terms from a query.

    Identifier-like ASCII tokens (a letter or underscore followed by at least
    two letters, digits or underscores) are expanded with
    ``_identifier_variants``; intent-derived terms from ``_intent_variants``
    are appended after them. First-seen order is preserved.

    Args:
        query_text: Raw query; ``None`` or empty yields no identifier terms.
        max_terms: Maximum number of terms to return (default 6). Values
            less than or equal to zero yield an empty list.

    Returns:
        At most ``max_terms`` unique terms in first-seen order.
    """
    if max_terms <= 0:
        return []

    raw_terms = re.findall(r"[A-Za-z_][A-Za-z0-9_]{2,}", query_text or "")

    # A dict keeps insertion order and gives O(1) duplicate checks; updating
    # an existing key does not move it, so first-seen order is preserved.
    unique: dict[str, None] = {}
    for term in raw_terms:
        unique.update(dict.fromkeys(_identifier_variants(term)))
    unique.update(dict.fromkeys(_intent_variants(query_text)))

    return list(unique)[:max_terms]
|
||||
|
||||
|
||||
def _identifier_variants(term: str) -> list[str]:
    """Return spelling variants of an identifier for term matching.

    Variants, in order and without duplicates:
      1. the lower-cased term;
      2. its snake_case form (via ``_camel_to_snake``);
      3. for terms ending in "manager", the form "<prefix>_manager";
      4. the snake_case form with underscores removed.

    Args:
        term: Identifier-like token taken from the query.

    Returns:
        List of unique variants; the lower-cased term is always first.
    """
    lowered = term.lower()
    variants = [lowered]

    def _add(candidate: str) -> None:
        # Skip empty strings and anything already collected.
        if candidate and candidate not in variants:
            variants.append(candidate)

    snake = _camel_to_snake(term)
    _add(snake)

    suffix = "manager"
    if lowered.endswith(suffix) and len(lowered) > len(suffix):
        # Strip trailing underscores from the prefix so input that is already
        # snake_case ("process_manager") does not become "process__manager".
        prefix = lowered[: -len(suffix)].rstrip("_")
        if prefix:
            _add(f"{prefix}_{suffix}")

    _add(snake.replace("_", "") if snake else "")
    return variants
|
||||
|
||||
|
||||
def _camel_to_snake(term: str) -> str:
    """Convert a CamelCase or mixedCase identifier to lower snake_case.

    Acronym runs are kept together ("HTTPServer" -> "http_server").
    Underscores already present in ``term`` are left as they are.

    Args:
        term: Identifier to convert.

    Returns:
        The lower-cased snake_case form of ``term``.
    """
    # Insert "_" before a capitalised word. ``[^_]`` (rather than ``.``) keeps
    # an existing underscore from being doubled: "My_Class" -> "my_class".
    first = re.sub(r"([^_])([A-Z][a-z]+)", r"\1_\2", term)
    # Insert "_" between a lower-case letter or digit and a following capital.
    return re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", first).lower()
|
||||
|
||||
|
||||
def _intent_variants(query_text: str) -> list[str]:
    """Return extra search terms implied by the query's intent.

    When the lower-cased query contains any of the control/management trigger
    substrings, a fixed list of control-related terms is returned; otherwise
    the result is an empty list. ``None`` is treated as an empty query.
    """
    triggers = ("управ", "control", "manage", "management")
    control_terms = ["control", "management", "start", "stop", "status"]

    text = (query_text or "").lower()
    for trigger in triggers:
        if trigger in text:
            return control_terms
    return []
|
||||
Reference in New Issue
Block a user