Новый раг
This commit is contained in:
45
app/modules/rag/retrieval/query_terms.py
Normal file
45
app/modules/rag/retrieval/query_terms.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
|
||||
def extract_query_terms(query_text: str) -> list[str]:
    """Build a short, de-duplicated list of search terms from a query.

    Identifier-like ASCII tokens (a letter or underscore followed by at
    least two letters, digits or underscores) are pulled out of the query
    and each is expanded through ``_identifier_variants``.  The variants
    from ``_intent_variants`` are appended afterwards.  First-seen order
    is kept and at most six terms are returned.

    A falsy ``query_text`` is treated as an empty string for the token scan.
    """
    # Dict keys give ordered, first-occurrence-wins de-duplication.
    seen: dict[str, None] = {}

    for identifier in re.findall(r"[A-Za-z_][A-Za-z0-9_]{2,}", query_text or ""):
        seen.update(dict.fromkeys(_identifier_variants(identifier)))

    seen.update(dict.fromkeys(_intent_variants(query_text)))

    return list(seen)[:6]
|
||||
|
||||
|
||||
def _identifier_variants(term: str) -> list[str]:
    """Return lowercase spelling variants of one identifier, without duplicates.

    Variants are produced in this order:

    1. the lowercased term;
    2. its snake_case form, as returned by ``_camel_to_snake``;
    3. ``<prefix>_manager`` when the lowercased term ends in "manager"
       and something other than underscores precedes that suffix;
    4. the snake_case form with all underscores removed.

    Args:
        term: A single identifier-like token.

    Returns:
        The distinct variants, in the order listed above.
    """
    lowered = term.lower()
    variants = [lowered]

    snake = _camel_to_snake(term)
    if snake and snake not in variants:
        variants.append(snake)

    if lowered.endswith("manager"):
        # Strip separators already sitting in front of the suffix so that
        # snake_case input such as "process_manager" does not produce the
        # junk variant "process__manager" (and "_manager" yields nothing).
        prefix = lowered[: -len("manager")].rstrip("_")
        if prefix:
            manager_split = f"{prefix}_manager"
            if manager_split not in variants:
                variants.append(manager_split)

    compact = snake.replace("_", "") if snake else ""
    if compact and compact not in variants:
        variants.append(compact)

    return variants
|
||||
|
||||
|
||||
def _camel_to_snake(term: str) -> str:
|
||||
first = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", term)
|
||||
return re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", first).lower()
|
||||
|
||||
|
||||
def _intent_variants(query_text: str) -> list[str]:
|
||||
lowered = (query_text or "").lower()
|
||||
variants: list[str] = []
|
||||
if any(token in lowered for token in ("управ", "control", "manage", "management")):
|
||||
variants.extend(["control", "management", "start", "stop", "status"])
|
||||
return variants
|
||||
Reference in New Issue
Block a user