Files
agent/app/modules/rag/retrieval/query_terms.py
2026-03-01 14:21:33 +03:00

46 lines
1.5 KiB
Python

from __future__ import annotations
import re
def extract_query_terms(query_text: str, *, max_terms: int = 6) -> list[str]:
    """Return up to ``max_terms`` unique search terms derived from a query.

    ASCII identifier-like tokens (a letter or underscore followed by at
    least two more word characters) are pulled out of ``query_text`` in
    order of appearance and expanded with ``_identifier_variants``.  The
    variants from ``_intent_variants`` are considered last, so they only
    make it into the result when the identifier variants leave room.

    Args:
        query_text: Free-form query; ``None`` or an empty string is
            treated as having no identifiers.
        max_terms: Upper bound on the number of returned terms.  Values
            less than or equal to zero yield an empty list.

    Returns:
        Unique terms in first-seen order, at most ``max_terms`` long.
    """
    if max_terms <= 0:
        return []

    text = query_text or ""

    def _candidates():
        # Lazily walk the query so scanning stops once enough terms are found.
        for match in re.finditer(r"[A-Za-z_][A-Za-z0-9_]{2,}", text):
            yield from _identifier_variants(match.group())
        yield from _intent_variants(text)

    # A dict keeps insertion order and gives O(1) membership checks, unlike
    # the repeated list scans a plain list would need.
    unique: dict[str, None] = {}
    for variant in _candidates():
        if variant in unique:
            continue
        unique[variant] = None
        if len(unique) == max_terms:
            # Later candidates could never appear in the result.
            break
    return list(unique)
def _identifier_variants(term: str) -> list[str]:
    """Return the distinct spellings of ``term`` used for matching.

    Candidates are produced in a fixed order and duplicates are dropped
    while keeping the first occurrence:

    1. the lower-cased term;
    2. its snake_case form (via ``_camel_to_snake``), when non-empty;
    3. for terms ending in "manager" with a non-empty prefix, the prefix
       joined to "_manager";
    4. the snake_case form with all underscores removed, when non-empty.
    """
    suffix = "manager"
    lower_form = term.lower()
    snake_form = _camel_to_snake(term)

    candidates = [lower_form]
    if snake_form:
        candidates.append(snake_form)
    # Handles all-lowercase names such as "taskmanager", where the
    # camel-case conversion has no word boundary to split on.
    if lower_form.endswith(suffix) and len(lower_form) > len(suffix):
        candidates.append(lower_form[: -len(suffix)] + "_manager")
    squashed = snake_form.replace("_", "") if snake_form else ""
    if squashed:
        candidates.append(squashed)

    # dict.fromkeys removes repeats without disturbing first-seen order.
    return list(dict.fromkeys(candidates))
def _camel_to_snake(term: str) -> str:
    """Convert a CamelCase / mixedCase identifier to lower snake_case.

    Two substitution passes insert underscores: the first before a
    capital letter that is followed by lowercase letters, the second
    between a lowercase letter or digit and a following capital.  The
    result is then lower-cased.
    """
    converted = term
    for boundary in (r"(.)([A-Z][a-z]+)", r"([a-z0-9])([A-Z])"):
        converted = re.sub(boundary, r"\1_\2", converted)
    return converted.lower()
def _intent_variants(query_text: str) -> list[str]:
    """Return extra control-related terms when the query mentions managing.

    The query is lower-cased and searched for any of the marker
    substrings (one Russian stem and three English words).  On a hit a
    fixed list of five terms is returned; otherwise, or when
    ``query_text`` is empty or ``None``, the result is an empty list.
    """
    haystack = (query_text or "").lower()
    for marker in ("управ", "control", "manage", "management"):
        if marker in haystack:
            # One hit is enough; a fresh list is returned on every call.
            return ["control", "management", "start", "stop", "status"]
    return []