46 lines
1.5 KiB
Python
46 lines
1.5 KiB
Python
from __future__ import annotations
|
|
|
|
import re
|
|
|
|
|
|
def extract_query_terms(query_text: str, *, max_terms: int = 6) -> list[str]:
    """Derive an ordered, de-duplicated list of search terms from a query.

    Identifier-like tokens (an ASCII letter or underscore followed by at
    least two more word characters) are pulled from the text and expanded
    through ``_identifier_variants``.  Terms produced by
    ``_intent_variants`` are appended afterwards.  Each term is kept at the
    position of its first occurrence.

    Args:
        query_text: Free-text query; ``None`` or an empty string is treated
            as an empty query.
        max_terms: Upper bound on the number of returned terms.  Defaults
            to 6; a non-positive value yields an empty list.

    Returns:
        At most ``max_terms`` unique terms, identifier variants first.
    """
    text = query_text or ""
    identifiers = re.findall(r"[A-Za-z_][A-Za-z0-9_]{2,}", text)

    candidates = [
        variant
        for identifier in identifiers
        for variant in _identifier_variants(identifier)
    ]
    # Intent terms come last, so they are the first to be cut by the cap.
    candidates.extend(_intent_variants(text))

    # dict.fromkeys keeps first-occurrence order while dropping duplicates.
    unique_terms = list(dict.fromkeys(candidates))
    return unique_terms[: max(max_terms, 0)]
|
|
|
|
|
|
def _identifier_variants(term: str) -> list[str]:
    """Return spelling variants of an identifier, most literal first.

    The variants are, in order: the lowercased term, its snake_case form,
    a ``<prefix>_manager`` split when the term ends in "manager", and the
    snake_case form with underscores removed.  Duplicates are dropped.

    Args:
        term: Identifier-like token taken from the query.

    Returns:
        Unique variants in the order described above; the lowercased term
        is always the first element.
    """
    suffix = "manager"
    lowered = term.lower()
    snake = _camel_to_snake(term)

    candidates = [snake]

    if lowered.endswith(suffix) and len(lowered) > len(suffix):
        prefix = lowered[: -len(suffix)]
        # A prefix that already ends with "_" (e.g. "file_manager") would
        # produce a meaningless double-underscore variant, so skip it.
        if not prefix.endswith("_"):
            candidates.append(prefix + "_" + suffix)

    candidates.append(snake.replace("_", ""))

    variants = [lowered]
    for candidate in candidates:
        if candidate and candidate not in variants:
            variants.append(candidate)
    return variants
|
|
|
|
|
|
def _camel_to_snake(term: str) -> str:
    """Convert a camelCase / PascalCase identifier to lowercase snake_case.

    Underscores already present in ``term`` are preserved as-is; no extra
    underscore is inserted next to an existing one.

    Args:
        term: Identifier to convert.

    Returns:
        The lowercased identifier with underscores at word boundaries.
    """
    # Split before a capitalised word ("HTTPServer" -> "HTTP_Server").
    # The preceding character must not be "_" so that "my_Manager" does not
    # turn into "my__Manager".
    first = re.sub(r"([^_])([A-Z][a-z]+)", r"\1_\2", term)
    # Split a lowercase letter or digit from a following capital
    # ("getHTTP" -> "get_HTTP").
    return re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", first).lower()
|
|
|
|
|
|
def _intent_variants(query_text: str) -> list[str]:
    """Return extra search terms when the query mentions control/management.

    The lowercased query is scanned for a fixed set of trigger substrings.
    If any of them occurs, a fixed list of related terms is returned.

    Args:
        query_text: Free-text query; ``None`` or an empty string is treated
            as an empty query.

    Returns:
        The related terms when a trigger is present, otherwise an empty list.
    """
    haystack = query_text.lower() if query_text else ""
    triggers = ("управ", "control", "manage", "management")
    for trigger in triggers:
        if trigger in haystack:
            return ["control", "management", "start", "stop", "status"]
    return []
|