from __future__ import annotations

import re

# Identifier-like tokens: an ASCII letter or underscore followed by at least
# two more word characters, i.e. tokens shorter than three characters are ignored.
_TERM_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]{2,}")

# Two-pass camelCase splitter: the first pattern separates a capitalised word
# from whatever precedes it, the second separates a lowercase letter or digit
# from an uppercase letter that follows it.
_CAMEL_WORD_RE = re.compile(r"(.)([A-Z][a-z]+)")
_CAMEL_BOUNDARY_RE = re.compile(r"([a-z0-9])([A-Z])")

# Upper bound on the number of terms returned by extract_query_terms().
_MAX_TERMS = 6

_MANAGER_SUFFIX = "manager"

# Substrings (matched against the lowercased query) that trigger the
# control/management expansion, and the terms that expansion contributes.
_INTENT_TRIGGERS = ("управ", "control", "manage", "management")
_INTENT_TERMS = ("control", "management", "start", "stop", "status")


def extract_query_terms(query_text: str) -> list[str]:
    """Return up to six normalized search terms derived from *query_text*.

    Identifier-like tokens are pulled out of the query in order of appearance
    and each one is expanded into its spelling variants (see
    ``_identifier_variants``). Intent-based terms (see ``_intent_variants``)
    are appended after all identifier variants. Duplicates are dropped while
    the first-seen order is kept, and the result is truncated to
    ``_MAX_TERMS`` entries, so intent terms are the first to be cut.

    Args:
        query_text: Free-form query. ``None`` or an empty string is treated
            as an empty query.

    Returns:
        A list of at most ``_MAX_TERMS`` unique lowercase terms.
    """
    text = query_text or ""

    # A dict preserves insertion order and gives constant-time duplicate checks.
    ordered: dict[str, None] = {}
    for term in _TERM_RE.findall(text):
        for variant in _identifier_variants(term):
            ordered.setdefault(variant, None)
    for variant in _intent_variants(text):
        ordered.setdefault(variant, None)

    return list(ordered)[:_MAX_TERMS]


def _identifier_variants(term: str) -> list[str]:
    """Return the distinct spelling variants of a single identifier.

    Variants are produced in this order, skipping empty strings and
    duplicates:

    1. the lowercased term;
    2. its snake_case form;
    3. for terms ending in ``manager`` with a non-empty stem, the form
       ``<stem>_manager``;
    4. the snake_case form with all underscores removed.
    """
    lowered = term.lower()
    snake = _camel_to_snake(term)

    candidates = [snake]
    if lowered.endswith(_MANAGER_SUFFIX):
        # Strip underscores already separating the stem from the suffix so
        # that "runtime_manager" does not become "runtime__manager"; a stem
        # that is empty (plain "manager", "_manager") yields no extra variant.
        stem = lowered[: -len(_MANAGER_SUFFIX)].rstrip("_")
        if stem:
            candidates.append(f"{stem}_{_MANAGER_SUFFIX}")
    candidates.append(snake.replace("_", ""))

    variants = [lowered]
    for candidate in candidates:
        if candidate and candidate not in variants:
            variants.append(candidate)
    return variants


def _camel_to_snake(term: str) -> str:
    """Convert a camelCase / PascalCase identifier to lowercase snake_case."""
    separated = _CAMEL_WORD_RE.sub(r"\1_\2", term)
    return _CAMEL_BOUNDARY_RE.sub(r"\1_\2", separated).lower()


def _intent_variants(query_text: str) -> list[str]:
    """Return extra terms implied by control/management wording in the query.

    The check is a case-insensitive substring match against
    ``_INTENT_TRIGGERS`` (which includes the Cyrillic stem "управ"). When any
    trigger is present, a fresh copy of ``_INTENT_TERMS`` is returned;
    otherwise the result is an empty list.
    """
    lowered = (query_text or "").lower()
    if any(trigger in lowered for trigger in _INTENT_TRIGGERS):
        return list(_INTENT_TERMS)
    return []