from __future__ import annotations import re from app.modules.rag.explain.models import ExplainHints, ExplainIntent from app.modules.rag.retrieval.query_terms import extract_query_terms class ExplainIntentBuilder: _ROUTE_RE = re.compile(r"(/[A-Za-z0-9_./{}:-]+)") _FILE_RE = re.compile(r"([A-Za-z0-9_./-]+\.py)") _SYMBOL_RE = re.compile(r"\b([A-Z][A-Za-z0-9_]*\.[A-Za-z_][A-Za-z0-9_]*|[A-Z][A-Za-z0-9_]{2,}|[a-z_][A-Za-z0-9_]{2,})\b") _COMMAND_RE = re.compile(r"`([A-Za-z0-9:_-]+)`") _TEST_KEYWORDS = ( "тест", "tests", "test ", "unit-test", "unit test", "юнит-тест", "pytest", "spec", "как покрыто тестами", "как проверяется", "how is it tested", "how it's tested", ) def build(self, user_query: str) -> ExplainIntent: normalized = " ".join((user_query or "").split()) lowered = normalized.lower() keywords = self._keywords(normalized) hints = ExplainHints( paths=self._dedupe(self._FILE_RE.findall(normalized)), symbols=self._symbols(normalized), endpoints=self._dedupe(self._ROUTE_RE.findall(normalized)), commands=self._commands(normalized, lowered), ) return ExplainIntent( raw_query=user_query, normalized_query=normalized, keywords=keywords[:12], hints=hints, include_tests=self._include_tests(lowered), expected_entry_types=self._entry_types(lowered, hints), depth=self._depth(lowered), ) def _keywords(self, text: str) -> list[str]: keywords = extract_query_terms(text) for token in self._symbols(text): if token not in keywords: keywords.append(token) for token in self._ROUTE_RE.findall(text): if token not in keywords: keywords.append(token) return self._dedupe(keywords) def _symbols(self, text: str) -> list[str]: values = [] for raw in self._SYMBOL_RE.findall(text): token = raw.strip() if len(token) < 3: continue if token.endswith(".py"): continue values.append(token) return self._dedupe(values) def _commands(self, text: str, lowered: str) -> list[str]: values = list(self._COMMAND_RE.findall(text)) if " command " in f" {lowered} ": values.extend(re.findall(r"command\s+([A-Za-z0-9:_-]+)", lowered)) if " cli " in f" {lowered} ": values.extend(re.findall(r"cli\s+([A-Za-z0-9:_-]+)", lowered)) return self._dedupe(values) def _entry_types(self, lowered: str, hints: ExplainHints) -> list[str]: if hints.endpoints or any(token in lowered for token in ("endpoint", "route", "handler", "http", "api")): return ["http"] if hints.commands or any(token in lowered for token in ("cli", "command", "click", "typer")): return ["cli"] return ["http", "cli"] def _depth(self, lowered: str) -> str: if any(token in lowered for token in ("deep", "подроб", "деталь", "full flow", "trace")): return "deep" if any(token in lowered for token in ("high level", "overview", "кратко", "summary")): return "high" return "medium" def _include_tests(self, lowered: str) -> bool: normalized = f" {lowered} " return any(token in normalized for token in self._TEST_KEYWORDS) def _dedupe(self, values: list[str]) -> list[str]: result: list[str] = [] for value in values: item = value.strip() if item and item not in result: result.append(item) return result