Фиксация изменений
This commit is contained in:
102
app/modules/rag/explain/intent_builder.py
Normal file
102
app/modules/rag/explain/intent_builder.py
Normal file
@@ -0,0 +1,102 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from app.modules.rag.explain.models import ExplainHints, ExplainIntent
|
||||
from app.modules.rag.retrieval.query_terms import extract_query_terms
|
||||
|
||||
|
||||
class ExplainIntentBuilder:
    """Derive a structured ``ExplainIntent`` from a free-form user query.

    Everything here is a regex/substring heuristic over the query text: the
    builder pulls out file paths, code symbols, HTTP routes and CLI command
    names, and guesses whether tests should be included, which entry-point
    types are expected and how detailed the explanation should be.
    """

    # A "/"-led run of URL-ish characters, e.g. ``/users/{id}``.
    _ROUTE_RE = re.compile(r"(/[A-Za-z0-9_./{}:-]+)")
    # Anything that looks like a (possibly nested) ``*.py`` path.
    _FILE_RE = re.compile(r"([A-Za-z0-9_./-]+\.py)")
    # ``Class.member``, a Capitalised identifier, or a lower-case identifier;
    # every alternative is at least three characters long.
    _SYMBOL_RE = re.compile(r"\b([A-Z][A-Za-z0-9_]*\.[A-Za-z_][A-Za-z0-9_]*|[A-Z][A-Za-z0-9_]{2,}|[a-z_][A-Za-z0-9_]{2,})\b")
    # A single token wrapped in backticks.
    _COMMAND_RE = re.compile(r"`([A-Za-z0-9:_-]+)`")

    # Substrings (matched against the lower-cased, space-padded query) that
    # signal the user is asking about tests.
    # NOTE(review): these are plain substring checks, so "spec" also fires on
    # "specific" and "test " on "latest " — consider word-boundary matching.
    _TEST_KEYWORDS = (
        "тест",
        "tests",
        "test ",
        "unit-test",
        "unit test",
        "юнит-тест",
        "pytest",
        "spec",
        "как покрыто тестами",
        "как проверяется",
        "how is it tested",
        "how it's tested",
    )

    # Substrings that select an entry-point type in ``_entry_types``.
    # NOTE(review): substring matching means "api" fires on "rapid" and "cli"
    # on "client" — confirm whether that is acceptable.
    _HTTP_MARKERS = ("endpoint", "route", "handler", "http", "api")
    _CLI_MARKERS = ("cli", "command", "click", "typer")

    # Upper bound on the number of keywords stored in the intent.
    _MAX_KEYWORDS = 12

    def build(self, user_query: str) -> ExplainIntent:
        """Build the intent for *user_query*.

        The query is whitespace-normalised first (``None``/empty becomes
        ``""``); the raw value is still passed through as ``raw_query``.
        """
        normalized = " ".join((user_query or "").split())
        lowered = normalized.lower()
        keywords = self._keywords(normalized)
        hints = ExplainHints(
            paths=self._dedupe(self._FILE_RE.findall(normalized)),
            symbols=self._symbols(normalized),
            endpoints=self._endpoints(normalized),
            commands=self._commands(normalized, lowered),
        )
        return ExplainIntent(
            raw_query=user_query,
            normalized_query=normalized,
            keywords=keywords[: self._MAX_KEYWORDS],
            hints=hints,
            include_tests=self._include_tests(lowered),
            expected_entry_types=self._entry_types(lowered, hints),
            depth=self._depth(lowered),
        )

    def _keywords(self, text: str) -> list[str]:
        """Return query terms, then symbols, then routes, without duplicates."""
        # Copy so the list handed back by ``extract_query_terms`` is never
        # mutated in place.
        keywords = list(extract_query_terms(text))
        keywords.extend(self._symbols(text))
        keywords.extend(self._endpoints(text))
        return self._dedupe(keywords)

    def _symbols(self, text: str) -> list[str]:
        """Return identifier-like tokens from *text*, skipping ``*.py`` names."""
        # ``Name.py`` matches the ``Class.member`` alternative of the regex
        # but is a file name, not a symbol.
        candidates = [
            token
            for token in self._SYMBOL_RE.findall(text)
            if not token.endswith(".py")
        ]
        return self._dedupe(candidates)

    def _endpoints(self, text: str) -> list[str]:
        """Return route-like tokens from *text* that start their own token.

        A "/"-led match is rejected when it is merely the tail of a longer
        token (``app/modules/x.py``, ``and/or``) or names a ``.py`` file;
        otherwise such fragments would be reported as HTTP endpoints.
        Sentence punctuation (``.``/``:``) trailing a route is stripped.
        """
        routes: list[str] = []
        for match in self._ROUTE_RE.finditer(text):
            start = match.start()
            if start > 0:
                previous = text[start - 1]
                if previous.isalnum() or previous in "_.":
                    continue
            route = match.group(1).rstrip(".:")
            # The regex never yields a bare "/"; keep that true after stripping.
            if len(route) < 2 or route.endswith(".py"):
                continue
            routes.append(route)
        return self._dedupe(routes)

    def _commands(self, text: str, lowered: str) -> list[str]:
        """Return CLI command names mentioned in the query.

        Backticked tokens are taken from *text* as written; the word that
        follows a standalone ``command`` or ``cli`` is taken from *lowered*,
        so those names come back lower-cased.
        """
        values = list(self._COMMAND_RE.findall(text))
        padded = f" {lowered} "
        if " command " in padded:
            values.extend(re.findall(r"command\s+([A-Za-z0-9:_-]+)", lowered))
        if " cli " in padded:
            values.extend(re.findall(r"cli\s+([A-Za-z0-9:_-]+)", lowered))
        return self._dedupe(values)

    def _entry_types(self, lowered: str, hints: ExplainHints) -> list[str]:
        """Guess the entry-point types; HTTP evidence wins over CLI evidence."""
        if hints.endpoints or any(marker in lowered for marker in self._HTTP_MARKERS):
            return ["http"]
        if hints.commands or any(marker in lowered for marker in self._CLI_MARKERS):
            return ["cli"]
        # No evidence either way: consider both.
        return ["http", "cli"]

    def _depth(self, lowered: str) -> str:
        """Return ``"deep"``, ``"high"`` or ``"medium"`` (the default)."""
        if any(token in lowered for token in ("deep", "подроб", "деталь", "full flow", "trace")):
            return "deep"
        if any(token in lowered for token in ("high level", "overview", "кратко", "summary")):
            return "high"
        return "medium"

    def _include_tests(self, lowered: str) -> bool:
        """Return True when the query contains any test-related keyword."""
        # Pad with spaces so keywords carrying a trailing space ("test ")
        # also match at the very end of the query.
        padded = f" {lowered} "
        return any(token in padded for token in self._TEST_KEYWORDS)

    def _dedupe(self, values: list[str]) -> list[str]:
        """Strip each value, drop empties and keep the first occurrence of each."""
        stripped = (value.strip() for value in values)
        # dict keys preserve insertion order, giving a linear-time stable dedupe.
        return list(dict.fromkeys(item for item in stripped if item))
|
||||
Reference in New Issue
Block a user