Фиксация изменений

This commit is contained in:
2026-03-05 11:03:17 +03:00
parent 1ef0b4d68c
commit 417b8b6f72
261 changed files with 8215 additions and 332 deletions

View File

@@ -0,0 +1,113 @@
from __future__ import annotations
import json
import re
from app.modules.rag.intent_router_v2.models import ConversationState, IntentDecision
from app.modules.rag.intent_router_v2.protocols import TextGenerator
from app.modules.rag.intent_router_v2.test_signals import has_test_focus
# Matches a bare file name or slash-separated relative path whose last
# component ends in one of the listed source-code extensions, ignoring case
# (e.g. ``app/api/handlers.py``). The ``(?!\w)`` lookahead stops a longer
# suffix such as ``.pyc`` from being accepted as ``.py``.
_CODE_FILE_PATH_RE = re.compile(
    r"\b(?:[\w.-]+/)*[\w.-]+\."
    r"(?:py|js|jsx|ts|tsx|java|kt|go|rb|php|c|cc|cpp|h|hpp|cs|swift|rs)"
    r"(?!\w)\b",
    flags=re.IGNORECASE,
)
class IntentClassifierV2:
    """Classify a user query into one of the router's intents.

    Classification runs in three stages:

    1. deterministic substring/regex heuristics;
    2. an optional text generator (``llm``), asked only when the heuristics
       are inconclusive;
    3. a fixed ``PROJECT_MISC`` fallback.
    """

    # Substrings looked up in the lower-cased, whitespace-normalised query.
    # They are stems, so they also match inside longer words
    # (e.g. "spec" matches "specific").
    _GENERATE_DOCS_MARKERS = (
        "сгенерируй документац",
        "подготовь документац",
        "создай документац",
        "генерац",
        "generate documentation",
        "write documentation",
    )
    _DOCS_MARKERS = ("документац", "readme", "docs/", ".md", "spec", "runbook", "markdown")
    _CODE_MARKERS = (
        "по коду",
        "код",
        "класс",
        "метод",
        "функц",
        "модул",
        "пакет",
        "файл",
        "block",
        "блок",
        "handler",
        "endpoint",
    )

    # Intents offered to, and accepted back from, the text generator.
    _ALLOWED_INTENTS = ("CODE_QA", "DOCS_QA", "GENERATE_DOCS_FROM_CODE", "PROJECT_MISC")
    # Confidence used when the generator's reply has no usable "confidence".
    _DEFAULT_LLM_CONFIDENCE = 0.7

    # An ASCII token of 3+ characters starting with an upper-case letter,
    # optionally followed by dotted attribute access (``Foo.bar.baz``).
    # NOTE(review): this also matches ordinary capitalised English words
    # ("What", "How"); confirm that is intended for the expected query language.
    _SYMBOL_RE = re.compile(r"\b[A-Z][A-Za-z0-9_]{2,}(?:\.[A-Za-z_][A-Za-z0-9_]*)*\b")
    # A lower-case/underscore-led identifier immediately followed by "(".
    _CALL_RE = re.compile(r"\b[a-z_][A-Za-z0-9_]{2,}\(")

    def __init__(self, llm: TextGenerator | None = None) -> None:
        """Store the optional text generator used as the second stage."""
        self._llm = llm

    def classify(self, user_query: str, conversation_state: ConversationState) -> IntentDecision:
        """Return the intent decision for ``user_query``.

        Args:
            user_query: Raw user message.
            conversation_state: Conversation context; only forwarded to the
                text generator stage.

        Returns:
            The first non-empty result of the deterministic stage, then the
            generator stage, otherwise a ``PROJECT_MISC`` decision with
            confidence 0.55.
        """
        decision = self._deterministic(user_query) or self._classify_with_llm(user_query, conversation_state)
        if decision:
            return decision
        return IntentDecision(intent="PROJECT_MISC", confidence=0.55, reason="fallback_project_misc")

    def _deterministic(self, user_query: str) -> IntentDecision | None:
        """Apply the marker heuristics; return ``None`` when none of them fires.

        Order matters: documentation generation wins over docs questions,
        which win over code questions.
        """
        # Lower-case and collapse whitespace runs so multi-word markers match.
        text = " ".join((user_query or "").lower().split())
        if any(marker in text for marker in self._GENERATE_DOCS_MARKERS):
            return IntentDecision(
                intent="GENERATE_DOCS_FROM_CODE",
                confidence=0.97,
                reason="deterministic_generate_docs",
            )
        if self._looks_like_docs_question(text):
            return IntentDecision(intent="DOCS_QA", confidence=0.9, reason="deterministic_docs")
        if self._looks_like_code_question(user_query, text):
            return IntentDecision(intent="CODE_QA", confidence=0.9, reason="deterministic_code")
        return None

    def _classify_with_llm(self, user_query: str, conversation_state: ConversationState) -> IntentDecision | None:
        """Ask the text generator for an intent.

        Returns ``None`` when no generator is configured, when the call
        fails, or when the reply cannot be parsed into an allowed intent.
        """
        if self._llm is None:
            return None
        payload = json.dumps(
            {
                "message": user_query,
                "active_intent": conversation_state.active_intent,
                "last_query": conversation_state.last_query,
                "allowed_intents": list(self._ALLOWED_INTENTS),
            },
            ensure_ascii=False,
        )
        try:
            raw = self._llm.generate(
                "rag_intent_router_v2",
                payload,
                log_context="rag.intent_router_v2.classify",
            ).strip()
        except Exception:
            # Best-effort stage: any generator failure (including a non-string
            # reply) lets the caller use the PROJECT_MISC fallback.
            return None
        return self._parse(raw)

    def _parse(self, raw: str) -> IntentDecision | None:
        """Turn the generator's reply into a decision, or ``None`` if unusable.

        The reply must be a JSON object (optionally wrapped in a Markdown code
        fence) whose ``intent`` is one of the allowed intents. Replies that
        are valid JSON but not an object are rejected instead of raising.
        """
        try:
            payload = json.loads(self._strip_code_fence(raw))
        except json.JSONDecodeError:
            return None
        # The generator's output is untrusted: "[...]", "null" or a bare string
        # are valid JSON but have no ``.get``.
        if not isinstance(payload, dict):
            return None
        intent = str(payload.get("intent") or "").strip().upper()
        if intent not in self._ALLOWED_INTENTS:
            return None
        return IntentDecision(
            intent=intent,
            confidence=self._coerce_confidence(payload.get("confidence")),
            reason=str(payload.get("reason") or "llm").strip() or "llm",
        )

    @classmethod
    def _coerce_confidence(cls, value: object) -> float:
        """Convert a reply's ``confidence`` to ``float``.

        Missing, falsy (including ``0``) or non-numeric values yield the
        default confidence.
        """
        try:
            return float(value or cls._DEFAULT_LLM_CONFIDENCE)
        except (TypeError, ValueError, OverflowError):
            return cls._DEFAULT_LLM_CONFIDENCE

    def _strip_code_fence(self, text: str) -> str:
        """Remove a surrounding Markdown code fence, if one is present.

        The text is returned unchanged unless it starts with a fence, spans at
        least three lines and its last line is a bare closing fence.
        """
        if not text.startswith("```"):
            return text
        lines = text.splitlines()
        if len(lines) < 3 or lines[-1].strip() != "```":
            return text
        # Drop the opening line (which may carry a language tag) and the
        # closing fence.
        return "\n".join(lines[1:-1]).strip()

    def _looks_like_docs_question(self, text: str) -> bool:
        """Return True when ``text`` has a docs marker and no code file path."""
        if self._has_code_file_path(text):
            return False
        return any(marker in text for marker in self._DOCS_MARKERS)

    def _looks_like_code_question(self, raw_text: str, lowered: str) -> bool:
        """Return True when the query appears to be about source code.

        Args:
            raw_text: Query with its original casing (used by the
                case-sensitive identifier patterns).
            lowered: Lower-cased, whitespace-normalised query (used for
                marker lookups).
        """
        if self._has_code_file_path(raw_text):
            return True
        # ``has_test_focus`` is an imported helper; presumably it detects
        # test-related wording -- verify against its definition.
        if has_test_focus(lowered):
            return True
        if any(marker in lowered for marker in self._CODE_MARKERS):
            return True
        # A docs marker without any code marker rules the query out before the
        # identifier patterns are tried.
        if any(marker in lowered for marker in self._DOCS_MARKERS):
            return False
        source = raw_text or ""
        if self._SYMBOL_RE.search(source):
            return True
        return bool(self._CALL_RE.search(source))

    def _has_code_file_path(self, text: str) -> bool:
        """Return True when ``text`` contains a path with a source-code extension."""
        return bool(_CODE_FILE_PATH_RE.search(text or ""))