Фиксация изменений
This commit is contained in:
113
app/modules/rag/intent_router_v2/classifier.py
Normal file
113
app/modules/rag/intent_router_v2/classifier.py
Normal file
@@ -0,0 +1,113 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from app.modules.rag.intent_router_v2.models import ConversationState, IntentDecision
|
||||
from app.modules.rag.intent_router_v2.protocols import TextGenerator
|
||||
from app.modules.rag.intent_router_v2.test_signals import has_test_focus
|
||||
|
||||
_CODE_FILE_PATH_RE = re.compile(
|
||||
r"\b(?:[\w.-]+/)*[\w.-]+\.(?:py|js|jsx|ts|tsx|java|kt|go|rb|php|c|cc|cpp|h|hpp|cs|swift|rs)(?!\w)\b",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
class IntentClassifierV2:
|
||||
_GENERATE_DOCS_MARKERS = (
|
||||
"сгенерируй документац",
|
||||
"подготовь документац",
|
||||
"создай документац",
|
||||
"генерац",
|
||||
"generate documentation",
|
||||
"write documentation",
|
||||
)
|
||||
_DOCS_MARKERS = ("документац", "readme", "docs/", ".md", "spec", "runbook", "markdown")
|
||||
_CODE_MARKERS = ("по коду", "код", "класс", "метод", "функц", "модул", "пакет", "файл", "block", "блок", "handler", "endpoint")
|
||||
|
||||
def __init__(self, llm: TextGenerator | None = None) -> None:
|
||||
self._llm = llm
|
||||
|
||||
def classify(self, user_query: str, conversation_state: ConversationState) -> IntentDecision:
|
||||
deterministic = self._deterministic(user_query)
|
||||
if deterministic:
|
||||
return deterministic
|
||||
llm_decision = self._classify_with_llm(user_query, conversation_state)
|
||||
if llm_decision:
|
||||
return llm_decision
|
||||
return IntentDecision(intent="PROJECT_MISC", confidence=0.55, reason="fallback_project_misc")
|
||||
|
||||
def _deterministic(self, user_query: str) -> IntentDecision | None:
|
||||
text = " ".join((user_query or "").lower().split())
|
||||
if any(marker in text for marker in self._GENERATE_DOCS_MARKERS):
|
||||
return IntentDecision(intent="GENERATE_DOCS_FROM_CODE", confidence=0.97, reason="deterministic_generate_docs")
|
||||
if self._looks_like_docs_question(text):
|
||||
return IntentDecision(intent="DOCS_QA", confidence=0.9, reason="deterministic_docs")
|
||||
if self._looks_like_code_question(user_query, text):
|
||||
return IntentDecision(intent="CODE_QA", confidence=0.9, reason="deterministic_code")
|
||||
return None
|
||||
|
||||
def _classify_with_llm(self, user_query: str, conversation_state: ConversationState) -> IntentDecision | None:
|
||||
if self._llm is None:
|
||||
return None
|
||||
payload = json.dumps(
|
||||
{
|
||||
"message": user_query,
|
||||
"active_intent": conversation_state.active_intent,
|
||||
"last_query": conversation_state.last_query,
|
||||
"allowed_intents": ["CODE_QA", "DOCS_QA", "GENERATE_DOCS_FROM_CODE", "PROJECT_MISC"],
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
try:
|
||||
raw = self._llm.generate("rag_intent_router_v2", payload, log_context="rag.intent_router_v2.classify").strip()
|
||||
except Exception:
|
||||
return None
|
||||
parsed = self._parse(raw)
|
||||
if parsed is None:
|
||||
return None
|
||||
return parsed
|
||||
|
||||
def _parse(self, raw: str) -> IntentDecision | None:
|
||||
candidate = self._strip_code_fence(raw)
|
||||
try:
|
||||
payload = json.loads(candidate)
|
||||
except json.JSONDecodeError:
|
||||
return None
|
||||
intent = str(payload.get("intent") or "").strip().upper()
|
||||
if intent not in {"CODE_QA", "DOCS_QA", "GENERATE_DOCS_FROM_CODE", "PROJECT_MISC"}:
|
||||
return None
|
||||
return IntentDecision(
|
||||
intent=intent,
|
||||
confidence=float(payload.get("confidence") or 0.7),
|
||||
reason=str(payload.get("reason") or "llm").strip() or "llm",
|
||||
)
|
||||
|
||||
def _strip_code_fence(self, text: str) -> str:
|
||||
if not text.startswith("```"):
|
||||
return text
|
||||
lines = text.splitlines()
|
||||
if len(lines) < 3 or lines[-1].strip() != "```":
|
||||
return text
|
||||
return "\n".join(lines[1:-1]).strip()
|
||||
|
||||
def _looks_like_docs_question(self, text: str) -> bool:
|
||||
if self._has_code_file_path(text):
|
||||
return False
|
||||
return any(marker in text for marker in self._DOCS_MARKERS)
|
||||
|
||||
def _looks_like_code_question(self, raw_text: str, lowered: str) -> bool:
|
||||
if self._has_code_file_path(raw_text):
|
||||
return True
|
||||
if has_test_focus(lowered):
|
||||
return True
|
||||
if any(marker in lowered for marker in self._DOCS_MARKERS) and not any(marker in lowered for marker in self._CODE_MARKERS):
|
||||
return False
|
||||
if any(marker in lowered for marker in self._CODE_MARKERS):
|
||||
return True
|
||||
if re.search(r"\b[A-Z][A-Za-z0-9_]{2,}(?:\.[A-Za-z_][A-Za-z0-9_]*)*\b", raw_text or ""):
|
||||
return True
|
||||
return bool(re.search(r"\b[a-z_][A-Za-z0-9_]{2,}\(", raw_text or ""))
|
||||
|
||||
def _has_code_file_path(self, text: str) -> bool:
|
||||
return bool(_CODE_FILE_PATH_RE.search(text or ""))
|
||||
Reference in New Issue
Block a user