Фиксация изменений

This commit is contained in:
2026-03-05 11:03:17 +03:00
parent 1ef0b4d68c
commit 417b8b6f72
261 changed files with 8215 additions and 332 deletions

View File

@@ -0,0 +1,67 @@
from __future__ import annotations
import re
from app.modules.rag.intent_router_v2.normalization_terms import KeyTermCanonicalizer
# Matches runs of Latin/Cyrillic letters. The trailing "-" inside the character
# class is a literal hyphen, so a hyphenated word is kept as a single token.
_WORD_RE = re.compile(r"[A-Za-zА-Яа-яЁё-]+")


class RuEnTermMapper:
    """Expand key terms found in free text into English expansion terms.

    Two lookup tables drive the expansion:

    * ``_CANONICAL_MAP`` is keyed by the canonical form of a token, as returned
      by the injected canonicalizer (or the token itself when the canonicalizer
      returns a falsy value).
    * ``_ENGLISH_SOURCES`` is keyed by the raw lower-cased token.
    """

    # Canonical key term -> English expansions.
    _CANONICAL_MAP = {
        "класс": ["class"],
        "метод": ["method"],
        "функция": ["function", "def"],
        "модуль": ["module"],
        "пакет": ["package"],
        "файл": ["file"],
        "тест": ["test", "unit test"],
        "документация": ["documentation", "docs"],
        "readme": ["readme"],
    }

    # English word as it appears in the text -> English expansions.
    _ENGLISH_SOURCES = {
        "class": ["class"],
        "method": ["method"],
        "function": ["function", "def"],
        "module": ["module"],
        "package": ["package"],
        "file": ["file"],
        "test": ["test", "unit test"],
        "tests": ["test", "unit test"],
        "documentation": ["documentation", "docs"],
        "docs": ["documentation", "docs"],
        "readme": ["readme"],
        "def": ["def"],
    }

    def __init__(self, canonicalizer: "KeyTermCanonicalizer | None" = None) -> None:
        """Create a mapper.

        Args:
            canonicalizer: Object providing ``canonicalize(token)`` and
                ``aliases()``. When ``None``, a default
                ``KeyTermCanonicalizer()`` is created.
        """
        # Compare against None explicitly: a caller-supplied canonicalizer that
        # happens to be falsy (e.g. it defines __len__ or __bool__) must not be
        # silently swapped for the default one.
        if canonicalizer is None:
            canonicalizer = KeyTermCanonicalizer()
        self._canonicalizer = canonicalizer

    def expand(self, text: str) -> list[str]:
        """Return the English expansions for every known term in *text*.

        The text is lower-cased and tokenized with ``_WORD_RE``. Each token is
        looked up by its canonical form in ``_CANONICAL_MAP`` and by its raw
        form in ``_ENGLISH_SOURCES``.

        Args:
            text: Free text to scan; ``None`` or an empty string yields ``[]``.

        Returns:
            Expansion terms in order of first appearance, without duplicates.
        """
        expansions: list[str] = []
        lowered = (text or "").lower()
        for token in _WORD_RE.findall(lowered):
            # Fall back to the raw token when the canonicalizer has no answer.
            canonical = self._canonicalizer.canonicalize(token) or token
            self._extend(expansions, self._CANONICAL_MAP.get(canonical, []))
            self._extend(expansions, self._ENGLISH_SOURCES.get(token, []))
        # Substring check catches phrases whose second word is not itself a
        # known token (e.g. "unit testing"). It also covers "unit tests",
        # which necessarily contains "unit test".
        if "unit test" in lowered:
            self._extend(expansions, self._ENGLISH_SOURCES["test"])
        return expansions

    def key_terms(self) -> tuple[str, ...]:
        """Return the keys of ``_CANONICAL_MAP`` in definition order."""
        return tuple(self._CANONICAL_MAP)

    def all_literal_terms(self) -> tuple[str, ...]:
        """Return every literal term this mapper knows about, sorted.

        The result is the union of the canonicalizer's aliases, the keys of
        both lookup tables and their lower-cased expansion values.
        """
        # NOTE(review): assumes aliases() yields strings (the final sort needs
        # mutually comparable items) — confirm against KeyTermCanonicalizer.
        values = set(self._canonicalizer.aliases())
        for mapping in (self._CANONICAL_MAP, self._ENGLISH_SOURCES):
            values.update(mapping)
            for targets in mapping.values():
                values.update(target.lower() for target in targets)
        return tuple(sorted(values))

    def _extend(self, result: list[str], values: list[str]) -> None:
        """Append to *result*, in place, each item of *values* not already in it."""
        for value in values:
            if value not in result:
                result.append(value)