"""Tests for ``QueryNormalizer.normalize`` in the intent router v2 analysis layer."""

import pytest

from app.modules.rag.intent_router_v2.analysis.normalization import QueryNormalizer

# Tag every test in this module with the ``intent_router`` marker.
pytestmark = pytest.mark.intent_router


def test_query_normalizer_collapses_whitespace() -> None:
    """Surrounding and inner whitespace (including a newline) becomes single spaces."""
    result = QueryNormalizer().normalize(" Объясни как работает \n класс X ")

    assert result == "Объясни как работает класс X"


def test_query_normalizer_canonicalizes_quotes() -> None:
    """Guillemets and curly double quotes are rewritten as plain ASCII double quotes."""
    expected = 'Уточни "текст" и "текст"'

    result = QueryNormalizer().normalize('Уточни «текст» и “текст”')

    assert result == expected


def test_query_normalizer_preserves_backticks_verbatim() -> None:
    """A backtick-delimited code span is kept as-is; only the trailing space is dropped."""
    result = QueryNormalizer().normalize("Уточни по коду `def build(x):` ")

    assert result == "Уточни по коду `def build(x):`"


def test_query_normalizer_preserves_latin_and_cyrillic_file_paths() -> None:
    """File paths with Latin or Cyrillic names stay intact, with no space after the dot."""
    result = QueryNormalizer().normalize(
        "Сверь app/core/config.py и «docs/руководство.md»"
    )

    for intact_path in ("app/core/config.py", "docs/руководство.md"):
        assert intact_path in result
    for broken_extension in ("config. py", "руководство. md"):
        assert broken_extension not in result


def test_query_normalizer_punctuation_spacing_does_not_break_extensions() -> None:
    """Punctuation near file paths must not cause the extension to be split off."""
    result = QueryNormalizer().normalize("Проверь docs/spec.md , затем app/main.py !")

    for intact_path in ("docs/spec.md", "app/main.py"):
        assert intact_path in result
    for broken_extension in ("spec. md", "main. py"):
        assert broken_extension not in result


def test_query_normalizer_idempotent_and_without_enrichment() -> None:
    """Normalizing twice equals normalizing once, and no extra keywords are injected."""
    query_normalizer = QueryNormalizer()
    raw = ' Прочитай «README.md» и docs/spec.md '

    first_pass = query_normalizer.normalize(raw)
    second_pass = query_normalizer.normalize(first_pass)

    assert second_pass == first_pass

    # The normalizer must not add words that were absent from the raw query.
    lowered = first_pass.lower()
    for injected_word in ("documentation", "class"):
        assert injected_word not in lowered