from app.modules.rag.persistence.retrieval_statement_builder import RetrievalStatementBuilder
from app.modules.rag.retrieval.test_filter import build_test_filters, is_test_path


def test_retrieve_builder_adds_test_exclusion_filters() -> None:
    """Check that passing test-exclusion filters shows up in the retrieve SQL and params."""
    statement_builder = RetrievalStatementBuilder()
    filters = build_test_filters()

    statement, bound = statement_builder.build_retrieve(
        "rag-1",
        [0.1, 0.2],
        query_text="Explain user service",
        layers=["C0_SOURCE_CHUNKS"],
        exclude_path_prefixes=filters.exclude_path_prefixes,
        exclude_like_patterns=filters.exclude_like_patterns,
    )

    # SQL fragments the generated statement is expected to contain, in the
    # same order the checks have always been made.
    expected_fragments = (
        "NOT (",
        "vector_dims(embedding) = vector_dims(CAST(:emb AS vector))",
        "path LIKE :exclude_prefix_0",
        "lower(path) LIKE :exclude_like_0",
        "ESCAPE E'\\\\'",
    )
    for fragment in expected_fragments:
        assert fragment in statement

    assert bound["exclude_prefix_0"] == "tests/%"

    # The LIKE patterns are looked up by value, not by parameter name.
    bound_values = bound.values()
    assert "%.test.%" in bound_values
    assert "%\\_test.%" in bound_values


def test_retrieve_builder_adds_prefer_bonus_sorting() -> None:
    """Check that prefer-path arguments produce bonus columns, ordering and params."""
    statement_builder = RetrievalStatementBuilder()

    statement, bound = statement_builder.build_retrieve(
        "rag-1",
        [0.1, 0.2],
        query_text="find context tests",
        layers=["C1_SYMBOL_CATALOG"],
        prefer_path_prefixes=["tests/"],
        prefer_like_patterns=["%/test\\_%.py"],
    )

    expected_fragments = (
        "AS prefer_bonus",
        "AS structural_rank",
        "WHEN layer = 'C4_SEMANTIC_ROLES' THEN 2",
        "ORDER BY prefer_bonus ASC, test_penalty ASC, layer_rank ASC",
    )
    for fragment in expected_fragments:
        assert fragment in statement

    assert bound["prefer_prefix_0"] == "tests/%"
    assert bound["prefer_like_0"] == "%/test\\_%.py"


def test_lexical_builder_omits_test_filters_when_not_requested() -> None:
    """Check that the lexical query has no exclusion filters when prefer_non_tests is False."""
    statement_builder = RetrievalStatementBuilder()

    statement, bound = statement_builder.build_lexical_code(
        "rag-1",
        query_text="Explain user service",
        prefer_non_tests=False,
    )

    assert statement is not None
    for marker in ("exclude_prefix", "exclude_like"):
        assert marker not in statement

    # No bound parameter may carry an exclusion-style name either.
    assert all(not name.startswith("exclude_") for name in bound)


def test_test_filter_does_not_treat_contest_file_as_test() -> None:
    """Check that 'contest.py' is not classified as a test path while a tests/ file is."""
    contest_result = is_test_path("app/contest.py")
    assert contest_result is False

    tests_dir_result = is_test_path("tests/test_users.py")
    assert tests_dir_result is True