фиксирую состояние
This commit is contained in:
@@ -6,7 +6,10 @@ Differences from `v3`:
|
||||
|
||||
- each YAML case targets a single isolated component;
|
||||
- results are written next to the suite in `cases/.../test_runs/...`;
|
||||
- the first supported component is `process_v2_intent_router`.
|
||||
- supported components are `process_v2_intent_router` and `process_v2_retrieval_policy_resolver`.
|
||||
Also available: `process_v2_router_plus_retrieval_policy` for the linked route -> plan chain,
|
||||
`process_v2_router_plus_retrieval_policy_rag` for the linked route -> plan -> rag chain,
|
||||
and `process_v2_full_chain` for the full route -> plan -> rag -> evidence -> workflow LLM chain.
|
||||
|
||||
## Run
|
||||
|
||||
@@ -23,3 +26,48 @@ PYTHONPATH=. python -m tests.pipeline_setup_v4.run \
|
||||
--cases-dir tests/pipeline_setup_v4/cases/suite_02/process_v2_intent_router/router_llm_first_v3.yaml \
|
||||
--run-name llm_first_v3
|
||||
```
|
||||
|
||||
Retrieval policy resolver suite:
|
||||
|
||||
```bash
|
||||
PYTHONPATH=. python3 -m tests.pipeline_setup_v4.run \
|
||||
--cases-dir tests/pipeline_setup_v4/cases/suite_03/process_v2_retrieval_policy_resolver/cases.yaml \
|
||||
--run-name retrieval_policy_v1
|
||||
```
|
||||
|
||||
Linked router + retrieval policy suite:
|
||||
|
||||
```bash
|
||||
PYTHONPATH=. python3 -m tests.pipeline_setup_v4.run \
|
||||
--cases-dir tests/pipeline_setup_v4/cases/suite_04/process_v2_router_plus_retrieval_policy \
|
||||
--run-name router_plus_policy_v1
|
||||
```
|
||||
|
||||
Inside `suite_04`, cases are split into:
|
||||
|
||||
- `strict_regression_cases.yaml` for contract-level invariants
|
||||
- `soft_observational_cases.yaml` for LLM-sensitive boundary scenarios
|
||||
|
||||
Quality-gate mini-pack:
|
||||
|
||||
```bash
|
||||
PYTHONPATH=. python3 -m tests.pipeline_setup_v4.run \
|
||||
--cases-dir tests/pipeline_setup_v4/cases/suite_05/process_v2_router_plus_retrieval_policy_quality_gate/cases.yaml \
|
||||
--run-name router_plus_policy_qg_v1
|
||||
```
|
||||
|
||||
Linked router + retrieval policy + rag suite:
|
||||
|
||||
```bash
|
||||
PYTHONPATH=src:. DATABASE_URL='postgresql+psycopg://agent:agent@127.0.0.1:5432/agent' python3 -m tests.pipeline_setup_v4.run \
|
||||
--cases-dir tests/pipeline_setup_v4/cases/suite_06/process_v2_router_plus_retrieval_policy_rag/cases.yaml \
|
||||
--run-name router_plus_policy_rag_v1
|
||||
```
|
||||
|
||||
Full process v2 chain with workflow LLM:
|
||||
|
||||
```bash
|
||||
PYTHONPATH=src:. DATABASE_URL='postgresql+psycopg://agent:agent@127.0.0.1:5432/agent' python3 -m tests.pipeline_setup_v4.run \
|
||||
--cases-dir tests/pipeline_setup_v4/cases/suite_07/process_v2_full_chain/cases.yaml \
|
||||
--run-name process_v2_full_chain_v1
|
||||
```
|
||||
|
||||
+540
@@ -0,0 +1,540 @@
|
||||
defaults:
|
||||
component: process_v2_retrieval_policy_resolver
|
||||
|
||||
cases:
|
||||
- id: general-overview-grounded
|
||||
route:
|
||||
routing_domain: GENERAL
|
||||
intent: GENERAL_QA
|
||||
subintent: SUMMARY
|
||||
user_query: "Что это за сервис?"
|
||||
normalized_query: "что это за сервис"
|
||||
anchors:
|
||||
target_doc_hints: []
|
||||
endpoint_paths: []
|
||||
expected:
|
||||
plan:
|
||||
profile: general_qa_grounded_summary
|
||||
layers: [D1_DOCUMENT_CATALOG, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
filters:
|
||||
prefer_path_prefixes: [docs/architecture/, docs/]
|
||||
|
||||
- id: general-does-not-become-docs-summary
|
||||
route:
|
||||
routing_domain: GENERAL
|
||||
intent: GENERAL_QA
|
||||
subintent: SUMMARY
|
||||
user_query: "Дай общий обзор, включая /health"
|
||||
normalized_query: "дай общий обзор включая /health"
|
||||
anchors:
|
||||
endpoint_paths: ["/health"]
|
||||
target_doc_hints: ["docs/api/health-endpoint.md"]
|
||||
matched_aliases: ["api"]
|
||||
expected:
|
||||
plan:
|
||||
profile: general_qa_grounded_summary
|
||||
layers: [D1_DOCUMENT_CATALOG, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
|
||||
- id: find-files-with-target-hint
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: FIND_FILES
|
||||
user_query: "Покажи файл про health endpoint"
|
||||
normalized_query: "покажи файл про health endpoint"
|
||||
anchors:
|
||||
endpoint_paths: ["/health"]
|
||||
target_doc_hints: ["docs/api/health-endpoint.md"]
|
||||
expected:
|
||||
plan:
|
||||
profile: file_lookup
|
||||
layers: [D1_DOCUMENT_CATALOG, D3_ENTITY_CATALOG]
|
||||
limit: 12
|
||||
filters:
|
||||
target_doc_hints: ["docs/api/health-endpoint.md"]
|
||||
path_prefixes: [docs/api/]
|
||||
prefer_like_patterns: ["%health-endpoint.md%"]
|
||||
|
||||
- id: find-files-endpoint-only
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: FIND_FILES
|
||||
user_query: "Где описан /send?"
|
||||
normalized_query: "где описан /send"
|
||||
anchors:
|
||||
endpoint_paths: ["/send"]
|
||||
target_doc_hints: []
|
||||
expected:
|
||||
plan:
|
||||
profile: file_lookup
|
||||
layers: [D1_DOCUMENT_CATALOG, D3_ENTITY_CATALOG]
|
||||
limit: 12
|
||||
filters:
|
||||
path_prefixes: [docs/api/, docs/]
|
||||
prefer_like_patterns: ["%/send%"]
|
||||
|
||||
- id: find-files-entities-and-domain
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: FIND_FILES
|
||||
user_query: "В каком документе описан ManualSendWorker?"
|
||||
normalized_query: "в каком документе описан manualsendworker"
|
||||
anchors:
|
||||
entity_names: ["ManualSendWorker"]
|
||||
matched_aliases: ["manual send"]
|
||||
process_domain: "messaging"
|
||||
process_subdomain: "manual_send"
|
||||
target_doc_hints: []
|
||||
expected:
|
||||
plan:
|
||||
profile: file_lookup
|
||||
layers: [D1_DOCUMENT_CATALOG, D3_ENTITY_CATALOG]
|
||||
limit: 12
|
||||
filters:
|
||||
metadata.domain: messaging
|
||||
metadata.subdomain: manual_send
|
||||
prefer_path_prefixes: [docs/domains/, docs/, docs/logic/]
|
||||
prefer_like_patterns: ["%manualsendworker%", "%manual send%", "%messaging%", "%manual_send%"]
|
||||
|
||||
- id: docs-summary-api-endpoint-health
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: SUMMARY
|
||||
user_query: "Объясни /health"
|
||||
normalized_query: "объясни /health"
|
||||
target_terms: ["health", "/health"]
|
||||
anchors:
|
||||
endpoint_paths: ["/health"]
|
||||
target_doc_hints: ["docs/api/health-endpoint.md"]
|
||||
expected:
|
||||
plan:
|
||||
profile: docs_summary_api_endpoint
|
||||
layers: [D1_DOCUMENT_CATALOG, D2_FACT_INDEX, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
filters:
|
||||
target_doc_hints: ["docs/api/health-endpoint.md"]
|
||||
path_prefixes: [docs/api/, docs/]
|
||||
prefer_path_prefixes: [docs/api/, docs/]
|
||||
|
||||
- id: docs-summary-architecture
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: SUMMARY
|
||||
user_query: "Как устроена архитектура сервиса?"
|
||||
normalized_query: "как устроена архитектура сервиса"
|
||||
anchors:
|
||||
file_names: ["docs/architecture/runtime-manager.md"]
|
||||
target_doc_hints: ["docs/architecture/runtime-manager.md"]
|
||||
matched_aliases: ["architecture"]
|
||||
expected:
|
||||
plan:
|
||||
profile: docs_summary_architecture
|
||||
layers: [D1_DOCUMENT_CATALOG, D5_RELATION_GRAPH, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
filters:
|
||||
target_doc_hints: ["docs/architecture/runtime-manager.md"]
|
||||
prefer_path_prefixes: [docs/architecture/, docs/]
|
||||
|
||||
- id: docs-summary-logic-flow
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: SUMMARY
|
||||
user_query: "Опиши workflow отправки уведомлений"
|
||||
normalized_query: "опиши workflow отправки уведомлений"
|
||||
anchors:
|
||||
matched_aliases: ["workflow"]
|
||||
process_domain: "notifications"
|
||||
process_subdomain: "delivery_loop"
|
||||
target_doc_hints: []
|
||||
expected:
|
||||
plan:
|
||||
profile: docs_summary_logic_flow
|
||||
layers: [D4_WORKFLOW_INDEX, D1_DOCUMENT_CATALOG, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
filters:
|
||||
metadata.domain: notifications
|
||||
metadata.subdomain: delivery_loop
|
||||
prefer_path_prefixes: [docs/logic/, docs/architecture/, docs/]
|
||||
|
||||
- id: docs-summary-domain-entity
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: SUMMARY
|
||||
user_query: "Что такое RuntimeManager?"
|
||||
normalized_query: "что такое runtimemanager"
|
||||
anchors:
|
||||
entity_names: ["RuntimeManager"]
|
||||
process_domain: "runtime"
|
||||
expected:
|
||||
plan:
|
||||
profile: docs_summary_domain_entity
|
||||
layers: [D3_ENTITY_CATALOG, D1_DOCUMENT_CATALOG, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
filters:
|
||||
metadata.domain: runtime
|
||||
prefer_path_prefixes: [docs/domains/, docs/, docs/api/]
|
||||
prefer_like_patterns: ["%runtimemanager%", "%runtime%"]
|
||||
|
||||
- id: docs-summary-generic-weak-signals
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: SUMMARY
|
||||
user_query: "Дай краткое summary документации"
|
||||
normalized_query: "дай краткое summary документации"
|
||||
anchors:
|
||||
target_doc_hints: []
|
||||
endpoint_paths: []
|
||||
entity_names: []
|
||||
matched_aliases: []
|
||||
expected:
|
||||
plan:
|
||||
profile: docs_summary_generic
|
||||
layers: [D1_DOCUMENT_CATALOG, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
filters:
|
||||
prefer_path_prefixes: [docs/]
|
||||
|
||||
- id: docs-summary-generic-conflicting-signals
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: SUMMARY
|
||||
user_query: "Как связан /health и RuntimeManager?"
|
||||
normalized_query: "как связан /health и runtimemanager"
|
||||
anchors:
|
||||
endpoint_paths: ["/health"]
|
||||
entity_names: ["RuntimeManager"]
|
||||
expected:
|
||||
plan:
|
||||
profile: docs_summary_generic
|
||||
layers: [D1_DOCUMENT_CATALOG, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
|
||||
- id: find-files-stays-file-lookup-on-mixed-signals
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: FIND_FILES
|
||||
user_query: "Найди документ по architecture runtime manager"
|
||||
normalized_query: "найди документ по architecture runtime manager"
|
||||
anchors:
|
||||
entity_names: ["RuntimeManager"]
|
||||
matched_aliases: ["architecture"]
|
||||
file_names: ["docs/architecture/runtime-manager.md"]
|
||||
expected:
|
||||
plan:
|
||||
profile: file_lookup
|
||||
layers: [D1_DOCUMENT_CATALOG, D3_ENTITY_CATALOG]
|
||||
limit: 12
|
||||
filters:
|
||||
path_prefixes: [docs/architecture/]
|
||||
|
||||
- id: resolver-survives-partial-empty-anchors
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: SUMMARY
|
||||
user_query: "Что там по docs?"
|
||||
normalized_query: "что там по docs"
|
||||
anchors:
|
||||
entity_names: []
|
||||
file_names: [""]
|
||||
endpoint_paths: []
|
||||
target_doc_hints: []
|
||||
matched_aliases: []
|
||||
process_domain:
|
||||
process_subdomain:
|
||||
expected:
|
||||
plan:
|
||||
profile: docs_summary_generic
|
||||
layers: [D1_DOCUMENT_CATALOG, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
|
||||
- id: find-files-file-name-priority
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: FIND_FILES
|
||||
user_query: "Покажи документ manual-send"
|
||||
normalized_query: "покажи документ manual-send"
|
||||
anchors:
|
||||
file_names: ["docs/workflows/manual-send.md"]
|
||||
matched_aliases: ["manual send"]
|
||||
target_doc_hints: []
|
||||
expected:
|
||||
plan:
|
||||
profile: file_lookup
|
||||
layers: [D1_DOCUMENT_CATALOG, D3_ENTITY_CATALOG]
|
||||
limit: 12
|
||||
filters:
|
||||
path_prefixes: [docs/workflows/]
|
||||
prefer_like_patterns: ["%docs/workflows/manual-send.md%", "%manual send%"]
|
||||
|
||||
- id: conflict-api-hint-vs-workflow-metadata
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: SUMMARY
|
||||
user_query: "Опиши flow для /health в notification loop"
|
||||
normalized_query: "опиши flow для /health в notification loop"
|
||||
anchors:
|
||||
endpoint_paths: ["/health"]
|
||||
target_doc_hints: ["docs/api/health-endpoint.md"]
|
||||
matched_aliases: ["workflow"]
|
||||
process_domain: "notifications"
|
||||
process_subdomain: "delivery_loop"
|
||||
expected:
|
||||
plan:
|
||||
profile: docs_summary_generic
|
||||
layers: [D1_DOCUMENT_CATALOG, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
filters:
|
||||
target_doc_hints: ["docs/api/health-endpoint.md"]
|
||||
metadata.domain: notifications
|
||||
metadata.subdomain: delivery_loop
|
||||
path_prefixes: [docs/api/, docs/]
|
||||
|
||||
- id: conflict-file-name-vs-architecture-alias
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: SUMMARY
|
||||
user_query: "Объясни architecture для notification loop"
|
||||
normalized_query: "объясни architecture для notification loop"
|
||||
anchors:
|
||||
file_names: ["docs/logic/notification-loop.md"]
|
||||
matched_aliases: ["architecture"]
|
||||
expected:
|
||||
plan:
|
||||
profile: docs_summary_generic
|
||||
layers: [D1_DOCUMENT_CATALOG, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
filters:
|
||||
prefer_path_prefixes: [docs/architecture/, docs/, docs/logic/]
|
||||
prefer_like_patterns: ["%docs/logic/notification-loop.md%", "%architecture%"]
|
||||
|
||||
- id: conflict-hint-vs-entity-soft-signals
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: SUMMARY
|
||||
user_query: "Что делает /send и ManualSendWorker?"
|
||||
normalized_query: "что делает /send и manualsendworker"
|
||||
anchors:
|
||||
endpoint_paths: ["/send"]
|
||||
target_doc_hints: ["docs/api/send-endpoint.md"]
|
||||
entity_names: ["ManualSendWorker"]
|
||||
matched_aliases: ["manual send"]
|
||||
expected:
|
||||
plan:
|
||||
profile: docs_summary_generic
|
||||
layers: [D1_DOCUMENT_CATALOG, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
filters:
|
||||
target_doc_hints: ["docs/api/send-endpoint.md"]
|
||||
path_prefixes: [docs/api/, docs/]
|
||||
prefer_like_patterns: ["%send-endpoint.md%", "%/send%", "%manualsendworker%", "%manual send%"]
|
||||
|
||||
- id: metadata-only-find-files
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: FIND_FILES
|
||||
user_query: "Найди документы по notifications delivery loop"
|
||||
normalized_query: "найди документы по notifications delivery loop"
|
||||
anchors:
|
||||
process_domain: "notifications"
|
||||
process_subdomain: "delivery_loop"
|
||||
expected:
|
||||
plan:
|
||||
profile: file_lookup
|
||||
layers: [D1_DOCUMENT_CATALOG, D3_ENTITY_CATALOG]
|
||||
limit: 12
|
||||
filters:
|
||||
path_prefixes: [docs/]
|
||||
metadata.domain: notifications
|
||||
metadata.subdomain: delivery_loop
|
||||
prefer_path_prefixes: [docs/, docs/domains/, docs/logic/]
|
||||
|
||||
- id: metadata-only-generic-summary
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: SUMMARY
|
||||
user_query: "Дай summary по notifications delivery loop"
|
||||
normalized_query: "дай summary по notifications delivery loop"
|
||||
anchors:
|
||||
process_domain: "notifications"
|
||||
process_subdomain: "delivery_loop"
|
||||
expected:
|
||||
plan:
|
||||
profile: docs_summary_generic
|
||||
layers: [D1_DOCUMENT_CATALOG, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
filters:
|
||||
metadata.domain: notifications
|
||||
metadata.subdomain: delivery_loop
|
||||
prefer_path_prefixes: [docs/]
|
||||
prefer_like_patterns: ["%notifications%", "%delivery_loop%"]
|
||||
|
||||
- id: metadata-domain-entity-with-alias
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: SUMMARY
|
||||
user_query: "Объясни компонент billing"
|
||||
normalized_query: "объясни компонент billing"
|
||||
anchors:
|
||||
matched_aliases: ["component"]
|
||||
process_domain: "billing"
|
||||
expected:
|
||||
plan:
|
||||
profile: docs_summary_domain_entity
|
||||
layers: [D3_ENTITY_CATALOG, D1_DOCUMENT_CATALOG, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
filters:
|
||||
metadata.domain: billing
|
||||
prefer_path_prefixes: [docs/domains/, docs/, docs/api/]
|
||||
prefer_like_patterns: ["%component%", "%billing%"]
|
||||
|
||||
- id: alias-only-api
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: SUMMARY
|
||||
user_query: "Объясни api health"
|
||||
normalized_query: "объясни api health"
|
||||
anchors:
|
||||
matched_aliases: ["api endpoint"]
|
||||
expected:
|
||||
plan:
|
||||
profile: docs_summary_api_endpoint
|
||||
layers: [D1_DOCUMENT_CATALOG, D2_FACT_INDEX, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
filters:
|
||||
path_prefixes: [docs/api/, docs/]
|
||||
prefer_like_patterns: ["%api endpoint%"]
|
||||
|
||||
- id: alias-only-architecture
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: SUMMARY
|
||||
user_query: "Расскажи про architecture"
|
||||
normalized_query: "расскажи про architecture"
|
||||
anchors:
|
||||
matched_aliases: ["architecture"]
|
||||
expected:
|
||||
plan:
|
||||
profile: docs_summary_architecture
|
||||
layers: [D1_DOCUMENT_CATALOG, D5_RELATION_GRAPH, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
filters:
|
||||
prefer_path_prefixes: [docs/architecture/, docs/]
|
||||
prefer_like_patterns: ["%architecture%"]
|
||||
|
||||
- id: partial-only-endpoint-path
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: SUMMARY
|
||||
user_query: "Что делает /status?"
|
||||
normalized_query: "что делает /status"
|
||||
anchors:
|
||||
endpoint_paths: ["/status"]
|
||||
expected:
|
||||
plan:
|
||||
profile: docs_summary_api_endpoint
|
||||
layers: [D1_DOCUMENT_CATALOG, D2_FACT_INDEX, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
filters:
|
||||
path_prefixes: [docs/api/, docs/]
|
||||
prefer_like_patterns: ["%/status%"]
|
||||
|
||||
- id: partial-only-target-doc-hint
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: SUMMARY
|
||||
user_query: "Объясни notification loop"
|
||||
normalized_query: "объясни notification loop"
|
||||
anchors:
|
||||
target_doc_hints: ["docs/logic/notification-loop.md"]
|
||||
expected:
|
||||
plan:
|
||||
profile: docs_summary_logic_flow
|
||||
layers: [D4_WORKFLOW_INDEX, D1_DOCUMENT_CATALOG, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
filters:
|
||||
target_doc_hints: ["docs/logic/notification-loop.md"]
|
||||
prefer_path_prefixes: [docs/logic/, docs/architecture/, docs/]
|
||||
|
||||
- id: generic-neutral-with-nonsemantic-hint
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: SUMMARY
|
||||
user_query: "Дай общий summary intro docs"
|
||||
normalized_query: "дай общий summary intro docs"
|
||||
anchors:
|
||||
target_doc_hints: ["docs/intro/overview.md"]
|
||||
expected:
|
||||
plan:
|
||||
profile: docs_summary_generic
|
||||
layers: [D1_DOCUMENT_CATALOG, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
filters:
|
||||
target_doc_hints: ["docs/intro/overview.md"]
|
||||
prefer_path_prefixes: [docs/]
|
||||
|
||||
- id: generic-neutral-weak-mixed-aliases
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: SUMMARY
|
||||
user_query: "Нужен общий summary про architecture component"
|
||||
normalized_query: "нужен общий summary про architecture component"
|
||||
anchors:
|
||||
matched_aliases: ["architecture", "component"]
|
||||
expected:
|
||||
plan:
|
||||
profile: docs_summary_generic
|
||||
layers: [D1_DOCUMENT_CATALOG, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
filters:
|
||||
prefer_path_prefixes: [docs/architecture/, docs/, docs/domains/, docs/api/]
|
||||
|
||||
- id: find-files-hard-priority-with-multiple-hints
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent: FIND_FILES
|
||||
user_query: "Найди документы по /health и runtime manager"
|
||||
normalized_query: "найди документы по /health и runtime manager"
|
||||
anchors:
|
||||
endpoint_paths: ["/health"]
|
||||
entity_names: ["RuntimeManager"]
|
||||
matched_aliases: ["architecture"]
|
||||
target_doc_hints:
|
||||
- "docs/api/health-endpoint.md"
|
||||
- "docs/architecture/runtime-manager.md"
|
||||
expected:
|
||||
plan:
|
||||
profile: file_lookup
|
||||
layers: [D1_DOCUMENT_CATALOG, D3_ENTITY_CATALOG]
|
||||
limit: 12
|
||||
filters:
|
||||
target_doc_hints:
|
||||
- "docs/api/health-endpoint.md"
|
||||
- "docs/architecture/runtime-manager.md"
|
||||
path_prefixes: [docs/api/, docs/architecture/]
|
||||
prefer_like_patterns: ["%health-endpoint.md%", "%runtime-manager.md%"]
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
+199
@@ -0,0 +1,199 @@
|
||||
defaults:
|
||||
component: process_v2_router_plus_retrieval_policy
|
||||
|
||||
cases:
|
||||
- id: soft-architecture-summary
|
||||
query: "Как устроена архитектура приложения?"
|
||||
expected:
|
||||
route:
|
||||
routing_domain_equals_any: [DOCS, GENERAL]
|
||||
intent_equals_any: [DOC_EXPLAIN, GENERAL_QA]
|
||||
subintent: SUMMARY
|
||||
retrieval_plan:
|
||||
profile_equals_any: [docs_summary_architecture, docs_summary_generic, general_qa_grounded_summary]
|
||||
|
||||
- id: soft-process-summary
|
||||
query: "Опиши процесс отправки уведомлений"
|
||||
expected:
|
||||
route:
|
||||
routing_domain_equals_any: [DOCS, GENERAL]
|
||||
intent_equals_any: [DOC_EXPLAIN, GENERAL_QA]
|
||||
subintent: SUMMARY
|
||||
retrieval_plan:
|
||||
profile_equals_any: [docs_summary_logic_flow, docs_summary_generic, general_qa_grounded_summary]
|
||||
|
||||
- id: soft-domain-entity-summary
|
||||
query: "Что такое runtime health в документации?"
|
||||
expected:
|
||||
route:
|
||||
routing_domain_equals_any: [DOCS, GENERAL]
|
||||
intent_equals_any: [DOC_EXPLAIN, GENERAL_QA]
|
||||
subintent: SUMMARY
|
||||
retrieval_plan:
|
||||
profile_equals_any: [docs_summary_domain_entity, docs_summary_generic, general_qa_grounded_summary]
|
||||
|
||||
- id: soft-runtime-health-document
|
||||
query: "Покажи документ про runtime health"
|
||||
expected:
|
||||
route:
|
||||
routing_domain_equals_any: [DOCS, GENERAL]
|
||||
intent_equals_any: [DOC_EXPLAIN, GENERAL_QA]
|
||||
subintent_equals_any: [SUMMARY, FIND_FILES]
|
||||
retrieval_plan:
|
||||
profile_equals_any: [file_lookup, docs_summary_domain_entity, docs_summary_generic, general_qa_grounded_summary]
|
||||
|
||||
- id: soft-api-send-noisy
|
||||
query: "Нужен краткий док-саммари по api /send"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
retrieval_plan:
|
||||
profile: docs_summary_api_endpoint
|
||||
|
||||
- id: soft-general-risks-architecture
|
||||
query: "Какие риски у такого подхода в архитектуре?"
|
||||
expected:
|
||||
route:
|
||||
routing_domain_equals_any: [GENERAL, DOCS]
|
||||
intent_equals_any: [GENERAL_QA, DOC_EXPLAIN]
|
||||
subintent: SUMMARY
|
||||
retrieval_plan:
|
||||
profile_equals_any: [general_qa_grounded_summary, docs_summary_architecture, docs_summary_generic]
|
||||
|
||||
- id: soft-general-polling-webhook
|
||||
query: "Сравни polling и webhook в контексте сервиса"
|
||||
expected:
|
||||
route:
|
||||
routing_domain_equals_any: [GENERAL, DOCS]
|
||||
intent_equals_any: [GENERAL_QA, DOC_EXPLAIN]
|
||||
subintent: SUMMARY
|
||||
retrieval_plan:
|
||||
profile_equals_any: [general_qa_grounded_summary, docs_summary_generic]
|
||||
|
||||
- id: soft-conflict-entity-plus-process
|
||||
query: "Объясни entity runtime health и runtime loop"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
retrieval_plan:
|
||||
profile_equals_any: [docs_summary_domain_entity, docs_summary_generic]
|
||||
filters:
|
||||
prefer_path_prefixes_contains: [docs/domains/]
|
||||
|
||||
- id: soft-alias-handle-health
|
||||
query: "Объясни ручку /health"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
retrieval_plan:
|
||||
profile: docs_summary_api_endpoint
|
||||
|
||||
- id: soft-alias-show-doc-handle-health
|
||||
query: "Покажи документ по ручке /health"
|
||||
expected:
|
||||
route:
|
||||
routing_domain_equals_any: [DOCS, GENERAL]
|
||||
intent_equals_any: [DOC_EXPLAIN, GENERAL_QA]
|
||||
subintent_equals_any: [FIND_FILES, SUMMARY]
|
||||
retrieval_plan:
|
||||
profile_equals_any: [file_lookup, docs_summary_api_endpoint, general_qa_grounded_summary]
|
||||
|
||||
- id: soft-alias-schema-overview
|
||||
query: "Нужен обзор по архитектуре notify app"
|
||||
expected:
|
||||
route:
|
||||
routing_domain_equals_any: [DOCS, GENERAL]
|
||||
intent_equals_any: [DOC_EXPLAIN, GENERAL_QA]
|
||||
subintent: SUMMARY
|
||||
retrieval_plan:
|
||||
profile_equals_any: [docs_summary_architecture, docs_summary_generic, general_qa_grounded_summary]
|
||||
|
||||
- id: soft-alias-find-schema-file
|
||||
query: "Найди файл со схемой сервиса уведомлений"
|
||||
expected:
|
||||
route:
|
||||
routing_domain_equals_any: [DOCS, GENERAL]
|
||||
intent_equals_any: [DOC_EXPLAIN, GENERAL_QA]
|
||||
subintent_equals_any: [FIND_FILES, SUMMARY]
|
||||
retrieval_plan:
|
||||
profile_equals_any: [file_lookup, docs_summary_architecture, docs_summary_generic, general_qa_grounded_summary]
|
||||
|
||||
- id: soft-process-domain-summary
|
||||
query: "Объясни overview по billing invoice flow"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
route:
|
||||
anchors:
|
||||
process_domain: present
|
||||
process_subdomain: present
|
||||
retrieval_plan:
|
||||
profile_equals_any: [docs_summary_logic_flow, docs_summary_generic, docs_summary_architecture]
|
||||
|
||||
- id: soft-process-domain-find-files
|
||||
query: "Найди файл по billing invoice flow"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: FIND_FILES
|
||||
route:
|
||||
anchors:
|
||||
process_domain: present
|
||||
process_subdomain: present
|
||||
retrieval_plan:
|
||||
profile: file_lookup
|
||||
|
||||
- id: soft-noisy-arch-overview
|
||||
query: "arch overview по notify app"
|
||||
expected:
|
||||
route:
|
||||
routing_domain_equals_any: [DOCS, GENERAL]
|
||||
intent_equals_any: [DOC_EXPLAIN, GENERAL_QA]
|
||||
subintent: SUMMARY
|
||||
retrieval_plan:
|
||||
profile_equals_any: [docs_summary_architecture, docs_summary_generic, general_qa_grounded_summary]
|
||||
|
||||
- id: soft-noisy-file-send-endpoint
|
||||
query: "нужен файл где описан /send endpoint"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: FIND_FILES
|
||||
retrieval_plan:
|
||||
profile: file_lookup
|
||||
|
||||
- id: soft-bare-file-token-preferences
|
||||
query: "health-endpoint.md"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: FIND_FILES
|
||||
route:
|
||||
anchors:
|
||||
file_names_contains: ["health-endpoint.md"]
|
||||
retrieval_plan:
|
||||
profile: file_lookup
|
||||
|
||||
- id: soft-doc-path-preferences
|
||||
query: "docs/api/health-endpoint.md"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: FIND_FILES
|
||||
route:
|
||||
anchors:
|
||||
file_names_contains: ["docs/api/health-endpoint.md"]
|
||||
retrieval_plan:
|
||||
profile: file_lookup
|
||||
+206
@@ -0,0 +1,206 @@
|
||||
defaults:
|
||||
component: process_v2_router_plus_retrieval_policy
|
||||
|
||||
cases:
|
||||
- id: strict-general-overview
|
||||
query: "Общий обзор сервиса"
|
||||
expected:
|
||||
router:
|
||||
domain: GENERAL
|
||||
intent: GENERAL_QA
|
||||
sub_intent: SUMMARY
|
||||
route:
|
||||
anchors:
|
||||
endpoint_paths_not_contains: ["/health"]
|
||||
file_names_not_contains: ["/health"]
|
||||
retrieval_plan:
|
||||
profile: general_qa_grounded_summary
|
||||
layers_contains: [D1_DOCUMENT_CATALOG, D0_DOC_CHUNKS]
|
||||
limit: 8
|
||||
filters:
|
||||
prefer_path_prefixes_contains: [docs/architecture/, docs/]
|
||||
path_prefixes: absent
|
||||
|
||||
- id: strict-api-summary-health
|
||||
query: "Объясни endpoint /health"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
route:
|
||||
anchors:
|
||||
endpoint_paths_contains: ["/health"]
|
||||
file_names_not_contains: ["/health"]
|
||||
retrieval_plan:
|
||||
profile: docs_summary_api_endpoint
|
||||
filters:
|
||||
path_prefixes_contains: [docs/api/]
|
||||
prefer_path_prefixes_contains: [docs/api/]
|
||||
|
||||
- id: strict-find-files-health-described
|
||||
query: "Где описан endpoint /health"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: FIND_FILES
|
||||
route:
|
||||
anchors:
|
||||
endpoint_paths_contains: ["/health"]
|
||||
file_names_not_contains: ["/health"]
|
||||
retrieval_plan:
|
||||
profile: file_lookup
|
||||
layers_contains: [D1_DOCUMENT_CATALOG, D3_ENTITY_CATALOG]
|
||||
limit: 12
|
||||
filters:
|
||||
path_prefixes_contains: [docs/api/]
|
||||
prefer_path_prefixes_contains: [docs/api/]
|
||||
|
||||
- id: strict-find-files-health-show-file
|
||||
query: "Покажи файл с описанием /health"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: FIND_FILES
|
||||
route:
|
||||
anchors:
|
||||
endpoint_paths_contains: ["/health"]
|
||||
retrieval_plan:
|
||||
profile: file_lookup
|
||||
filters:
|
||||
path_prefixes_contains: [docs/api/]
|
||||
|
||||
- id: strict-runtime-health-find-files
|
||||
query: "Где описан runtime health"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: FIND_FILES
|
||||
retrieval_plan:
|
||||
profile: file_lookup
|
||||
filters:
|
||||
path_prefixes_contains_any: [docs/domains/, docs/]
|
||||
|
||||
- id: strict-noisy-runtime-health-find-files
|
||||
query: "runtime health где описано в docs"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: FIND_FILES
|
||||
retrieval_plan:
|
||||
profile: file_lookup
|
||||
|
||||
- id: strict-doc-path-is-file-lookup
|
||||
query: "docs/api/health-endpoint.md"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: FIND_FILES
|
||||
route:
|
||||
anchors:
|
||||
file_names_contains: ["docs/api/health-endpoint.md"]
|
||||
endpoint_paths_not_contains: ["/api/health-endpoint.md"]
|
||||
retrieval_plan:
|
||||
profile: file_lookup
|
||||
filters:
|
||||
path_prefixes_contains: [docs/api/]
|
||||
|
||||
- id: strict-file-token-is-file-lookup
|
||||
query: "health-endpoint.md"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: FIND_FILES
|
||||
route:
|
||||
anchors:
|
||||
file_names_contains: ["health-endpoint.md"]
|
||||
endpoint_paths_not_contains: ["health-endpoint.md"]
|
||||
retrieval_plan:
|
||||
profile: file_lookup
|
||||
|
||||
- id: strict-noisy-english-show-doc
|
||||
query: "pls show doc for /health"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: FIND_FILES
|
||||
route:
|
||||
anchors:
|
||||
endpoint_paths_contains: ["/health"]
|
||||
file_names_not_contains: ["/health"]
|
||||
target_terms_not_contains: [pls, show, doc, for]
|
||||
retrieval_plan:
|
||||
profile: file_lookup
|
||||
filters:
|
||||
path_prefixes_contains: [docs/api/]
|
||||
|
||||
- id: strict-bare-endpoint-anchor-invariant
|
||||
query: "/health"
|
||||
expected:
|
||||
route:
|
||||
routing_domain_equals_any: [GENERAL, DOCS]
|
||||
intent_equals_any: [GENERAL_QA, DOC_EXPLAIN]
|
||||
subintent: SUMMARY
|
||||
anchors:
|
||||
endpoint_paths_contains: ["/health"]
|
||||
file_names_not_contains: ["/health"]
|
||||
|
||||
- id: strict-find-files-dominates-health-question
|
||||
query: "В каком файле описан `/health`?"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: FIND_FILES
|
||||
retrieval_plan:
|
||||
profile: file_lookup
|
||||
|
||||
- id: strict-runtime-health-summary-not-file-lookup
|
||||
query: "Что делает runtime health"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
retrieval_plan:
|
||||
profile_equals_any: [docs_summary_domain_entity, docs_summary_generic]
|
||||
|
||||
- id: strict-general-purpose
|
||||
query: "Зачем нужен этот сервис?"
|
||||
expected:
|
||||
route:
|
||||
routing_domain_equals_any: [GENERAL, DOCS]
|
||||
intent_equals_any: [GENERAL_QA, DOC_EXPLAIN]
|
||||
subintent: SUMMARY
|
||||
retrieval_plan:
|
||||
profile_equals_any: [general_qa_grounded_summary, docs_summary_generic]
|
||||
|
||||
- id: strict-conflict-summary-goes-generic
|
||||
query: "Как устроена архитектура endpoint /send"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
retrieval_plan:
|
||||
profile: docs_summary_generic
|
||||
filters:
|
||||
path_prefixes_contains: [docs/api/]
|
||||
prefer_path_prefixes_contains: [docs/api/, docs/architecture/]
|
||||
|
||||
- id: strict-find-files-dominates-mixed-signals
|
||||
query: "В каком файле описан architecture flow отправки уведомлений"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: FIND_FILES
|
||||
retrieval_plan:
|
||||
profile: file_lookup
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
+115
@@ -0,0 +1,115 @@
|
||||
defaults:
|
||||
component: process_v2_router_plus_retrieval_policy
|
||||
|
||||
cases:
|
||||
- id: qg-t01-docs-overview-architecture
|
||||
query: "Объясни overview архитектуры сервиса уведомлений"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
retrieval_plan:
|
||||
profile_one_of: [docs_summary_architecture, docs_summary_generic]
|
||||
filters:
|
||||
prefer_path_prefixes_contains: [docs/architecture/]
|
||||
|
||||
- id: qg-t02-docs-overview-flow
|
||||
query: "Дай overview по flow отправки уведомлений"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
retrieval_plan:
|
||||
profile_one_of: [docs_summary_logic_flow, docs_summary_generic]
|
||||
filters:
|
||||
prefer_path_prefixes_contains: [docs/logic/]
|
||||
|
||||
- id: qg-t03-soft-arch-overview-notify
|
||||
query: "Arch overview по notify app"
|
||||
expected:
|
||||
route:
|
||||
routing_domain_one_of: [DOCS, GENERAL]
|
||||
intent_one_of: [DOC_EXPLAIN, GENERAL_QA]
|
||||
subintent: SUMMARY
|
||||
retrieval_plan:
|
||||
profile_one_of: [docs_summary_architecture, docs_summary_generic, general_qa_grounded_summary]
|
||||
|
||||
- id: qg-t04-process-summary-filters
|
||||
query: "Объясни billing invoice process"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
route:
|
||||
anchors:
|
||||
process_domain: present
|
||||
process_subdomain: present
|
||||
retrieval_plan:
|
||||
if_anchor_present_then_filter_present:
|
||||
- anchor: anchors.process_domain
|
||||
filter: filters.metadata.domain
|
||||
- anchor: anchors.process_subdomain
|
||||
filter: filters.metadata.subdomain
|
||||
profile_one_of: [docs_summary_logic_flow, docs_summary_generic]
|
||||
|
||||
- id: qg-t05-process-find-files-filters
|
||||
query: "Найди файл по billing invoice process"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: FIND_FILES
|
||||
route:
|
||||
anchors:
|
||||
process_domain: present
|
||||
process_subdomain: present
|
||||
retrieval_plan:
|
||||
profile: file_lookup
|
||||
if_anchor_present_then_filter_present:
|
||||
- anchor: anchors.process_domain
|
||||
filter: filters.metadata.domain
|
||||
- anchor: anchors.process_subdomain
|
||||
filter: filters.metadata.subdomain
|
||||
filters:
|
||||
prefer_path_prefixes_contains_any: [docs/domains/, docs/logic/]
|
||||
|
||||
- id: qg-t06-soft-process-shaped-input
|
||||
query: "billing invoice docs"
|
||||
expected:
|
||||
route:
|
||||
routing_domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
subintent_one_of: [FIND_FILES, SUMMARY]
|
||||
retrieval_plan:
|
||||
profile_one_of: [file_lookup, docs_summary_logic_flow, docs_summary_generic]
|
||||
|
||||
- id: qg-t07-clean-target-terms-architecture
|
||||
query: "Объясни architecture overview сервиса уведомлений"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
route:
|
||||
target_terms_not_contains: ["объясни", "overview", "architecture"]
|
||||
retrieval_plan:
|
||||
profile_one_of: [docs_summary_architecture, docs_summary_generic]
|
||||
|
||||
- id: qg-t08-clean-target-terms-file-query
|
||||
query: "Найди doc for /health"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: FIND_FILES
|
||||
route:
|
||||
target_terms_contains: ["/health"]
|
||||
target_terms_not_contains: ["найди", "doc", "for"]
|
||||
anchors:
|
||||
endpoint_paths_contains: ["/health"]
|
||||
file_names_not_contains: ["/health"]
|
||||
retrieval_plan:
|
||||
profile: file_lookup
|
||||
BIN
Binary file not shown.
+193
@@ -0,0 +1,193 @@
|
||||
defaults:
|
||||
component: process_v2_router_plus_retrieval_policy_rag
|
||||
rag_session_id: "694cd10b-3842-4579-8d53-e54ec4291eae"
|
||||
|
||||
cases:
|
||||
- id: rag-t01-architecture-summary
|
||||
query: "Объясни overview архитектуры сервиса уведомлений"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
route:
|
||||
anchors:
|
||||
target_doc_hints_contains:
|
||||
- "docs/architecture/telegram-notify-app-overview.md"
|
||||
retrieval_plan:
|
||||
profile: docs_summary_architecture
|
||||
filters:
|
||||
prefer_path_prefixes_contains:
|
||||
- "docs/architecture/"
|
||||
rag:
|
||||
paths_contains:
|
||||
- "docs/architecture/telegram-notify-app-overview.md"
|
||||
layers_contains:
|
||||
- "D5_RELATION_GRAPH"
|
||||
- "D1_DOCUMENT_CATALOG"
|
||||
|
||||
- id: rag-t02-docs-index-find-files
|
||||
query: "Найди файл-индекс документации проекта"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: FIND_FILES
|
||||
route:
|
||||
anchors:
|
||||
target_doc_hints_contains:
|
||||
- "docs/README.md"
|
||||
retrieval_plan:
|
||||
profile: file_lookup
|
||||
filters:
|
||||
path_prefixes_contains:
|
||||
- "docs/"
|
||||
rag:
|
||||
paths_contains:
|
||||
- "docs/README.md"
|
||||
layers_contains:
|
||||
- "D1_DOCUMENT_CATALOG"
|
||||
|
||||
- id: rag-t03-general-docs-overview
|
||||
query: "Что входит в документацию этого проекта?"
|
||||
expected:
|
||||
router:
|
||||
domain: GENERAL
|
||||
intent: GENERAL_QA
|
||||
sub_intent: SUMMARY
|
||||
retrieval_plan:
|
||||
profile: general_qa_grounded_summary
|
||||
rag:
|
||||
paths_contains:
|
||||
- "docs/README.md"
|
||||
- "docs/architecture/telegram-notify-app-overview.md"
|
||||
layers_contains:
|
||||
- "D1_DOCUMENT_CATALOG"
|
||||
- "D0_DOC_CHUNKS"
|
||||
|
||||
- id: rag-t04-errors-catalog-find-files
|
||||
query: "В каком файле лежит каталог ошибок?"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: FIND_FILES
|
||||
route:
|
||||
anchors:
|
||||
target_doc_hints_contains:
|
||||
- "docs/errors/catalog.yaml"
|
||||
retrieval_plan:
|
||||
profile: file_lookup
|
||||
filters:
|
||||
path_prefixes_contains:
|
||||
- "docs/errors/"
|
||||
rag:
|
||||
paths_contains:
|
||||
- "docs/errors/catalog.yaml"
|
||||
layers_contains:
|
||||
- "D1_DOCUMENT_CATALOG"
|
||||
|
||||
- id: rag-t05-errors-catalog-general
|
||||
query: "Объясни каталог ошибок"
|
||||
expected:
|
||||
router:
|
||||
domain: GENERAL
|
||||
intent: GENERAL_QA
|
||||
sub_intent: SUMMARY
|
||||
route:
|
||||
anchors:
|
||||
target_doc_hints_contains:
|
||||
- "docs/errors/catalog.yaml"
|
||||
retrieval_plan:
|
||||
profile: general_qa_grounded_summary
|
||||
rag:
|
||||
paths_contains:
|
||||
- "docs/errors/catalog.yaml"
|
||||
|
||||
- id: rag-t06-health-summary-chain
|
||||
query: "Объясни endpoint /health"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
route:
|
||||
anchors:
|
||||
endpoint_paths_contains:
|
||||
- "/health"
|
||||
file_names_not_contains:
|
||||
- "/health"
|
||||
retrieval_plan:
|
||||
profile: docs_summary_api_endpoint
|
||||
filters:
|
||||
prefer_path_prefixes_contains:
|
||||
- "docs/api/"
|
||||
rag:
|
||||
paths_contains_any:
|
||||
- "docs/README.md"
|
||||
- "docs/architecture/telegram-notify-app-overview.md"
|
||||
layers_contains:
|
||||
- "D2_FACT_INDEX"
|
||||
|
||||
- id: rag-t07-health-find-files-empty
|
||||
query: "Где описан endpoint /health"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: FIND_FILES
|
||||
route:
|
||||
anchors:
|
||||
endpoint_paths_contains:
|
||||
- "/health"
|
||||
target_doc_hints_contains:
|
||||
- "docs/api/health-endpoint.md"
|
||||
retrieval_plan:
|
||||
profile: file_lookup
|
||||
rag:
|
||||
row_count: 0
|
||||
paths: absent
|
||||
layers: absent
|
||||
|
||||
- id: rag-t08-notifications-workflow-metadata
|
||||
query: "Объясни notifications workflow"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
route:
|
||||
anchors:
|
||||
process_domain: notifications
|
||||
retrieval_plan:
|
||||
profile: docs_summary_logic_flow
|
||||
filters:
|
||||
metadata.domain: notifications
|
||||
prefer_path_prefixes_contains:
|
||||
- "docs/logic/"
|
||||
rag:
|
||||
paths_contains:
|
||||
- "docs/architecture/telegram-notify-app-overview.md"
|
||||
metadata_domains_contains:
|
||||
- "notifications"
|
||||
|
||||
- id: rag-t09-mixed-summary-generic
|
||||
query: "Как архитектурно устроен endpoint /send"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
route:
|
||||
anchors:
|
||||
endpoint_paths_contains:
|
||||
- "/send"
|
||||
retrieval_plan:
|
||||
profile: docs_summary_generic
|
||||
filters:
|
||||
prefer_path_prefixes_contains:
|
||||
- "docs/api/"
|
||||
- "docs/architecture/"
|
||||
rag:
|
||||
paths_contains:
|
||||
- "docs/architecture/telegram-notify-app-overview.md"
|
||||
BIN
Binary file not shown.
@@ -0,0 +1,180 @@
|
||||
defaults:
|
||||
component: process_v2_full_chain
|
||||
rag_session_id: "694cd10b-3842-4579-8d53-e54ec4291eae"
|
||||
|
||||
cases:
|
||||
- id: full-t01-general-docs-overview
|
||||
query: "Что входит в документацию этого проекта?"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
retrieval_plan:
|
||||
profile: docs_summary_generic
|
||||
rag:
|
||||
row_count: 0
|
||||
pipeline:
|
||||
answer_mode: insufficient_evidence
|
||||
llm:
|
||||
non_empty: true
|
||||
contains_all:
|
||||
- "не найден"
|
||||
- "документ"
|
||||
|
||||
- id: full-t02-architecture-summary
|
||||
query: "Объясни overview архитектуры сервиса уведомлений"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
route:
|
||||
anchors:
|
||||
target_doc_hints_contains:
|
||||
- "docs/architecture/telegram-notify-app-overview.md"
|
||||
retrieval_plan:
|
||||
profile: docs_summary_architecture
|
||||
rag:
|
||||
paths_contains:
|
||||
- "docs/architecture/telegram-notify-app-overview.md"
|
||||
pipeline:
|
||||
answer_mode: grounded_summary
|
||||
llm:
|
||||
non_empty: true
|
||||
contains_any:
|
||||
- ["RuntimeManager", "TelegramControlChannel"]
|
||||
- ["worker", "Telegram"]
|
||||
|
||||
- id: full-t03-runtime-health-summary
|
||||
query: "Что такое runtime health в этой документации?"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
retrieval_plan:
|
||||
profile: docs_summary_domain_entity
|
||||
rag:
|
||||
row_count: 0
|
||||
pipeline:
|
||||
answer_mode: insufficient_evidence
|
||||
llm:
|
||||
non_empty: true
|
||||
contains_all:
|
||||
- "не найден"
|
||||
- "документ"
|
||||
|
||||
- id: full-t04-logic-flow-summary
|
||||
query: "Кратко опиши цикл отправки уведомлений"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
retrieval_plan:
|
||||
profile: docs_summary_logic_flow
|
||||
rag:
|
||||
row_count: 0
|
||||
pipeline:
|
||||
answer_mode: insufficient_evidence
|
||||
llm:
|
||||
non_empty: true
|
||||
contains_all:
|
||||
- "не найден"
|
||||
- "документ"
|
||||
|
||||
- id: full-t05-errors-catalog-find-files
|
||||
query: "В каком файле лежит каталог ошибок?"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: FIND_FILES
|
||||
route:
|
||||
anchors:
|
||||
target_doc_hints_contains:
|
||||
- "docs/errors/catalog.yaml"
|
||||
retrieval_plan:
|
||||
profile: file_lookup
|
||||
rag:
|
||||
paths_contains:
|
||||
- "docs/errors/catalog.yaml"
|
||||
pipeline:
|
||||
answer_mode: deterministic
|
||||
llm:
|
||||
non_empty: true
|
||||
contains_all:
|
||||
- "docs/errors/catalog.yaml"
|
||||
|
||||
- id: full-t06-docs-index-find-files
|
||||
query: "Найди файл-индекс документации проекта"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: FIND_FILES
|
||||
route:
|
||||
anchors:
|
||||
target_doc_hints_contains:
|
||||
- "docs/README.md"
|
||||
retrieval_plan:
|
||||
profile: file_lookup
|
||||
rag:
|
||||
paths_contains:
|
||||
- "docs/README.md"
|
||||
pipeline:
|
||||
answer_mode: deterministic
|
||||
llm:
|
||||
non_empty: true
|
||||
contains_all:
|
||||
- "docs/README.md"
|
||||
|
||||
- id: full-t07-mixed-generic-summary
|
||||
query: "Как архитектурно устроен endpoint /send"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
route:
|
||||
anchors:
|
||||
endpoint_paths_contains:
|
||||
- "/send"
|
||||
retrieval_plan:
|
||||
profile: docs_summary_generic
|
||||
rag:
|
||||
paths_contains:
|
||||
- "docs/architecture/telegram-notify-app-overview.md"
|
||||
pipeline:
|
||||
answer_mode: grounded_summary
|
||||
llm:
|
||||
non_empty: true
|
||||
contains_any:
|
||||
- ["Telegram", "/send"]
|
||||
- ["архитект", "endpoint"]
|
||||
|
||||
- id: full-t08-health-boundary
|
||||
query: "Объясни endpoint /health"
|
||||
expected:
|
||||
router:
|
||||
domain: DOCS
|
||||
intent: DOC_EXPLAIN
|
||||
sub_intent: SUMMARY
|
||||
route:
|
||||
anchors:
|
||||
endpoint_paths_contains:
|
||||
- "/health"
|
||||
file_names_not_contains:
|
||||
- "/health"
|
||||
retrieval_plan:
|
||||
profile: docs_summary_api_endpoint
|
||||
rag:
|
||||
row_count: 0
|
||||
pipeline:
|
||||
answer_mode: insufficient_evidence
|
||||
llm:
|
||||
non_empty: true
|
||||
contains_all:
|
||||
- "не найден"
|
||||
- "документ"
|
||||
@@ -64,8 +64,8 @@ class ArtifactWriter:
|
||||
f"- source_file: {result.case.source_file.as_posix()}",
|
||||
f"- passed: {result.passed}",
|
||||
"",
|
||||
"## Query",
|
||||
result.case.query,
|
||||
"## Input",
|
||||
result.case.display_input,
|
||||
"",
|
||||
"## Actual",
|
||||
"```json",
|
||||
@@ -96,7 +96,7 @@ class SummaryComposer:
|
||||
]
|
||||
for item in results:
|
||||
lines.append(
|
||||
f"| {item.case.case_id} | {item.case.component} | {self._cell(item.case.query)} | "
|
||||
f"| {item.case.case_id} | {item.case.component} | {self._cell(item.case.display_input)} | "
|
||||
f"{item.actual.get('intent') or '—'} | {item.actual.get('sub_intent') or '—'} | "
|
||||
f"{'✓' if item.passed else '✗'} |"
|
||||
)
|
||||
|
||||
@@ -4,7 +4,7 @@ from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
from tests.pipeline_setup_v4.core.models import CaseExpectations, RouterExpectation, V4Case
|
||||
from tests.pipeline_setup_v4.core.models import CaseExpectations, RetrievalPlanExpectation, RouterExpectation, V4Case
|
||||
|
||||
|
||||
class CaseDirectoryLoader:
|
||||
@@ -35,13 +35,28 @@ class CaseDirectoryLoader:
|
||||
case_id = str(raw.get("id") or "").strip()
|
||||
component = str(raw.get("component") or defaults.get("component") or "").strip()
|
||||
query = str(raw.get("query") or "").strip()
|
||||
if not case_id or not component or not query:
|
||||
raise ValueError(f"Invalid case in {path}: `id`, `component`, `query` are required")
|
||||
rag_session_id = str(raw.get("rag_session_id") or defaults.get("rag_session_id") or "").strip() or None
|
||||
route = dict(raw.get("route") or {})
|
||||
if not route and isinstance(defaults.get("route"), dict):
|
||||
route = dict(defaults.get("route") or {})
|
||||
if not case_id or not component:
|
||||
raise ValueError(f"Invalid case in {path}: `id` and `component` are required")
|
||||
if component in {
|
||||
"process_v2_intent_router",
|
||||
"process_v2_router_plus_retrieval_policy",
|
||||
"process_v2_router_plus_retrieval_policy_rag",
|
||||
"process_v2_full_chain",
|
||||
} and not query:
|
||||
raise ValueError(f"Invalid case in {path}: `query` is required for {component}")
|
||||
if component == "process_v2_retrieval_policy_resolver" and not route:
|
||||
raise ValueError(f"Invalid case in {path}: `route` is required for {component}")
|
||||
expected = dict(raw.get("expected") or {})
|
||||
return V4Case(
|
||||
case_id=case_id,
|
||||
component=component, # type: ignore[arg-type]
|
||||
query=query,
|
||||
rag_session_id=rag_session_id,
|
||||
route=route,
|
||||
source_file=path,
|
||||
expectations=self._to_expectations(expected),
|
||||
notes=str(raw.get("notes") or ""),
|
||||
@@ -50,10 +65,38 @@ class CaseDirectoryLoader:
|
||||
|
||||
def _to_expectations(self, raw: dict) -> CaseExpectations:
|
||||
router = dict(raw.get("router") or {})
|
||||
route = dict(raw.get("route") or {})
|
||||
retrieval_plan = dict(raw.get("retrieval_plan") or raw.get("plan") or {})
|
||||
rag = dict(raw.get("rag") or {})
|
||||
pipeline = dict(raw.get("pipeline") or {})
|
||||
llm = dict(raw.get("llm") or {})
|
||||
return CaseExpectations(
|
||||
router=RouterExpectation(
|
||||
domain=str(router.get("domain") or "").strip() or None,
|
||||
intent=str(router.get("intent") or "").strip() or None,
|
||||
sub_intent=str(router.get("sub_intent") or "").strip() or None,
|
||||
)
|
||||
),
|
||||
retrieval_plan=RetrievalPlanExpectation(
|
||||
profile=str(retrieval_plan.get("profile") or "").strip() or None,
|
||||
layers=tuple(str(item).strip() for item in retrieval_plan.get("layers") or [] if str(item).strip()),
|
||||
limit=int(retrieval_plan["limit"]) if retrieval_plan.get("limit") is not None else None,
|
||||
filters=self._plain_mapping(dict(retrieval_plan.get("filters") or {})),
|
||||
),
|
||||
route_assertions=route,
|
||||
retrieval_plan_assertions=retrieval_plan,
|
||||
rag_assertions=rag,
|
||||
pipeline_assertions=pipeline,
|
||||
llm_assertions=llm,
|
||||
)
|
||||
|
||||
def _plain_mapping(self, raw: dict[str, object]) -> dict[str, object]:
|
||||
plain: dict[str, object] = {}
|
||||
for key, value in raw.items():
|
||||
if self._is_assertion_key(key) or value in {"present", "absent"}:
|
||||
continue
|
||||
plain[key] = value
|
||||
return plain
|
||||
|
||||
def _is_assertion_key(self, key: str) -> bool:
|
||||
suffixes = ("_not_contains", "_contains_any", "_contains", "_equals_any", "_one_of")
|
||||
return any(key.endswith(suffix) for suffix in suffixes)
|
||||
|
||||
@@ -5,7 +5,13 @@ from pathlib import Path
|
||||
from typing import Literal
|
||||
|
||||
|
||||
ComponentKind = Literal["process_v2_intent_router"]
|
||||
ComponentKind = Literal[
|
||||
"process_v2_intent_router",
|
||||
"process_v2_retrieval_policy_resolver",
|
||||
"process_v2_router_plus_retrieval_policy",
|
||||
"process_v2_router_plus_retrieval_policy_rag",
|
||||
"process_v2_full_chain",
|
||||
]
|
||||
|
||||
|
||||
@dataclass(slots=True, frozen=True)
|
||||
@@ -15,21 +21,41 @@ class RouterExpectation:
|
||||
sub_intent: str | None = None
|
||||
|
||||
|
||||
@dataclass(slots=True, frozen=True)
|
||||
class RetrievalPlanExpectation:
|
||||
profile: str | None = None
|
||||
layers: tuple[str, ...] = ()
|
||||
limit: int | None = None
|
||||
filters: dict[str, object] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass(slots=True, frozen=True)
|
||||
class CaseExpectations:
|
||||
router: RouterExpectation = RouterExpectation()
|
||||
retrieval_plan: RetrievalPlanExpectation = field(default_factory=RetrievalPlanExpectation)
|
||||
route_assertions: dict[str, object] = field(default_factory=dict)
|
||||
retrieval_plan_assertions: dict[str, object] = field(default_factory=dict)
|
||||
rag_assertions: dict[str, object] = field(default_factory=dict)
|
||||
pipeline_assertions: dict[str, object] = field(default_factory=dict)
|
||||
llm_assertions: dict[str, object] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass(slots=True, frozen=True)
|
||||
class V4Case:
|
||||
case_id: str
|
||||
component: ComponentKind
|
||||
query: str
|
||||
source_file: Path
|
||||
expectations: CaseExpectations = CaseExpectations()
|
||||
query: str = ""
|
||||
rag_session_id: str | None = None
|
||||
route: dict[str, object] = field(default_factory=dict)
|
||||
expectations: CaseExpectations = field(default_factory=CaseExpectations)
|
||||
notes: str = ""
|
||||
tags: tuple[str, ...] = ()
|
||||
|
||||
@property
def display_input(self) -> str:
    """Return a human-readable label for the input of this case.

    Uses ``query`` when it is non-empty, otherwise the ``user_query`` entry
    of ``route``, otherwise ``case_id``.
    """
    # ``route`` values are untyped; coerce so the declared ``str`` return
    # type holds even when ``user_query`` is not a string.
    return str(self.query or self.route.get("user_query") or self.case_id)
|
||||
|
||||
|
||||
@dataclass(slots=True, frozen=True)
|
||||
class ExecutionPayload:
|
||||
|
||||
@@ -1,17 +1,249 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Mapping, Sequence
|
||||
|
||||
from tests.pipeline_setup_v4.core.models import V4Case
|
||||
|
||||
|
||||
class CaseValidator:
|
||||
def validate(self, case: V4Case, actual: dict) -> list[str]:
    """Run the validation stages that belong to the case's component.

    Returns the concatenated mismatch messages of all stages, in stage
    order, or a single "unsupported component" message when the component
    is not known.
    """
    # Stage method names per component, listed in execution order.
    stages_by_component = {
        "process_v2_intent_router": ("_validate_router",),
        "process_v2_retrieval_policy_resolver": ("_validate_retrieval_plan",),
        "process_v2_router_plus_retrieval_policy": (
            "_validate_router",
            "_validate_retrieval_plan",
        ),
        "process_v2_router_plus_retrieval_policy_rag": (
            "_validate_router",
            "_validate_retrieval_plan",
            "_validate_rag",
        ),
        "process_v2_full_chain": (
            "_validate_router",
            "_validate_retrieval_plan",
            "_validate_rag",
            "_validate_pipeline",
            "_validate_llm",
        ),
    }
    stage_names = stages_by_component.get(case.component)
    if stage_names is None:
        return [f"unsupported component for validation: {case.component}"]
    collected: list[str] = []
    for stage_name in stage_names:
        collected.extend(getattr(self, stage_name)(case, actual))
    return collected
|
||||
|
||||
def _validate_router(self, case: V4Case, actual: dict) -> list[str]:
|
||||
mismatches: list[str] = []
|
||||
expected = case.expectations.router
|
||||
self._check(expected.domain, actual.get("domain"), "domain", mismatches)
|
||||
self._check(expected.intent, actual.get("intent"), "intent", mismatches)
|
||||
self._check(expected.sub_intent, actual.get("sub_intent"), "sub_intent", mismatches)
|
||||
self._check_scalar(expected.domain, actual.get("domain"), "domain", mismatches)
|
||||
self._check_scalar(expected.intent, actual.get("intent"), "intent", mismatches)
|
||||
self._check_scalar(expected.sub_intent, actual.get("sub_intent"), "sub_intent", mismatches)
|
||||
route_actual = actual.get("route")
|
||||
if isinstance(route_actual, Mapping):
|
||||
self._check_assertions(case.expectations.route_assertions, route_actual, "route", mismatches)
|
||||
return mismatches
|
||||
|
||||
def _check(self, expected: str | None, actual: object, label: str, mismatches: list[str]) -> None:
|
||||
def _validate_retrieval_plan(self, case: V4Case, actual: dict) -> list[str]:
|
||||
mismatches: list[str] = []
|
||||
expected = case.expectations.retrieval_plan
|
||||
self._check_scalar(expected.profile, actual.get("profile"), "profile", mismatches)
|
||||
if expected.layers:
|
||||
self._check_scalar(list(expected.layers), actual.get("layers"), "layers", mismatches)
|
||||
self._check_scalar(expected.limit, actual.get("limit"), "limit", mismatches)
|
||||
self._check_subset(expected.filters, actual.get("filters"), "filters", mismatches)
|
||||
plan_actual = actual.get("retrieval_plan")
|
||||
if isinstance(plan_actual, Mapping):
|
||||
self._check_assertions(case.expectations.retrieval_plan_assertions, plan_actual, "retrieval_plan", mismatches)
|
||||
self._check_conditional_filter_assertions(case.expectations.retrieval_plan_assertions, actual, mismatches)
|
||||
return mismatches
|
||||
|
||||
def _validate_rag(self, case: V4Case, actual: dict) -> list[str]:
|
||||
mismatches: list[str] = []
|
||||
rag_actual = actual.get("rag")
|
||||
if isinstance(rag_actual, Mapping):
|
||||
self._check_assertions(case.expectations.rag_assertions, rag_actual, "rag", mismatches)
|
||||
elif case.expectations.rag_assertions:
|
||||
mismatches.append("rag: expected mapping, got missing")
|
||||
return mismatches
|
||||
|
||||
def _validate_pipeline(self, case: V4Case, actual: dict) -> list[str]:
|
||||
mismatches: list[str] = []
|
||||
pipeline_actual = actual.get("pipeline")
|
||||
if isinstance(pipeline_actual, Mapping):
|
||||
self._check_assertions(case.expectations.pipeline_assertions, pipeline_actual, "pipeline", mismatches)
|
||||
elif case.expectations.pipeline_assertions:
|
||||
mismatches.append("pipeline: expected mapping, got missing")
|
||||
return mismatches
|
||||
|
||||
def _validate_llm(self, case: V4Case, actual: dict) -> list[str]:
|
||||
mismatches: list[str] = []
|
||||
expected = case.expectations.llm_assertions
|
||||
if not expected:
|
||||
return mismatches
|
||||
llm_actual = actual.get("llm")
|
||||
if not isinstance(llm_actual, Mapping):
|
||||
mismatches.append("llm: expected mapping, got missing")
|
||||
return mismatches
|
||||
answer = str(llm_actual.get("answer") or "")
|
||||
lowered = answer.lower()
|
||||
if "non_empty" in expected:
|
||||
want_non_empty = bool(expected.get("non_empty"))
|
||||
if want_non_empty and not answer.strip():
|
||||
mismatches.append("llm.non_empty: expected non-empty answer")
|
||||
if not want_non_empty and answer.strip():
|
||||
mismatches.append("llm.non_empty: expected empty answer")
|
||||
if "contains_all" in expected:
|
||||
missing = [token for token in self._string_list(expected.get("contains_all")) if token.lower() not in lowered]
|
||||
if missing:
|
||||
mismatches.append(f"llm.contains_all: missing {missing}")
|
||||
if "contains_any" in expected and not self._matches_contains_any(lowered, expected.get("contains_any")):
|
||||
mismatches.append(f"llm.contains_any: no expected variant matched answer '{answer[:200]}'")
|
||||
for key, value in expected.items():
|
||||
if key in {"non_empty", "contains_all", "contains_any"}:
|
||||
continue
|
||||
if key not in llm_actual:
|
||||
mismatches.append(f"llm.{key}: missing")
|
||||
continue
|
||||
self._check_assertions(value, llm_actual.get(key), f"llm.{key}", mismatches)
|
||||
return mismatches
|
||||
|
||||
def _check_scalar(self, expected: object, actual: object, label: str, mismatches: list[str]) -> None:
|
||||
if expected is not None and expected != actual:
|
||||
mismatches.append(f"{label}: expected {expected}, got {actual}")
|
||||
|
||||
def _check_subset(self, expected: object, actual: object, label: str, mismatches: list[str]) -> None:
|
||||
if expected in (None, {}, []):
|
||||
return
|
||||
if isinstance(expected, Mapping):
|
||||
if not isinstance(actual, Mapping):
|
||||
mismatches.append(f"{label}: expected dict subset, got {actual}")
|
||||
return
|
||||
for key, value in expected.items():
|
||||
next_label = f"{label}.{key}"
|
||||
if key not in actual:
|
||||
mismatches.append(f"{next_label}: missing")
|
||||
continue
|
||||
self._check_subset(value, actual.get(key), next_label, mismatches)
|
||||
return
|
||||
if expected != actual:
|
||||
mismatches.append(f"{label}: expected {expected}, got {actual}")
|
||||
|
||||
def _check_assertions(self, expected: object, actual: object, label: str, mismatches: list[str]) -> None:
|
||||
if expected in (None, {}, []):
|
||||
return
|
||||
if not isinstance(expected, Mapping):
|
||||
self._check_scalar(expected, actual, label, mismatches)
|
||||
return
|
||||
if not isinstance(actual, Mapping):
|
||||
mismatches.append(f"{label}: expected mapping, got {actual}")
|
||||
return
|
||||
for key, value in expected.items():
|
||||
if key == "if_anchor_present_then_filter_present":
|
||||
continue
|
||||
if key.endswith("_not_contains"):
|
||||
self._assert_not_contains(actual.get(key.removesuffix("_not_contains")), value, f"{label}.{key}", mismatches)
|
||||
continue
|
||||
if key.endswith("_contains"):
|
||||
self._assert_contains(actual.get(key.removesuffix("_contains")), value, f"{label}.{key}", mismatches)
|
||||
continue
|
||||
if key.endswith("_contains_any"):
|
||||
self._assert_contains_any(actual.get(key.removesuffix("_contains_any")), value, f"{label}.{key}", mismatches)
|
||||
continue
|
||||
if key.endswith("_equals_any"):
|
||||
self._assert_equals_any(actual.get(key.removesuffix("_equals_any")), value, f"{label}.{key}", mismatches)
|
||||
continue
|
||||
if key.endswith("_one_of"):
|
||||
self._assert_equals_any(actual.get(key.removesuffix("_one_of")), value, f"{label}.{key}", mismatches)
|
||||
continue
|
||||
if value == "present":
|
||||
self._assert_present(actual.get(key), f"{label}.{key}", mismatches)
|
||||
continue
|
||||
if value == "absent":
|
||||
self._assert_absent(actual, key, f"{label}.{key}", mismatches)
|
||||
continue
|
||||
if key not in actual:
|
||||
mismatches.append(f"{label}.{key}: missing")
|
||||
continue
|
||||
self._check_assertions(value, actual.get(key), f"{label}.{key}", mismatches)
|
||||
|
||||
def _assert_contains(self, actual: object, expected: object, label: str, mismatches: list[str]) -> None:
|
||||
actual_list = self._as_list(actual)
|
||||
expected_list = self._as_list(expected)
|
||||
missing = [item for item in expected_list if item not in actual_list]
|
||||
if missing:
|
||||
mismatches.append(f"{label}: missing {missing}, got {actual_list}")
|
||||
|
||||
def _assert_not_contains(self, actual: object, expected: object, label: str, mismatches: list[str]) -> None:
|
||||
actual_list = self._as_list(actual)
|
||||
expected_list = self._as_list(expected)
|
||||
present = [item for item in expected_list if item in actual_list]
|
||||
if present:
|
||||
mismatches.append(f"{label}: unexpected {present}, got {actual_list}")
|
||||
|
||||
def _assert_contains_any(self, actual: object, expected: object, label: str, mismatches: list[str]) -> None:
|
||||
actual_list = self._as_list(actual)
|
||||
expected_list = self._as_list(expected)
|
||||
if not any(item in actual_list for item in expected_list):
|
||||
mismatches.append(f"{label}: expected any of {expected_list}, got {actual_list}")
|
||||
|
||||
def _assert_equals_any(self, actual: object, expected: object, label: str, mismatches: list[str]) -> None:
|
||||
expected_list = self._as_list(expected)
|
||||
if actual not in expected_list:
|
||||
mismatches.append(f"{label}: expected any of {expected_list}, got {actual}")
|
||||
|
||||
def _assert_present(self, actual: object, label: str, mismatches: list[str]) -> None:
|
||||
if actual is None or actual == "" or actual == [] or actual == {}:
|
||||
mismatches.append(f"{label}: expected present, got {actual}")
|
||||
|
||||
def _assert_absent(self, actual: Mapping, key: str, label: str, mismatches: list[str]) -> None:
|
||||
if key in actual and actual.get(key) not in (None, "", [], {}):
|
||||
mismatches.append(f"{label}: expected absent, got {actual.get(key)}")
|
||||
|
||||
def _check_conditional_filter_assertions(self, expected: object, actual: Mapping, mismatches: list[str]) -> None:
|
||||
if not isinstance(expected, Mapping):
|
||||
return
|
||||
rules = expected.get("if_anchor_present_then_filter_present")
|
||||
if not isinstance(rules, Sequence) or isinstance(rules, (str, bytes, bytearray)):
|
||||
return
|
||||
for idx, rule in enumerate(rules):
|
||||
if not isinstance(rule, Mapping):
|
||||
continue
|
||||
anchor_path = str(rule.get("anchor") or "").strip()
|
||||
filter_path = str(rule.get("filter") or "").strip()
|
||||
if not anchor_path or not filter_path:
|
||||
continue
|
||||
anchor_value = self._resolve_path(actual.get("route"), anchor_path)
|
||||
if anchor_value in (None, "", [], {}):
|
||||
continue
|
||||
filter_value = self._resolve_path(actual.get("retrieval_plan"), filter_path)
|
||||
if filter_value in (None, "", [], {}):
|
||||
mismatches.append(
|
||||
f"conditional[{idx}]: expected {filter_path} present because {anchor_path} is present"
|
||||
)
|
||||
|
||||
def _resolve_path(self, value: object, path: str) -> object:
|
||||
current = value
|
||||
parts = [item for item in path.split(".") if item]
|
||||
for idx, part in enumerate(parts):
|
||||
if not isinstance(current, Mapping):
|
||||
return None
|
||||
remainder = ".".join(parts[idx:])
|
||||
if remainder in current:
|
||||
return current.get(remainder)
|
||||
if part not in current:
|
||||
return None
|
||||
current = current.get(part)
|
||||
return current
|
||||
|
||||
def _as_list(self, value: object) -> list[object]:
|
||||
if value is None:
|
||||
return []
|
||||
if isinstance(value, Sequence) and not isinstance(value, (str, bytes, bytearray)):
|
||||
return list(value)
|
||||
return [value]
|
||||
|
||||
def _string_list(self, value: object) -> list[str]:
    """Return ``value`` as a list of strings, dropping items whose text is blank.

    ``value`` is first normalised with ``_as_list``; each item is converted
    with ``str`` and kept (unstripped) only if it has non-whitespace content.
    """
    texts = (str(item) for item in self._as_list(value))
    return [text for text in texts if text.strip()]
|
||||
|
||||
def _matches_contains_any(self, lowered_answer: str, expected: object) -> bool:
    """Return ``True`` when at least one expected variant is fully contained in the answer.

    ``expected`` is a list of variants; each variant is a token or a list of
    tokens. A variant matches when every one of its lower-cased tokens is a
    substring of ``lowered_answer``. Variants with no usable tokens are skipped.
    """
    token_groups = (self._string_list(variant) for variant in self._as_list(expected))
    return any(
        all(token.lower() in lowered_answer for token in group)
        for group in token_groups
        if group
    )
|
||||
|
||||
@@ -0,0 +1,121 @@
|
||||
"""Run full `process v2` flow in the v4 harness.
|
||||
|
||||
This module adapts the existing v3 `V2ProcessAdapter` so pipeline_setup_v4 can
|
||||
execute the real route -> retrieval -> evidence -> workflow LLM chain without
|
||||
duplicating runtime logic.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from tests.pipeline_setup_v3.core.models import CaseExpectations, CaseInput, V3Case
|
||||
from tests.pipeline_setup_v3.runtime.v2_process_adapter import V2ProcessAdapter
|
||||
from tests.pipeline_setup_v4.core.models import ExecutionPayload, V4Case
|
||||
|
||||
|
||||
class ProcessV2FullChainExecutor:
    """Execute a v4 case through the v3 ``V2ProcessAdapter`` and reshape its payload.

    The adapter output is flattened into an ``actual`` mapping (route,
    retrieval plan, RAG summary, pipeline summary and LLM answer) and a
    ``details`` mapping that additionally keeps the raw route, pipeline steps,
    logs, evidence and the first 20 retrieved rows.
    """

    def __init__(self) -> None:
        # The adapter is created with the workflow LLM enabled.
        self._adapter = V2ProcessAdapter(workflow_llm_enabled=True)

    def execute(self, case: V4Case) -> ExecutionPayload:
        """Run ``case`` through the adapter and build the v4 ``ExecutionPayload``.

        Raises:
            ValueError: if the case has no ``rag_session_id``.
        """
        if not case.rag_session_id:
            raise ValueError(f"Case '{case.case_id}' requires rag_session_id")
        payload = self._adapter.execute(self._build_case(case), case.rag_session_id)
        source_details = payload.details
        source_actual = payload.actual

        route = dict(source_details.get("router_result") or {})
        retrieval_plan = dict(source_details.get("retrieval_plan") or {})
        rows = list(source_details.get("rows") or [])
        pipeline_steps = list(source_details.get("pipeline_steps") or [])
        rag_summary = _summarize_rows(rows)

        def plan_snapshot() -> dict[str, object]:
            # Fresh containers per call keep the flat and nested copies independent.
            return {
                "profile": retrieval_plan.get("profile"),
                "layers": list(retrieval_plan.get("layers") or []),
                "limit": retrieval_plan.get("limit"),
                "filters": dict(retrieval_plan.get("filters") or {}),
            }

        # Only steps that carry a non-blank name are listed in the summary.
        step_names: list[str] = []
        for step in pipeline_steps:
            step_name = str(step.get("step") or "")
            if step_name.strip():
                step_names.append(step_name)

        pipeline_summary = {
            "answer_mode": str(source_actual.get("answer_mode") or ""),
            "workflow_llm_enabled": True,
            "step_count": len(pipeline_steps),
            "steps": step_names,
        }
        answer = str(source_details.get("answer") or source_actual.get("llm_answer") or "")
        nested_plan = plan_snapshot()

        actual = {
            "domain": source_actual.get("domain"),
            "intent": source_actual.get("intent"),
            "sub_intent": source_actual.get("sub_intent"),
            **plan_snapshot(),
            "answer_mode": source_actual.get("answer_mode"),
            "route": {
                "routing_domain": route.get("routing_domain"),
                "intent": route.get("intent"),
                "subintent": route.get("subintent"),
                "target_terms": list(route.get("target_terms") or []),
                "anchors": dict(route.get("anchors") or {}),
            },
            "retrieval_plan": nested_plan,
            "rag": rag_summary,
            "pipeline": pipeline_summary,
            "llm": {
                "answer": answer,
                "non_empty": bool(answer.strip()),
                "length": len(answer),
            },
        }
        details = {
            "query": case.query,
            "rag_session_id": case.rag_session_id,
            "route": route,
            "retrieval_plan": nested_plan,
            # Row dump is capped at the first 20 rows.
            "rag": {**rag_summary, "rows": rows[:20]},
            "pipeline": pipeline_summary,
            "answer": answer,
            "pipeline_steps": pipeline_steps,
            "logs": list(source_details.get("logs") or []),
            "evidence": dict(source_details.get("evidence") or {}),
        }
        return ExecutionPayload(actual=actual, details=details)

    def _build_case(self, case: V4Case) -> V3Case:
        """Wrap the v4 case in a ``V3Case`` (``process_v2`` runner, ``full_chain`` mode)."""
        v3_input = CaseInput(rag_session_id=case.rag_session_id)
        return V3Case(
            case_id=case.case_id,
            runner="process_v2",
            mode="full_chain",
            query=case.query,
            source_file=case.source_file,
            input=v3_input,
            expectations=CaseExpectations(),
            notes=case.notes,
            tags=case.tags,
        )
|
||||
|
||||
|
||||
def _summarize_rows(rows: list[dict]) -> dict[str, object]:
|
||||
paths: list[str] = []
|
||||
layers: list[str] = []
|
||||
metadata_domains: list[str] = []
|
||||
metadata_subdomains: list[str] = []
|
||||
for row in rows:
|
||||
path = str(row.get("path") or "").strip()
|
||||
layer = str(row.get("layer") or "").strip()
|
||||
metadata = dict(row.get("metadata") or {})
|
||||
domain = str(metadata.get("domain") or "").strip()
|
||||
subdomain = str(metadata.get("subdomain") or "").strip()
|
||||
if path and path not in paths:
|
||||
paths.append(path)
|
||||
if layer and layer not in layers:
|
||||
layers.append(layer)
|
||||
if domain and domain not in metadata_domains:
|
||||
metadata_domains.append(domain)
|
||||
if subdomain and subdomain not in metadata_subdomains:
|
||||
metadata_subdomains.append(subdomain)
|
||||
return {
|
||||
"row_count": len(rows),
|
||||
"paths": paths,
|
||||
"layers": layers,
|
||||
"metadata_domains": metadata_domains,
|
||||
"metadata_subdomains": metadata_subdomains,
|
||||
}
|
||||
@@ -0,0 +1,51 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import asdict
|
||||
|
||||
from app.core.agent.processes.v2.models import V2RouteAnchors, V2RouteResult
|
||||
from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
|
||||
from tests.pipeline_setup_v4.core.models import ExecutionPayload, V4Case
|
||||
|
||||
|
||||
class ProcessV2RetrievalPolicyExecutor:
    """Resolve a retrieval plan for a route given directly in the test case.

    The raw ``case.route`` mapping is turned into a ``V2RouteResult`` and
    passed to ``V2RetrievalPolicyResolver``; the resulting plan is reported as
    ``profile`` / ``layers`` / ``limit`` / ``filters``.
    """

    def __init__(self) -> None:
        self._resolver = V2RetrievalPolicyResolver()

    def execute(self, case: V4Case) -> ExecutionPayload:
        """Build the route from ``case.route``, resolve the plan and return both."""
        route = self._build_route(case.route)
        plan = self._resolver.resolve(route)
        actual = {
            "profile": plan.profile,
            "layers": list(plan.layers),
            "limit": plan.limit,
            "filters": dict(plan.filters),
        }
        details = {
            "route": asdict(route),
            "plan": actual,
        }
        return ExecutionPayload(actual=actual, details=details)

    def _build_route(self, raw: dict[str, object]) -> V2RouteResult:
        """Convert a raw fixture mapping into a ``V2RouteResult``.

        Missing text fields fall back to empty strings or fixture placeholders,
        list fields are cleaned of blank entries, and a missing confidence
        defaults to ``1.0``.
        """
        anchors_raw = dict(raw.get("anchors") or {})
        clean = self._clean_strings
        return V2RouteResult(
            routing_domain=str(raw.get("routing_domain") or ""),
            intent=str(raw.get("intent") or ""),
            subintent=str(raw.get("subintent") or ""),
            user_query=str(raw.get("user_query") or raw.get("normalized_query") or raw.get("name") or "resolver case"),
            normalized_query=str(raw.get("normalized_query") or raw.get("user_query") or "resolver case"),
            target_terms=clean(raw.get("target_terms")),
            anchors=V2RouteAnchors(
                entity_names=clean(anchors_raw.get("entity_names")),
                file_names=clean(anchors_raw.get("file_names")),
                endpoint_paths=clean(anchors_raw.get("endpoint_paths")),
                target_doc_hints=clean(anchors_raw.get("target_doc_hints")),
                matched_aliases=clean(anchors_raw.get("matched_aliases")),
                process_domain=str(anchors_raw.get("process_domain") or "").strip() or None,
                process_subdomain=str(anchors_raw.get("process_subdomain") or "").strip() or None,
            ),
            confidence=self._parse_confidence(raw.get("confidence")),
            routing_mode=str(raw.get("routing_mode") or "test_fixture"),
            llm_router_used=bool(raw.get("llm_router_used") or False),
            reason_short=str(raw.get("reason_short") or "fixture route"),
        )

    @staticmethod
    def _clean_strings(values: object) -> list[str]:
        """Stringify every item of ``values`` (``None`` means empty), dropping blank ones."""
        texts = (str(item) for item in values or [])
        return [text for text in texts if text.strip()]

    @staticmethod
    def _parse_confidence(value: object) -> float:
        """Return the fixture confidence as a float, defaulting to ``1.0`` only when unset.

        The previous ``value or 1.0`` form treated an explicit ``0`` / ``0.0``
        as missing and silently replaced it with ``1.0``.
        """
        if value is None or value == "":
            return 1.0
        return float(value)
|
||||
@@ -22,13 +22,23 @@ class _KeywordLlm:
|
||||
"где находится",
|
||||
"найди файл",
|
||||
"найди файлы",
|
||||
"show doc",
|
||||
"show file",
|
||||
"doc for",
|
||||
"file with",
|
||||
)
|
||||
_DOC_MARKERS = (
|
||||
"документац",
|
||||
"endpoint",
|
||||
"эндпоинт",
|
||||
"архитект",
|
||||
"architecture",
|
||||
"overview архитектуры",
|
||||
"arch overview",
|
||||
"процесс",
|
||||
"process",
|
||||
"flow",
|
||||
"workflow",
|
||||
"сущност",
|
||||
"worker",
|
||||
"цикл отправки уведомлений",
|
||||
@@ -43,6 +53,10 @@ class _KeywordLlm:
|
||||
"/health",
|
||||
"/send",
|
||||
"/actions/{action}",
|
||||
"billing invoice process",
|
||||
"billing invoice flow",
|
||||
"billing invoice docs",
|
||||
"notify app",
|
||||
)
|
||||
_GENERAL_MARKERS = (
|
||||
"что это за сервис",
|
||||
@@ -67,7 +81,7 @@ class _KeywordLlm:
|
||||
return json.dumps(route, ensure_ascii=False)
|
||||
|
||||
def _select(self, query: str) -> dict[str, object]:
|
||||
if any(marker in query for marker in self._FILE_MARKERS) or ("дока" in query and "покажи" in query):
|
||||
if any(marker in query for marker in self._FILE_MARKERS) or ("дока" in query and "покажи" in query) or ".md" in query:
|
||||
return self._route("DOCS", "DOC_EXPLAIN", "FIND_FILES", "file lookup")
|
||||
if any(marker in query for marker in self._GENERAL_MARKERS):
|
||||
return self._route("GENERAL", "GENERAL_QA", "SUMMARY", "general overview")
|
||||
|
||||
@@ -0,0 +1,79 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import asdict
|
||||
|
||||
from app.core.agent.processes.v2 import V2IntentRouter
|
||||
from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
|
||||
from tests.pipeline_setup_v4.core.models import ExecutionPayload, V4Case
|
||||
from tests.pipeline_setup_v4.executors.process_v2_router_executor import _KeywordLlm
|
||||
|
||||
|
||||
class ProcessV2RouterPlusPolicyExecutor:
    """Chain ``V2IntentRouter`` and ``V2RetrievalPolicyResolver`` for one query.

    The router is built with the ``_KeywordLlm`` stand-in and LLM
    disambiguation enabled; its route is fed straight into the resolver.
    """

    def __init__(self) -> None:
        self._router = V2IntentRouter(llm=_KeywordLlm(), enable_llm_disambiguation=True)
        self._resolver = V2RetrievalPolicyResolver()

    def execute(self, case: V4Case) -> ExecutionPayload:
        """Route ``case.query``, resolve the retrieval plan and report both stages."""
        route = self._router.route(case.query)
        plan = self._resolver.resolve(route)
        route_dump = asdict(route)

        def plan_view() -> dict[str, object]:
            # Every consumer gets its own copy of the plan containers.
            return {
                "profile": plan.profile,
                "layers": list(plan.layers),
                "limit": plan.limit,
                "filters": dict(plan.filters),
            }

        def anchors_view() -> object:
            return route_dump.get("anchors") or {}

        actual = {
            "domain": route.routing_domain,
            "intent": route.intent,
            "sub_intent": route.subintent,
            "routing_mode": route.routing_mode,
            "llm_router_used": route.llm_router_used,
            "confidence": route.confidence,
            **plan_view(),
            "route": {
                "routing_domain": route.routing_domain,
                "intent": route.intent,
                "subintent": route.subintent,
                "target_terms": list(route.target_terms),
                "anchors": anchors_view(),
            },
            "retrieval_plan": plan_view(),
        }

        router_step = {
            "step": "intent_router",
            "input": {"query": case.query},
            "output": {
                "domain": route.routing_domain,
                "intent": route.intent,
                "sub_intent": route.subintent,
                "reason_short": route.reason_short,
                "target_terms": list(route.target_terms),
                "anchors": anchors_view(),
            },
        }
        resolver_step = {
            "step": "retrieval_policy_resolver",
            "input": {"route": route_dump},
            "output": plan_view(),
        }
        details = {
            "query": case.query,
            "route": route_dump,
            "plan": plan_view(),
            "pipeline_steps": [router_step, resolver_step],
        }
        return ExecutionPayload(actual=actual, details=details)
|
||||
@@ -0,0 +1,94 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from dataclasses import asdict
|
||||
|
||||
from app.core.agent.processes.v2 import V2IntentRouter
|
||||
from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
|
||||
from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter
|
||||
from app.core.rag.persistence.repository import RagRepository
|
||||
from app.core.rag.retrieval.session_retriever import RagSessionRetriever
|
||||
from tests.pipeline_setup_v3.shared.rag_indexer import DeterministicEmbedder
|
||||
from tests.pipeline_setup_v4.core.models import ExecutionPayload, V4Case
|
||||
from tests.pipeline_setup_v4.executors.process_v2_router_executor import _KeywordLlm
|
||||
|
||||
|
||||
class ProcessV2RouterPlusPolicyRagExecutor:
    """Chain the intent router, the retrieval policy resolver and RAG row fetching.

    The router uses the ``_KeywordLlm`` stand-in; rows are fetched through a
    ``V2RagRetrievalAdapter`` built on ``RagSessionRetriever`` with a
    ``DeterministicEmbedder``.
    """

    def __init__(self) -> None:
        self._router = V2IntentRouter(llm=_KeywordLlm(), enable_llm_disambiguation=True)
        self._resolver = V2RetrievalPolicyResolver()
        retriever = RagSessionRetriever(RagRepository(), DeterministicEmbedder())
        self._adapter = V2RagRetrievalAdapter(retriever)

    def execute(self, case: V4Case) -> ExecutionPayload:
        """Synchronous entry point; drives the async chain with ``asyncio.run``.

        Raises:
            ValueError: if the case has no ``rag_session_id``.
        """
        if case.rag_session_id:
            return asyncio.run(self._execute_async(case))
        raise ValueError(f"Case '{case.case_id}' requires rag_session_id")

    async def _execute_async(self, case: V4Case) -> ExecutionPayload:
        """Route the query, resolve the plan, fetch rows and assemble the payload."""
        route = self._router.route(case.query)
        plan = self._resolver.resolve(route)
        session_id = case.rag_session_id or ""
        rows = await self._adapter.fetch_rows(session_id, route.normalized_query, plan)
        route_dump = asdict(route)
        rag_summary = _summarize_rows(rows)

        def plan_view() -> dict[str, object]:
            # Fresh containers per call keep the flat and nested copies independent.
            return {
                "profile": plan.profile,
                "layers": list(plan.layers),
                "limit": plan.limit,
                "filters": dict(plan.filters),
            }

        nested_plan = plan_view()
        actual = {
            "domain": route.routing_domain,
            "intent": route.intent,
            "sub_intent": route.subintent,
            **plan_view(),
            "route": {
                "routing_domain": route.routing_domain,
                "intent": route.intent,
                "subintent": route.subintent,
                "target_terms": list(route.target_terms),
                "anchors": route_dump.get("anchors") or {},
            },
            "retrieval_plan": nested_plan,
            "rag": rag_summary,
        }
        details = {
            "query": case.query,
            "rag_session_id": case.rag_session_id,
            "route": route_dump,
            "plan": nested_plan,
            # Row dump is capped at the first 20 rows.
            "rag": {**rag_summary, "rows": rows[:20]},
        }
        return ExecutionPayload(actual=actual, details=details)
|
||||
|
||||
|
||||
def _summarize_rows(rows: list[dict]) -> dict[str, object]:
|
||||
paths: list[str] = []
|
||||
layers: list[str] = []
|
||||
metadata_domains: list[str] = []
|
||||
metadata_subdomains: list[str] = []
|
||||
for row in rows:
|
||||
path = str(row.get("path") or "").strip()
|
||||
layer = str(row.get("layer") or "").strip()
|
||||
metadata = dict(row.get("metadata") or {})
|
||||
domain = str(metadata.get("domain") or "").strip()
|
||||
subdomain = str(metadata.get("subdomain") or "").strip()
|
||||
if path and path not in paths:
|
||||
paths.append(path)
|
||||
if layer and layer not in layers:
|
||||
layers.append(layer)
|
||||
if domain and domain not in metadata_domains:
|
||||
metadata_domains.append(domain)
|
||||
if subdomain and subdomain not in metadata_subdomains:
|
||||
metadata_subdomains.append(subdomain)
|
||||
return {
|
||||
"row_count": len(rows),
|
||||
"paths": paths,
|
||||
"layers": layers,
|
||||
"metadata_domains": metadata_domains,
|
||||
"metadata_subdomains": metadata_subdomains,
|
||||
}
|
||||
@@ -1,18 +1,56 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from tests.pipeline_setup_v4.executors.process_v2_full_chain_executor import ProcessV2FullChainExecutor
|
||||
from tests.pipeline_setup_v4.executors.process_v2_retrieval_policy_executor import ProcessV2RetrievalPolicyExecutor
|
||||
from tests.pipeline_setup_v4.executors.process_v2_router_plus_policy_executor import ProcessV2RouterPlusPolicyExecutor
|
||||
from tests.pipeline_setup_v4.executors.process_v2_router_plus_policy_rag_executor import (
|
||||
ProcessV2RouterPlusPolicyRagExecutor,
|
||||
)
|
||||
from tests.pipeline_setup_v4.executors.process_v2_router_executor import ProcessV2IntentRouterExecutor
|
||||
|
||||
|
||||
class ExecutorRegistry:
    """Dispatch a component name to its executor, creating each executor on first use."""

    def __init__(self) -> None:
        # Executors start unset and are instantiated lazily by the accessors below.
        self._router_executor: ProcessV2IntentRouterExecutor | None = None
        self._policy_executor: ProcessV2RetrievalPolicyExecutor | None = None
        self._router_plus_policy_executor: ProcessV2RouterPlusPolicyExecutor | None = None
        self._router_plus_policy_rag_executor: ProcessV2RouterPlusPolicyRagExecutor | None = None
        self._full_chain_executor: ProcessV2FullChainExecutor | None = None

    def execute(self, component: str, case) -> object:
        """Run ``case`` on the executor registered for ``component``.

        Raises:
            ValueError: if ``component`` is not one of the supported names.
        """
        accessors = (
            ("process_v2_intent_router", self._router),
            ("process_v2_retrieval_policy_resolver", self._policy),
            ("process_v2_router_plus_retrieval_policy", self._router_plus_policy),
            ("process_v2_router_plus_retrieval_policy_rag", self._router_plus_policy_rag),
            ("process_v2_full_chain", self._full_chain),
        )
        for name, accessor in accessors:
            if component == name:
                return accessor().execute(case)
        raise ValueError(f"Unsupported component: {component}")

    def _router(self) -> ProcessV2IntentRouterExecutor:
        """Return the intent-router executor, creating it on first call."""
        executor = self._router_executor
        if executor is None:
            executor = self._router_executor = ProcessV2IntentRouterExecutor()
        return executor

    def _policy(self) -> ProcessV2RetrievalPolicyExecutor:
        """Return the retrieval-policy executor, creating it on first call."""
        executor = self._policy_executor
        if executor is None:
            executor = self._policy_executor = ProcessV2RetrievalPolicyExecutor()
        return executor

    def _router_plus_policy(self) -> ProcessV2RouterPlusPolicyExecutor:
        """Return the router + policy executor, creating it on first call."""
        executor = self._router_plus_policy_executor
        if executor is None:
            executor = self._router_plus_policy_executor = ProcessV2RouterPlusPolicyExecutor()
        return executor

    def _router_plus_policy_rag(self) -> ProcessV2RouterPlusPolicyRagExecutor:
        """Return the router + policy + RAG executor, creating it on first call."""
        executor = self._router_plus_policy_rag_executor
        if executor is None:
            executor = self._router_plus_policy_rag_executor = ProcessV2RouterPlusPolicyRagExecutor()
        return executor

    def _full_chain(self) -> ProcessV2FullChainExecutor:
        """Return the full-chain executor, creating it on first call."""
        executor = self._full_chain_executor
        if executor is None:
            executor = self._full_chain_executor = ProcessV2FullChainExecutor()
        return executor
|
||||
|
||||
Reference in New Issue
Block a user