This commit is contained in:
2026-04-09 15:41:07 +03:00
parent 8fb76bb331
commit 6ccfe54e92
210 changed files with 16472 additions and 4199 deletions
+1 -1
View File
@@ -1,6 +1,6 @@
from app.core.agent.processes.base import AgentProcess, ProcessResult
from app.core.agent.processes.v1.process import V1Process
from app.core.agent.processes.v2.process import V2Process
from app.core.agent.processes.v2.v2_process import V2Process
__all__ = [
"AgentProcess",
+5
View File
@@ -2,8 +2,11 @@ from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
from dataclasses import field
from typing import TYPE_CHECKING
from app.schemas.changeset import ChangeItem
if TYPE_CHECKING:
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
@@ -11,6 +14,8 @@ if TYPE_CHECKING:
@dataclass(slots=True)
class ProcessResult:
    """Result value object declared alongside ``AgentProcess``.

    NOTE(review): the producers and consumers of this object are not visible
    in this file; the field notes below describe defaults only, plus hedged
    guesses based on the names.
    """

    # Textual answer; defaults to an empty string.
    answer: str = ""
    # Change items carried with the result; each instance gets its own empty list.
    changeset: list[ChangeItem] = field(default_factory=list)
    # Defaults to False. Presumably tells the caller whether ``changeset``
    # should actually be applied - TODO confirm against callers.
    apply_changeset: bool = False
class AgentProcess(ABC):
+8 -1
View File
@@ -1,4 +1,11 @@
from app.core.agent.processes.v2.process import V2Process
from app.core.agent.processes.v2.intent_router.router import V2IntentRouter
__all__ = ["V2IntentRouter", "V2Process"]
def __getattr__(name: str):
    """Module-level attribute hook that resolves ``V2Process`` on demand.

    The import of ``V2Process`` happens inside this function, i.e. only when
    the attribute is first requested from the package.
    NOTE(review): presumably this defers the import to avoid a cycle - confirm.

    Raises:
        AttributeError: for every name other than ``"V2Process"``; the
            requested name is the sole exception argument.
    """
    if name != "V2Process":
        raise AttributeError(name)

    from app.core.agent.processes.v2.v2_process import V2Process

    return V2Process
Binary file not shown.
@@ -0,0 +1,53 @@
# Documentation Rules Index
Этот каталог содержит локализованную проекцию правил построения документации проекта.
Источником истины для структуры и качества документов являются process-документы:
- `/Users/alex/Dev_projects_v2/ai driven app process/v2/agent/_process/01. Process.md`
- `/Users/alex/Dev_projects_v2/ai driven app process/v2/agent/_process/04. Analitycs artefacts.md`
Файлы ниже не должны противоречить этим документам, а лишь конкретизируют их для `test_echo_app`.
## Порядок использования
1. Сначала прочитать `global/documentation-system.md`.
2. Затем прочитать `global/frontmatter.md` и `global/linking.md`.
3. Затем выбрать правило из `artifact-types/` по `doc_type`.
4. Затем использовать шаблон из `templates/`.
5. Для уточнения отдельных частей документа использовать правила из `sections/`.
## Структура каталога
- `global/` — общие правила системы документации.
- `artifact-types/` — правила по типам артефактов.
- `sections/` — правила для отдельных секций документов.
- `templates/` — шаблоны документов.
## Содержимое
### Global
- `global/documentation-system.md`
- `global/frontmatter.md`
- `global/writing-style.md`
- `global/linking.md`
- `global/naming.md`
### Artifact types
- `artifact-types/api_method.md`
- `artifact-types/logic_block.md`
- `artifact-types/architecture_overview.md`
- `artifact-types/domain_entity.md`
- `artifact-types/ui_page.md`
- `artifact-types/integration_doc.md`
### Sections
- `sections/summary.md`
- `sections/details.md`
- `sections/api-scenario.md`
- `sections/api-contract.md`
- `sections/requirements-format.md`
### Templates
- `templates/api_method.template.md`
- `templates/logic_block.template.md`
- `templates/architecture_overview.template.md`
- `templates/domain_entity.template.md`
@@ -0,0 +1,39 @@
# API Method Rules
## Назначение
Этот файл задает правила для документов типа `api_method`.
## Когда использовать
Использовать для описания одного HTTP endpoint или одного отдельного API метода.
## Обязательная структура
Документ должен содержать:
- YAML frontmatter
- `# <title>`
- `## Summary`
- `## Details`
Внутри `## Details` обязательны:
- `### Описание`
- `### Сценарий`
- `### Функциональные требования`
- `### Нефункциональные требования`
- `### Контракт`
## Особые правила
- Сценарий оформляется как технический use case.
- Функциональные требования маркируются `FR-*`.
- Нефункциональные требования маркируются `NFR-*`.
- Контракт должен быть пригоден для последующей сборки OpenAPI.
- Если у метода есть интеграции, они выносятся в `### Интеграции`.
- Ошибки и HTTP-коды либо описываются в `### Ошибки`, либо ссылаются на централизованный каталог ошибок.
## Ошибки оформления
- Нельзя заменять контракт общим текстовым описанием.
- Нельзя смешивать несколько endpoint в одном документе.
- Нельзя хранить связи и навигацию вне frontmatter.
@@ -0,0 +1,31 @@
# Architecture Overview Rules
## Назначение
Этот файл задает правила для документов типа `architecture_overview`.
## Когда использовать
Использовать как входной документ для понимания системы, модуля или сервиса.
## Обязательная структура
Документ должен содержать:
- YAML frontmatter
- `# <title>`
- `## Summary`
- `## Details`
## Что описывать в Details
- границы системы
- основные компоненты
- ключевые взаимодействия
- интеграционные сценарии
- главные ограничения
- ссылки на дочерние документы по API, logic, domain и другим артефактам
## Ошибки оформления
- Нельзя дублировать в архитектурном обзоре полные API-контракты.
- Нельзя делать архитектурный обзор единственным документом на всю систему без декомпозиции.
@@ -0,0 +1,30 @@
# Domain Entity Rules
## Назначение
Этот файл задает правила для документов типа `domain_entity`.
## Когда использовать
Использовать для описания одной доменной сущности, ее смысла, состояния и роли в системе.
## Обязательная структура
Документ должен содержать:
- YAML frontmatter
- `# <title>`
- `## Summary`
- `## Details`
## Что описывать в Details
- смысл сущности
- ключевые атрибуты
- состояния или инварианты
- использование сущности в системе
- интеграции с API, workflow или внешними потребителями, если они важны для понимания модели
## Ошибки оформления
- Нельзя смешивать несколько независимых сущностей в одном документе.
- Нельзя подменять доменную сущность описанием endpoint или workflow.
@@ -0,0 +1,25 @@
# Integration Doc Rules
## Назначение
Этот файл задает правила для документов типа `integration_doc`.
## Когда использовать
Использовать для описания интеграции между системами, сервисами или внешними провайдерами.
## Обязательная структура
Документ должен содержать:
- YAML frontmatter
- `# <title>`
- `## Summary`
- `## Details`
## Что описывать в Details
- цель интеграции
- участвующие стороны
- направление обмена
- ключевой сценарий взаимодействия
- ограничения и риски
@@ -0,0 +1,31 @@
# Logic Block Rules
## Назначение
Этот файл задает правила для документов типа `logic_block`.
## Когда использовать
Использовать для описания одного законченного блока логики, workflow или процесса.
## Обязательная структура
Документ должен содержать:
- YAML frontmatter
- `# <title>`
- `## Summary`
- `## Details`
## Что описывать в Details
- назначение логического блока
- входы и выходы
- последовательность выполнения
- интеграции
- ключевые ограничения
- состояние и ошибки, если они важны для понимания блока
## Ошибки оформления
- Нельзя описывать весь модуль целиком, если логика распадается на несколько независимых блоков.
- Нельзя превращать документ в пересказ исходного кода построчно.
@@ -0,0 +1,24 @@
# UI Page Rules
## Назначение
Этот файл задает правила для документов типа `ui_page`.
## Когда использовать
Использовать для описания одной пользовательской страницы, экрана или отдельного UI-сценария.
## Обязательная структура
Документ должен содержать:
- YAML frontmatter
- `# <title>`
- `## Summary`
- `## Details`
## Что описывать в Details
- назначение страницы
- пользовательский сценарий
- основные блоки интерфейса
- связанные API и сущности
@@ -0,0 +1,71 @@
# Documentation Rules
Этот каталог оформляет MVP документации проекта в атомарном формате.
## Базовая структура
- Каждый документ содержит YAML frontmatter.
- В документе должен быть один `H1`, совпадающий с `title`.
- Основные разделы оформляются как `## Summary` и `## Details`.
- Внутри `Details` используются заголовки уровня `###` и ниже.
- Связи, сущности и навигация описываются во frontmatter через `related_docs`, `links`, `entities`, `parent`, `children`.
## Summary
- Краткий explain-слой быстрого контекста.
- Должен позволять быстро понять назначение документа без чтения `Details`.
- Предпочтительный формат: компактный список ключевых фактов без длинных абзацев.
## Details
- Раскрывает полное описание объекта.
- Структура `Details` зависит от типа документа.
- Сценарии, ограничения, интеграции, ошибки и кодовые привязки должны быть разнесены по отдельным подразделам.
## API documents
Для `api_method` внутри `## Details` обязательны разделы:
- `### Описание`
- `### Сценарий`
- `### Функциональные требования`
- `### Нефункциональные требования`
- `### Контракт`
Если у метода есть интеграции и ошибки, также обязательны:
- `### Интеграции`
- `### Ошибки`
- `### Связанный код`
- `### История изменений`
### Сценарий
Сценарий оформляется как технический use case и содержит:
- название
- предусловия
- триггер
- основной сценарий
- альтернативный сценарий
- обработку ошибок
- постусловие
### Требования
- Функциональные требования маркируются как `FR-1`, `FR-2`, ...
- Нефункциональные требования маркируются как `NFR-1`, `NFR-2`, ...
- Идентификаторы требований локальны в рамках одного документа.
### Контракт
Контракт должен быть пригоден для последующей сборки OpenAPI-спецификации и включать:
- входные параметры
- выходные параметры
- структуру JSON-сообщений
- обязательность полей
- типы и ограничения
- описание полей
- правила заполнения
- примеры данных
- auth
- idempotency
- timeout
- ошибки и их HTTP-коды
@@ -0,0 +1,38 @@
# Documentation System
## Назначение
Этот файл задает общую модель документации проекта.
## Базовая модель
Каждый документ должен состоять из двух слоев:
- YAML frontmatter
- контент
Контент всегда состоит из двух обязательных разделов:
- `## Summary`
- `## Details`
Над ними должен быть один заголовок `# <title>`, совпадающий со значением `title` во frontmatter.
## Принципы
- Документы должны быть атомарными.
- Один документ описывает одну тему.
- Вместо дублирования между документами используются явные ссылки.
- Связи и навигация должны быть формализованы.
- Документы должны быть пригодны для чтения человеком и для RAG.
- Документы должны быть пригодны для частичного обновления без деградации структуры.
## Типы документов
На уровне проекта поддерживаются типы:
- `api_method`
- `logic_block`
- `architecture_overview`
- `domain_entity`
- `ui_page`
- `integration_doc`
- `index_page`
- `glossary_item`
@@ -0,0 +1,67 @@
# Frontmatter Rules
## Назначение
Этот файл описывает единый контракт YAML frontmatter для всех документов.
## Обязательные поля
```yaml
id: string
title: string
doc_type: string
domain: string
sub_domain: string
related_docs: []
status: string
```
## Поля совместимости и рекомендуемые поля
```yaml
type: string
name: string
module: string
layer: string
updated_at: YYYY-MM-DD
tags: []
entities: []
parent: string | null
children: []
links: {}
source_of_truth: string
related_code: []
system_analytics_refs: []
```
## Правила
- `id` должен быть стабильным и уникальным в пределах документации проекта.
- `title` — человекочитаемый заголовок.
- `doc_type` — канонический тип документа.
- `domain` и `sub_domain` определяют бизнес-контекст документа.
- `related_docs` хранит явные связи с другими markdown-документами.
- `status` хранит жизненный цикл документа: например `draft`, `approved`, `active`.
- `type` допустимо дублировать как alias для tooling-совместимости с индексаторами.
- `name` — короткое системное имя документа.
- `module` — модуль или подсистема.
- `layer` — слой системы.
- `updated_at` хранится в формате `YYYY-MM-DD`.
## Связи и навигация
- `entities` описывает сущности, связанные с документом.
- `parent` и `children` описывают иерархию.
- `links` описывает typed graph связей между документами, кодом и интеграциями.
## Формат links
```yaml
links:
called_by:
- ext.health_probe
uses_logic:
- logic.some_flow
integrates_with:
- ext.some_system
```
@@ -0,0 +1,33 @@
# Linking Rules
## Назначение
Этот файл описывает, как связывать документы между собой.
## Иерархия
- `parent` используется для родительского документа.
- `children` используется для прямых дочерних документов.
- Иерархия должна быть осмысленной и стабильной.
- Для общей точки входа допустим `index_page`.
## Графовые связи
Для `related_docs` используются ссылки на соседние документы.
Для `links` рекомендуется использовать typed-ключи:
- `called_by`
- `uses_logic`
- `reads_db`
- `writes_db`
- `integrates_with`
- `used_by`
- `exposes_api`
- `uses_entities`
## Правила использования
- Если документ логически входит в другой, использовать `parent`/`children`.
- Если связь нужна для навигации между равноправными документами, дублировать ее в `related_docs`.
- Если связь отражает поведение, интеграции или переиспользование, фиксировать ее в `links`.
- Детальное описание интеграций хранить в body документа, а не только во frontmatter.
@@ -0,0 +1,24 @@
# Naming Rules
## Назначение
Этот файл описывает правила именования документов, файлов и идентификаторов.
## Правила для файлов
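- Имена файлов должны быть в kebab-case; исключение — файлы в `artifact-types/` и `templates/`, которые называются по значению `doc_type` (например, `api_method.md`, `api_method.template.md`).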
- Имя файла должно отражать одну тему.
- Для шаблонов использовать суффикс `.template.md`.
## Правила для id
- `id` строится в формате `<type-group>.<name>`.
- Примеры:
- `api.send_message_endpoint`
- `logic.telegram_notification_loop`
- `architecture.telegram_notify_app`
## Правила для title
- `title` должен быть кратким и человекочитаемым.
- В `title` допускаются пробелы и естественный язык.
@@ -0,0 +1,19 @@
# Writing Style
## Назначение
Этот файл задает правила стиля для текстового наполнения документации.
## Правила стиля
- Текст должен быть лаконичным.
- Формулировки должны быть точными и техническими.
- Summary должен быть кратким explain-слоем.
- Details должен раскрывать суть без лишней воды.
- Нежелательно смешивать несколько тем в одном документе.
- Если детали относятся к другому артефакту, их нужно выносить в отдельный документ.
## Язык
- Основной язык документации — русский.
- Технические термины, названия классов, API, RAG, OpenAPI, runtime и другие устоявшиеся identifiers можно оставлять на английском.
@@ -0,0 +1,24 @@
# API Contract Rules
## Назначение
Этот файл описывает, как оформлять подраздел `### Контракт` в API-документах.
## Что должно быть описано
- входные параметры
- выходные параметры
- JSON-структуры запросов и ответов
- обязательность полей
- типы полей
- ограничения
- описание назначения полей
- примеры данных
- auth
- idempotency
- timeout
- ошибки и их HTTP-коды
## Правило качества
Контракт должен быть достаточно формальным, чтобы по нему можно было собрать OpenAPI-спецификацию.
@@ -0,0 +1,21 @@
# API Scenario Rules
## Назначение
Этот файл описывает, как оформлять подраздел `### Сценарий` в API-документах.
## Обязательные части
- название
- предусловия
- триггер
- основной сценарий
- альтернативный сценарий
- обработка ошибок
- постусловие
## Правила
- Сценарий должен быть лаконичным.
- Сценарий должен отражать суть шага.
- Сложные технические детали надо выносить в `FR-*`.
@@ -0,0 +1,13 @@
# Details Section Rules
## Назначение
Этот файл задает общие правила для секции `## Details`.
## Правила
- `Details` оформляется как `## Details`.
- Внутри `Details` используются заголовки уровня `###` и ниже.
- Структура Details зависит от типа документа.
- В Details не нужно повторно дублировать навигацию и связи, если они уже есть во frontmatter.
- Интеграции, ошибки и кодовые привязки должны быть выделены в отдельные подразделы, если они существенны для понимания документа.
@@ -0,0 +1,16 @@
# Requirements Format Rules
## Назначение
Этот файл задает формат для функциональных и нефункциональных требований.
## Функциональные требования
- Использовать коды `FR-1`, `FR-2`, `FR-3` и так далее.
- Каждое требование должно описывать отдельный обязательный аспект поведения.
- Идентификаторы локальны в пределах одного документа.
## Нефункциональные требования
- Использовать коды `NFR-1`, `NFR-2`, `NFR-3` и так далее.
- Требования должны описывать характеристики качества, ограничения и эксплуатационные свойства.
@@ -0,0 +1,13 @@
# Summary Section Rules
## Назначение
Этот файл задает правила для секции `## Summary`.
## Правила
- Summary должен быть коротким explain-слоем быстрого контекста.
- Summary должен объяснять суть документа без лишних деталей.
- Summary должен быть пригоден для explain и быстрого чтения.
- Предпочтительный формат: список ключевых фактов `Purpose`, `Actor`, `Trigger`, `Errors`, `Related ...` и т.д.
- Для крупных документов допустим более длинный summary, если он остается структурированным.
@@ -0,0 +1,84 @@
---
id: api.example_method
type: api_method
doc_type: api_method
name: example_method
title: HTTP API /example
module: example_module
layer: application
domain: example_domain
sub_domain: example_subdomain
related_docs: []
status: draft
updated_at: 2026-03-20
source_of_truth: code
parent: null
children: []
tags: []
entities: []
links: {}
---
# HTTP API /example
## Summary
Краткое описание метода.
## Details
### Описание
Короткое описание сути метода.
### Сценарий
**Название:**
**Предусловия:**
-
**Триггер:**
-
**Основной сценарий:**
1.
**Альтернативный сценарий:**
1.
**Обработка ошибок:**
1.
**Постусловие:**
-
### Функциональные требования
**FR-1.**
### Нефункциональные требования
**NFR-1.**
### Контракт
#### Входные параметры
| Параметр | Где передается | Тип | Обязательность | Ограничения | Описание | Пример |
|---|---|---|---|---|---|---|
| | | | | | | |
#### Выходные параметры
| Поле | Тип | Обязательность | Ограничения | Описание | Заполнение | Пример |
|---|---|---|---|---|---|---|
| | | | | | | |
### Интеграции
### Ошибки
### Связанный код
### История изменений
@@ -0,0 +1,48 @@
---
id: architecture.example_system
type: architecture_overview
doc_type: architecture_overview
name: example_system
title: Обзор архитектуры Example System
module: example_module
layer: system
domain: example_domain
sub_domain: example_subdomain
related_docs: []
status: draft
updated_at: 2026-03-20
source_of_truth: mixed
parent: null
children: []
tags: []
entities: []
links: {}
---
# Обзор архитектуры Example System
## Summary
Краткое описание архитектуры.
## Details
### Описание
### Контекст
### Границы системы
### Компоненты
### Интеграционные сценарии
### Интеграции
### Ограничения
### Связанный код
### Связанные документы
### История изменений
@@ -0,0 +1,48 @@
---
id: domain.example_entity
type: domain_entity
doc_type: domain_entity
name: example_entity
title: Пример доменной сущности
module: example_module
layer: domain
domain: example_domain
sub_domain: example_subdomain
related_docs: []
status: draft
updated_at: 2026-03-20
source_of_truth: code
parent: null
children: []
tags: []
entities: []
links: {}
---
# Пример доменной сущности
## Summary
Краткое описание сущности.
## Details
### Описание
### Модель данных
### Состояния и инварианты
### Технический use case
### Функциональные требования
### Нефункциональные требования
### Интеграции
### Связанный код
### Связанные документы
### История изменений
@@ -0,0 +1,50 @@
---
id: logic.example_block
type: logic_block
doc_type: logic_block
name: example_block
title: Пример блока логики
module: example_module
layer: application
domain: example_domain
sub_domain: example_subdomain
related_docs: []
status: draft
updated_at: 2026-03-20
source_of_truth: code
parent: null
children: []
tags: []
entities: []
links: {}
---
# Пример блока логики
## Summary
Краткое описание блока логики.
## Details
### Описание
### Контекст
### Технический use case
### Функциональные требования
### Нефункциональные требования
### Интеграции
### Ограничения и условия вызова
### Ошибки и деградации
### Связанные API
### Связанный код
### История изменений
@@ -16,3 +16,4 @@ class QueryFeatures:
logic_markers: list[str]
domain_markers: list[str]
endpoint_markers: list[str]
scope_type: str = "unknown"
@@ -4,7 +4,7 @@ import re
from dataclasses import dataclass
from app.core.agent.processes.v2.intent_router.modules.target_terms import TargetTermsAnalysis
from app.core.agent.processes.v2.models import V2RouteAnchors
from app.core.agent.utils.process_v2.models import V2RouteAnchors
@dataclass(slots=True)
@@ -0,0 +1,176 @@
"""Build an in-memory DOCS scope index from D1/D3 catalog rows (no chunk retrieval).
Parses metadata from ``D1_DOCUMENT_CATALOG`` and ``D3_ENTITY_CATALOG`` rows produced by the
existing RAG indexer—no additional layers or chunk scans.
"""
from __future__ import annotations
import re
from dataclasses import dataclass, field
def _norm_text(value: object) -> str:
    """Return *value* as lower-cased text with whitespace runs collapsed.

    Falsy values (``None``, ``""``, ``0`` ...) produce an empty string.
    Leading and trailing whitespace is removed; every internal run of
    whitespace becomes a single space.
    """
    text = str(value) if value else ""
    lowered = text.strip().lower()
    return re.sub(r"\s+", " ", lowered)
def _split_multi(value: object) -> list[str]:
    """Split a multi-valued metadata field into trimmed, non-empty strings.

    Args:
        value: ``None``, a list/tuple of items, or any other object whose
            ``str()`` form is split on ``;``, ``,`` and ``|``.

    Returns:
        The items in their original order with surrounding whitespace
        removed. ``None`` members and blank items are dropped; case is kept.
    """
    if value is None:
        return []
    if isinstance(value, (list, tuple)):
        # Skip None members so they do not become the literal string "None".
        parts = [str(item) for item in value if item is not None]
    else:
        parts = re.split(r"[;,|]", str(value))
    trimmed = (part.strip() for part in parts)
    return [part for part in trimmed if part]
@dataclass(slots=True)
class DocsScopeCatalog:
    """Flattened terms from D1_DOCUMENT_CATALOG and D3_ENTITY_CATALOG for lexical grounding.

    Instances start empty and are filled in place by the row ingesters in
    this module.
    """

    # Distinct normalized domain names seen in catalog rows.
    domain_values: set[str] = field(default_factory=set)
    # Pairs are appended in ingestion order; the ingesters do not de-duplicate them.
    subdomain_pairs: list[tuple[str, str]] = field(default_factory=list)  # (domain, subdomain)
    # One dict per entity or tag term (keys such as "name", "domain",
    # "subdomain", "source_layer", "path", "blob").
    entity_records: list[dict[str, object]] = field(default_factory=list)
    # One dict per API endpoint (keys "endpoint", "domain", "source_layer",
    # "path", "title").
    api_records: list[dict[str, object]] = field(default_factory=list)
def build_docs_scope_catalog(rows: list[dict]) -> DocsScopeCatalog:
    """Build a ``DocsScopeCatalog`` from catalog-layer index rows.

    Only rows whose ``layer`` is ``D1_DOCUMENT_CATALOG`` or
    ``D3_ENTITY_CATALOG`` contribute; every other row is ignored. A row's
    ``metadata`` is treated as empty unless it is a dict, and missing
    ``path`` / ``title`` / ``content`` values become empty strings.

    Args:
        rows: index rows as dicts.

    Returns:
        The populated catalog (empty when no row matched).
    """
    result = DocsScopeCatalog()
    for record in rows:
        layer_name = str(record.get("layer") or "")
        raw_meta = record.get("metadata")
        metadata = raw_meta if isinstance(raw_meta, dict) else {}
        doc_path = str(record.get("path") or "")
        doc_title = str(record.get("title") or "")
        body = str(record.get("content") or "")

        if layer_name == "D1_DOCUMENT_CATALOG":
            _ingest_d1_row(
                result,
                path=doc_path,
                title=doc_title,
                content=body,
                metadata=metadata,
            )
            continue
        if layer_name == "D3_ENTITY_CATALOG":
            # D3 rows do not use the row content.
            _ingest_d3_row(result, path=doc_path, title=doc_title, metadata=metadata)
    return result
def _ingest_d1_row(
    catalog: DocsScopeCatalog,
    *,
    path: str,
    title: str,
    content: str,
    metadata: dict,
) -> None:
    """Fold one ``D1_DOCUMENT_CATALOG`` row into *catalog* (mutated in place).

    Adds:
        * the normalized domain to ``domain_values``;
        * the ``(domain, subdomain)`` pair when both are present;
        * one entity record per listed entity and per tag of at least three
          characters, each carrying a text blob built from the document's
          name, title, summary and content;
        * one API record when the row is an ``api_method`` document (by type
          or by path) or declares an endpoint, provided an endpoint can be
          taken from metadata or extracted from the title.

    Args:
        catalog: catalog being populated.
        path: document path of the row.
        title: document title of the row.
        content: row content, included in the entity blob.
        metadata: row metadata; missing keys are treated as empty.
    """
    doc_type = _norm_text(metadata.get("type") or metadata.get("doc_type"))
    domain = _norm_text(metadata.get("domain"))
    # Accept both spellings: ``sub_domain`` is the key named by the frontmatter
    # contract, ``subdomain`` is the key this function originally read.
    subdomain = _norm_text(metadata.get("subdomain") or metadata.get("sub_domain"))
    name = _norm_text(metadata.get("name"))
    summary = _norm_text(metadata.get("summary_text"))
    endpoint = _norm_text(metadata.get("endpoint"))
    entities = [_norm_text(e) for e in _split_multi(metadata.get("entities"))]
    tags = [_norm_text(t) for t in _split_multi(metadata.get("tags"))]

    if domain:
        catalog.domain_values.add(domain)
    if domain and subdomain:
        catalog.subdomain_pairs.append((domain, subdomain))

    blob = " ".join(x for x in (name, title, summary, content) if x)

    # Entities first, then tags; tags shorter than three characters are skipped.
    entity_terms = [ent for ent in entities if ent]
    entity_terms.extend(tag for tag in tags if len(tag) >= 3)
    for term in entity_terms:
        catalog.entity_records.append(
            {
                "name": term,
                "domain": domain or None,
                "subdomain": subdomain or None,
                "source_layer": "D1_DOCUMENT_CATALOG",
                "path": path,
                "blob": blob,
            }
        )

    is_api_method = doc_type == "api_method" or "api_method" in path.lower()
    if not (is_api_method or endpoint):
        return
    # Prefer the declared endpoint; otherwise try to read one out of the title.
    ep = endpoint or _endpoint_from_title(title)
    if ep:
        catalog.api_records.append(
            {
                "endpoint": ep,
                "domain": domain or None,
                "source_layer": "D1_DOCUMENT_CATALOG",
                "path": path,
                "title": title,
            }
        )
def _ingest_d3_row(
    catalog: DocsScopeCatalog,
    *,
    path: str,
    title: str,
    metadata: dict,
) -> None:
    """Fold one ``D3_ENTITY_CATALOG`` row into *catalog* (mutated in place).

    Registers the row's domain and ``(domain, subdomain)`` pair, and, when
    the metadata carries an ``entity_name``, appends one entity record whose
    blob is built from the entity name, title, module, source path and tags.

    Args:
        catalog: catalog being populated.
        path: row path; the metadata ``source_path`` is used when it is empty.
        title: row title, included in the entity blob.
        metadata: row metadata; missing keys are treated as empty.
    """
    entity_name = str(metadata.get("entity_name") or "").strip()
    domain = _norm_text(metadata.get("domain"))
    # Accept both spellings: ``sub_domain`` is the key named by the frontmatter
    # contract, ``subdomain`` is the key this function originally read.
    subdomain = _norm_text(metadata.get("subdomain") or metadata.get("sub_domain"))
    module = _norm_text(metadata.get("module"))
    source_path = str(metadata.get("source_path") or "").strip()
    tags = [_norm_text(t) for t in _split_multi(metadata.get("tags"))]

    if domain:
        catalog.domain_values.add(domain)
    if domain and subdomain:
        catalog.subdomain_pairs.append((domain, subdomain))

    if not entity_name:
        return

    blob_parts = (entity_name, title, module, source_path, " ".join(tags))
    blob = " ".join(_norm_text(part) for part in blob_parts if part)
    catalog.entity_records.append(
        {
            "name": _norm_text(entity_name),
            "domain": domain or None,
            "subdomain": subdomain or None,
            "module": module or None,
            "source_layer": "D3_ENTITY_CATALOG",
            "path": path or source_path,
            "blob": blob,
        }
    )
def _endpoint_from_title(title: str) -> str:
    """Extract an endpoint path from a document title, or return ``""``.

    Two attempts are made, in order:

    1. For each HTTP verb (GET, POST, PUT, PATCH, DELETE - checked in that
       order, case-insensitively) followed by a space, take the whitespace
       token after the verb if it starts with ``/``.
    2. Otherwise take the first ``/``-prefixed run of path characters
       anywhere in the title.

    The result is passed through ``_norm_text``.
    """
    text = str(title or "").strip()
    if not text:
        return ""

    shouted = text.upper()
    for verb in ("GET ", "POST ", "PUT ", "PATCH ", "DELETE "):
        position = shouted.find(verb)
        if position < 0:
            continue
        tokens = text[position:].split()
        if len(tokens) >= 2 and tokens[1].startswith("/"):
            return _norm_text(tokens[1])

    found = re.search(r"(\/[a-z0-9_./{}-]+)", text, re.IGNORECASE)
    if found is None:
        return ""
    return _norm_text(found.group(1))
@@ -0,0 +1,443 @@
"""Deterministic scope resolution from query + derived DOCS catalog (pre-LLM).
Matches the user query against catalog terms (exact / normalized). Optional embedding-based
retrieval can extend candidates later; final ``scope_type`` never relies on embeddings alone.
"""
from __future__ import annotations
import re
from dataclasses import dataclass, field
from app.core.agent.processes.v2.intent_router.modules.scope_catalog import DocsScopeCatalog
from app.core.agent.processes.v2.intent_router.modules.target_terms import TargetTermsAnalysis
from app.core.agent.utils.process_v2.models import ScopeCandidate, V2ScopeType
# Match-quality scores. NOTE(review): the scoring helper that assigns them is
# not in the visible part of this file - confirm how each tier is chosen.
_SCORE_EXACT = 1.0
_SCORE_NORMALIZED = 0.88
_SCORE_SOFT = 0.72
# Minimum score for a candidate to be treated as "strong" during resolution.
# Of the three scores above, only the soft score (0.72) falls below it.
_STRONG_THRESHOLD = 0.85
# Russian query fragments ("which ...", "list", "enumerate", "all api",
# "all methods", ...). Presumably they mark enumeration-style questions; the
# code that consumes them is not visible here - TODO confirm.
_ENUM_MARKERS_RU = (
    "какие ",
    "какие\n",
    "какой ",
    "какого ",
    "список",
    "перечисли",
    "перечислить",
    "все api",
    "все методы",
    "какие api",
    "какие методы",
    "каких ",
)
# Single-segment paths that ``plausible_doc_endpoint_paths`` still accepts as
# endpoints; any other one-segment path is dropped there.
_SINGLE_SEGMENT_ENDPOINT_ALLOWLIST = frozenset(
    {
        "/health",
        "/send",
        "/healthz",
        "/ready",
        "/live",
        "/metrics",
    }
)
# Russian and English phrases meaning "in the project / system / application".
# Presumably they mark project-wide questions; usage is not visible here -
# TODO confirm.
_PROJECT_WIDE_MARKERS = (
    "в проекте",
    "в системе",
    "в приложении",
    "по проекту",
    "во всем проекте",
    "overall",
    "in the project",
)
@dataclass(slots=True)
class ScopeResolution:
    """Outcome of lexical scope resolution for one query.

    A default instance means "nothing resolved": unknown scope, no
    candidates, and ``catalog_loaded`` False.
    """

    # Resolved scope kind; stays ``V2ScopeType.UNKNOWN`` unless a rule fires.
    scope_type: str = V2ScopeType.UNKNOWN
    # Scored catalog matches per category, collected before a scope is chosen.
    candidate_domains: list[ScopeCandidate] = field(default_factory=list)
    candidate_subdomains: list[ScopeCandidate] = field(default_factory=list)
    candidate_entities: list[ScopeCandidate] = field(default_factory=list)
    candidate_apis: list[ScopeCandidate] = field(default_factory=list)
    # Filled when the scope resolves to a domain or to a (domain, subdomain) pair.
    strong_domain: str | None = None
    strong_subdomain: str | None = None
    # Entity names and endpoint paths backing an entity-level scope.
    strong_entity_names: list[str] = field(default_factory=list)
    strong_endpoint_paths: list[str] = field(default_factory=list)
    # True once resolution ran against a catalog that contained index terms.
    catalog_loaded: bool = False
def _catalog_has_index_terms(catalog: DocsScopeCatalog) -> bool:
    """Return True when at least one of the catalog's four collections is non-empty."""
    buckets = (
        catalog.domain_values,
        catalog.subdomain_pairs,
        catalog.entity_records,
        catalog.api_records,
    )
    for bucket in buckets:
        if bucket:
            return True
    return False
def plausible_doc_endpoint_paths(paths: list[str]) -> list[str]:
    """Filter heuristic endpoint paths down to the plausible ones.

    A path is kept (trimmed and lower-cased) when it starts with ``/`` and
    either has two or more non-empty segments, or is a single-segment path
    listed in ``_SINGLE_SEGMENT_ENDPOINT_ALLOWLIST``. Everything else - for
    instance a stray ``/billing`` picked up next to the word ``api`` - is
    dropped. Input order and duplicates are preserved.
    """
    kept: list[str] = []
    for candidate in paths:
        path = str(candidate or "").strip().lower()
        if not path.startswith("/"):
            continue
        depth = sum(1 for part in path.split("/") if part)
        if depth >= 2 or (depth == 1 and path in _SINGLE_SEGMENT_ENDPOINT_ALLOWLIST):
            kept.append(path)
    return kept
def resolve_docs_scope(
    normalized_query: str,
    terms: TargetTermsAnalysis,
    catalog: DocsScopeCatalog | None,
) -> ScopeResolution:
    """Resolve the documentation scope of a query by lexical matching only.

    Returns a default resolution when *catalog* is ``None`` or holds no index
    terms. Otherwise candidates are collected for domains, subdomains,
    entities and APIs, and the first rule below that applies decides the
    scope:

    1. plausible endpoint paths in *terms* -> ENTITY (plus entities whose
       blob mentions one of those paths);
    2. strong API candidate -> ENTITY;
    3. strong subdomain candidate whose domain and subdomain both match the
       query -> SUBDOMAIN;
    4. strong entity candidate -> ENTITY;
    5. strong subdomain candidate -> SUBDOMAIN;
    6. strong domain candidate -> DOMAIN;
    7. global-enumeration query -> GLOBAL;
    8. otherwise UNKNOWN.

    "Strong" means a score of at least ``_STRONG_THRESHOLD``. Embeddings are
    not used here.
    """
    result = ScopeResolution()
    if catalog is None or not _catalog_has_index_terms(catalog):
        return result
    result.catalog_loaded = True

    query_l = _norm_query(normalized_query)
    if not query_l:
        result.scope_type = V2ScopeType.UNKNOWN
        return result

    _collect_domain_candidates(query_l, catalog, result)
    _collect_subdomain_candidates(query_l, catalog, result)
    _collect_entity_candidates(query_l, catalog, result)
    _collect_api_candidates(query_l, catalog, result)
    _dedupe_candidates(result)

    endpoint_paths = plausible_doc_endpoint_paths(list(terms.endpoint_paths))
    best_api = _pick_strong(result.candidate_apis)
    best_entity = _pick_strong(result.candidate_entities)
    best_sub = _pick_strong(result.candidate_subdomains)
    best_dom = _pick_strong(result.candidate_domains)

    def _meets_threshold(candidate: ScopeCandidate | None) -> bool:
        # A missing candidate is never strong.
        return bool(candidate) and candidate.score >= _STRONG_THRESHOLD

    def _resolve_as_subdomain(candidate: ScopeCandidate) -> ScopeResolution:
        # Composite values look like "domain::subdomain"; the scope is set even
        # when the value cannot be split.
        result.scope_type = V2ScopeType.SUBDOMAIN
        pair = _split_subdomain_value(candidate.value)
        if pair:
            result.strong_domain, result.strong_subdomain = pair
        return result

    result.strong_endpoint_paths = list(dict.fromkeys(endpoint_paths))
    if endpoint_paths:
        result.scope_type = V2ScopeType.ENTITY
        linked = _entities_for_endpoints(endpoint_paths, catalog)
        result.strong_entity_names = _merge_unique(result.strong_entity_names, linked)
        return result

    if _meets_threshold(best_api):
        result.scope_type = V2ScopeType.ENTITY
        result.strong_endpoint_paths = _merge_unique(
            result.strong_endpoint_paths,
            [best_api.value],
        )
        return result

    # An aligned subdomain outranks an entity match; an unaligned one does not.
    if _meets_threshold(best_sub) and _subdomain_aligned_with_query(query_l, best_sub.value):
        return _resolve_as_subdomain(best_sub)

    if _meets_threshold(best_entity):
        result.scope_type = V2ScopeType.ENTITY
        result.strong_entity_names = _merge_unique(
            result.strong_entity_names,
            [best_entity.value],
        )
        return result

    if _meets_threshold(best_sub):
        return _resolve_as_subdomain(best_sub)

    if _meets_threshold(best_dom):
        result.scope_type = V2ScopeType.DOMAIN
        result.strong_domain = best_dom.value
        return result

    if _is_global_enumeration(query_l, has_strong_any=bool(_any_strong(result))):
        result.scope_type = V2ScopeType.GLOBAL
        return result

    result.scope_type = V2ScopeType.UNKNOWN
    return result
def promote_target_terms(
    raw_terms: list[str],
    terms: TargetTermsAnalysis,
    resolution: ScopeResolution,
) -> list[str]:
    """Keep only high-confidence terms in ``target_terms``; weak matches stay in candidate_* only.

    When no catalog was loaded, *raw_terms* is returned as a copy, unfiltered.
    Otherwise a term survives when it is, case-insensitively, one of:

    * an endpoint path from *terms* (emitted lower-cased when it starts with
      ``/``, otherwise as written);
    * a matched alias from *terms*;
    * the value of a candidate that is strong (score at or above
      ``_STRONG_THRESHOLD``) or an exact match;
    * a strong entity name - matched as written, or lower-cased when the
      term looks like an explicit identifier.

    Args:
        raw_terms: candidate target terms, in priority order.
        terms: term analysis supplying endpoint paths and matched aliases.
        resolution: scope resolution supplying candidates and strong entities.

    Returns:
        The surviving terms, de-duplicated, in their original order.
    """
    if not resolution.catalog_loaded:
        return list(raw_terms)

    strong_values = {
        c.value
        for c in _all_candidates(resolution)
        if c.score >= _STRONG_THRESHOLD or c.match_type == "exact"
    }
    strong_entity = set(resolution.strong_entity_names)
    endpoints = set(terms.endpoint_paths)
    aliases = set(terms.matched_aliases)
    # Built once: these lookups do not change between terms.
    endpoints_lower = {e.lower() for e in endpoints}
    aliases_lower = {a.lower() for a in aliases}

    out: list[str] = []
    for term in raw_terms:
        t = str(term or "").strip()
        if not t:
            continue
        tl = t.lower()
        if t in endpoints or tl in endpoints_lower:
            _append_unique(out, tl if tl.startswith("/") else t)
        elif t in aliases or tl in aliases_lower:
            _append_unique(out, tl)
        elif tl in strong_values or t in strong_entity:
            _append_unique(out, tl)
        elif _is_explicit_identifier(t) and tl in strong_entity:
            _append_unique(out, tl)
        # Anything else is weak or ungrounded and is dropped here; it remains
        # only in the resolution's candidate lists.
    return out
def _all_candidates(resolution: ScopeResolution) -> list[ScopeCandidate]:
    """Return every candidate as one new list: domains, subdomains, entities, then APIs."""
    combined: list[ScopeCandidate] = []
    combined.extend(resolution.candidate_domains)
    combined.extend(resolution.candidate_subdomains)
    combined.extend(resolution.candidate_entities)
    combined.extend(resolution.candidate_apis)
    return combined
def _any_strong(resolution: ScopeResolution) -> bool:
    """Return True when any candidate scores at or above ``_STRONG_THRESHOLD``."""
    for candidate in _all_candidates(resolution):
        if candidate.score >= _STRONG_THRESHOLD:
            return True
    return False
def _pick_strong(candidates: list[ScopeCandidate]) -> ScopeCandidate | None:
    """Return the best candidate, or ``None`` when there are none.

    Candidates are ranked by score, then by the length of their value; on a
    complete tie the earliest candidate wins.
    """
    if not candidates:
        return None
    best = None
    best_rank = None
    for candidate in candidates:
        rank = (candidate.score, len(candidate.value))
        # Strict comparison keeps the first of equally ranked candidates.
        if best_rank is None or rank > best_rank:
            best, best_rank = candidate, rank
    return best
def _norm_query(q: str) -> str:
    """Return the query trimmed, lower-cased and with whitespace runs collapsed.

    Falsy input (``None``, ``""``) yields an empty string.
    """
    text = str(q) if q else ""
    lowered = text.strip().lower()
    return re.sub(r"\s+", " ", lowered)
def _append_unique(items: list[str], value: str) -> None:
    """Append *value* to *items* in place unless it is empty or already present."""
    if not value:
        return
    if value in items:
        return
    items.append(value)
def _merge_unique(a: list[str], b: list[str]) -> list[str]:
    """Return the items of *a* followed by those of *b*, first occurrences only.

    Neither input is modified; order of first appearance is preserved.
    """
    ordered = dict.fromkeys(a)
    ordered.update(dict.fromkeys(b))
    return list(ordered)
def _is_explicit_identifier(token: str) -> bool:
    """Return True when *token* is an ASCII identifier of two or more characters.

    The token must start with a letter and continue with at least one
    letter, digit or underscore.
    """
    matched = re.fullmatch(r"[A-Za-z][A-Za-z0-9_]+", token)
    return matched is not None
def _split_subdomain_value(value: str) -> tuple[str, str] | None:
parts = str(value or "").split("::", 1)
if len(parts) == 2 and parts[0] and parts[1]:
return parts[0].strip().lower(), parts[1].strip().lower()
return None
def _subdomain_aligned_with_query(query_l: str, composite: str) -> bool:
    """Return True when both halves of a ``domain::subdomain`` value match the query.

    Each half is trimmed, lowercased, and scored against *query_l* with
    ``_match_score``; both scores must be positive. A composite without the
    ``::`` separator never aligns.
    """
    domain, separator, subdomain = str(composite or "").partition("::")
    if not separator:
        return False
    domain_score = _match_score(query_l, domain.strip().lower())[0]
    subdomain_score = _match_score(query_l, subdomain.strip().lower())[0]
    return domain_score > 0 and subdomain_score > 0
def _entities_for_endpoints(endpoint_paths: list[str], catalog: DocsScopeCatalog) -> list[str]:
    """List entity names whose catalog blob mentions any of the endpoint paths.

    Matching is a case-insensitive substring test of each endpoint path
    against the record's ``blob``. Names are returned lowercased, without
    duplicates, in catalog order; records without a name are ignored.
    """
    needles = {path.lower() for path in endpoint_paths if path}
    names: list[str] = []
    for record in catalog.entity_records:
        entity = str(record.get("name") or "").strip().lower()
        if not entity:
            continue
        haystack = str(record.get("blob") or "").lower()
        for needle in needles:
            if needle and needle in haystack:
                _append_unique(names, entity)
                break
    return names
def _collect_domain_candidates(query_l: str, catalog: DocsScopeCatalog, resolution: ScopeResolution) -> None:
    """Append a domain candidate to *resolution* for every catalog domain matching the query.

    Empty domain values and domains with a non-positive match score are
    skipped. Candidates are tagged with the ``D1_DOCUMENT_CATALOG`` layer.
    """
    for domain in catalog.domain_values:
        if not domain:
            continue
        domain_score, match_type = _match_score(query_l, domain)
        if domain_score <= 0:
            continue
        candidate = ScopeCandidate(
            value=domain,
            score=domain_score,
            source_layer="D1_DOCUMENT_CATALOG",
            match_type=match_type,
        )
        resolution.candidate_domains.append(candidate)
def _collect_subdomain_candidates(query_l: str, catalog: DocsScopeCatalog, resolution: ScopeResolution) -> None:
    """Append ``domain::subdomain`` candidates that match the query to *resolution*.

    Each distinct pair is scored once. A positive phrase score wins and is
    reported as a ``"normalized"`` match; otherwise both the domain and the
    subdomain must match on their own, and the weaker of the two scores is
    used together with the subdomain's match type.
    """
    visited: set[str] = set()
    for domain, subdomain in catalog.subdomain_pairs:
        if not (domain and subdomain):
            continue
        pair_key = f"{domain}::{subdomain}"
        if pair_key in visited:
            continue
        visited.add(pair_key)

        domain_score, _ = _match_score(query_l, domain)
        subdomain_score, subdomain_match = _match_score(query_l, subdomain)
        phrase_score = _phrase_score(query_l, domain, subdomain)

        if phrase_score > 0:
            pair_score, match_type = phrase_score, "normalized"
        elif domain_score > 0 and subdomain_score > 0:
            pair_score, match_type = min(domain_score, subdomain_score), subdomain_match
        else:
            # Avoid promoting a (domain, subdomain) pair when only the domain token matches.
            continue
        if pair_score <= 0:
            continue

        candidate = ScopeCandidate(
            value=pair_key,
            score=pair_score,
            source_layer="D1_DOCUMENT_CATALOG",
            match_type=match_type,
        )
        resolution.candidate_subdomains.append(candidate)
def _collect_entity_candidates(query_l: str, catalog: DocsScopeCatalog, resolution: ScopeResolution) -> None:
    """Append an entity candidate to *resolution* for every matching catalog entity.

    Entity names are trimmed and lowercased; names shorter than two
    characters are ignored. The record's own ``source_layer`` is carried
    over to the candidate.
    """
    for record in catalog.entity_records:
        entity = str(record.get("name") or "").strip().lower()
        if len(entity) < 2:
            continue
        entity_blob = str(record.get("blob") or "").lower()
        source_layer = str(record.get("source_layer") or "")
        entity_score, match_type = _match_entity(query_l, entity, entity_blob)
        if entity_score <= 0:
            continue
        candidate = ScopeCandidate(
            value=entity,
            score=entity_score,
            source_layer=source_layer,
            match_type=match_type,
        )
        resolution.candidate_entities.append(candidate)
def _collect_api_candidates(query_l: str, catalog: DocsScopeCatalog, resolution: ScopeResolution) -> None:
    """Append an API candidate to *resolution* for every catalog endpoint matching the query.

    Endpoints are trimmed and lowercased; spaces are removed only for
    scoring, while the candidate keeps the endpoint with its spaces.
    """
    for record in catalog.api_records:
        endpoint = str(record.get("endpoint") or "").strip().lower()
        if not endpoint:
            continue
        source_layer = str(record.get("source_layer") or "")
        compact_endpoint = endpoint.replace(" ", "")
        endpoint_score, match_type = _match_score(query_l, compact_endpoint)
        if endpoint_score <= 0:
            continue
        candidate = ScopeCandidate(
            value=endpoint,
            score=endpoint_score,
            source_layer=source_layer,
            match_type=match_type,
        )
        resolution.candidate_apis.append(candidate)
def _phrase_score(query_l: str, dom: str, sub: str) -> float:
    """Score how well the query matches a (domain, subdomain) pair as a phrase.

    Args:
        query_l: Lowercased, normalized query text.
        dom: Domain value.
        sub: Subdomain value.

    Returns:
        ``max(_SCORE_NORMALIZED, 0.9)`` when both values occur in the query as
        whole space-delimited tokens, ``_SCORE_NORMALIZED`` when the joined
        ``"dom sub"`` phrase contains the query or is contained in it, and
        ``0.0`` otherwise.
    """
    # An empty string is a substring of every string, so without this guard a
    # blank query would match every pair through `query_l in joined`.
    if not str(query_l or "").strip():
        return 0.0
    if _contains_token(query_l, dom) and _contains_token(query_l, sub):
        return max(_SCORE_NORMALIZED, 0.9)
    joined = re.sub(r"\s+", " ", f"{dom} {sub}".strip())
    if joined in query_l or query_l in joined:
        return _SCORE_NORMALIZED
    return 0.0
def _match_entity(query_l: str, name: str, blob: str) -> tuple[float, str]:
    """Score an entity against the query, falling back to a blob-based soft match.

    A positive direct ``_match_score`` on the name is returned as is.
    Otherwise a soft ``"normalized"`` match is granted when the name (at
    least four characters) occurs in the blob, the query shares a
    whitespace-separated token with the blob, and the query either contains
    the name as a token or contains the name's first four characters.
    """
    direct_score, direct_type = _match_score(query_l, name)
    if direct_score > 0:
        return direct_score, direct_type

    miss = (0.0, "normalized")
    if len(name) < 4 or name not in blob:
        return miss
    # cross-language hints: the name appears in the catalog blob; grant a
    # small boost only if the query overlaps the blob.
    query_tokens = set(query_l.split())
    if query_tokens.isdisjoint(blob.split()):
        return miss
    if name in query_tokens or name[:4] in query_l:
        return _SCORE_SOFT, "normalized"
    return miss
def _match_score(query_l: str, value: str) -> tuple[float, str]:
    """Score how strongly a catalog value matches the query.

    Checks run from strongest to weakest and the first hit wins:

    1. exact: the value equals the query (also with spaces removed), or it
       occurs in the query as a whole space-delimited token (also with
       ``/`` replaced by spaces);
    2. normalized: the value, with or without spaces, is a substring of the
       query or of the space-stripped query;
    3. soft: the value has at least four characters and some query token
       starts with its first four characters.

    Returns ``(score, match_type)``; a miss is ``(0.0, "normalized")``.
    """
    needle = str(value or "").strip().lower()
    if not needle:
        return 0.0, "normalized"

    needle_compact = needle.replace(" ", "")
    query_compact = query_l.replace(" ", "")

    if needle == query_l or needle_compact == query_compact:
        return _SCORE_EXACT, "exact"
    if _contains_token(query_l, needle) or _contains_token(query_l, needle.replace("/", " ")):
        return _SCORE_EXACT, "exact"
    if needle in query_compact or needle_compact in query_compact or needle in query_l:
        return _SCORE_NORMALIZED, "normalized"

    # prefix / slug
    if len(needle) >= 4:
        prefix = needle[:4]
        if any(token.startswith(prefix) for token in query_l.split()):
            return _SCORE_SOFT, "normalized"
    return 0.0, "normalized"
def _contains_token(hay: str, needle: str) -> bool:
    """Check whether *needle* occurs in *hay* delimited by spaces (or the string ends).

    Both strings are padded with one space on each side, so a needle only
    matches on space boundaries. A falsy needle never matches.
    """
    if not needle:
        return False
    padded_needle = f" {needle} "
    padded_hay = f" {hay} "
    return padded_needle in padded_hay
def _dedupe_candidates(resolution: ScopeResolution) -> None:
    """Replace each candidate list on *resolution* with its deduplicated version.

    The four lists (domains, subdomains, entities, APIs) are processed in
    that order with ``_dedupe_list``.
    """
    for attribute in (
        "candidate_domains",
        "candidate_subdomains",
        "candidate_entities",
        "candidate_apis",
    ):
        setattr(resolution, attribute, _dedupe_list(getattr(resolution, attribute)))
def _dedupe_list(items: list[ScopeCandidate]) -> list[ScopeCandidate]:
    """Keep the best-scoring candidate per ``(value, source_layer)`` and sort the result.

    When duplicates have equal scores the earliest one is kept. The result
    is ordered by descending score, then by value.
    """
    winners: dict[str, ScopeCandidate] = {}
    for candidate in items:
        identity = f"{candidate.value}|{candidate.source_layer}"
        incumbent = winners.get(identity)
        if incumbent is not None and not candidate.score > incumbent.score:
            continue
        winners[identity] = candidate
    ranked = list(winners.values())
    ranked.sort(key=lambda candidate: (-candidate.score, candidate.value))
    return ranked
def _is_global_enumeration(query_l: str, *, has_strong_any: bool) -> bool:
    """Detect a project-wide "list everything" query.

    Never true when a strong candidate was already found. Otherwise true
    when the query either combines a project-wide marker with a
    listing/API word, or starts with an enumeration marker (longer than two
    characters once trimmed) and mentions an API-like word.
    """
    if has_strong_any:
        return False

    def mentions(markers) -> bool:
        return any(marker in query_l for marker in markers)

    if mentions(_PROJECT_WIDE_MARKERS) and mentions(
        ("какие", "какой", "список", "перечисли", "метод", "api")
    ):
        return True

    trimmed_query = query_l.strip()
    starts_with_enum_marker = any(
        trimmed_query.startswith(marker.strip())
        for marker in _ENUM_MARKERS_RU
        if len(marker.strip()) > 2
    )
    return starts_with_enum_marker and mentions(("метод", "api", "ручк", "эндпоинт"))
@@ -2,8 +2,17 @@
from __future__ import annotations
from collections.abc import Callable
from dataclasses import replace
from app.core.agent.processes.v2.intent_router.modules.anchors import V2AnchorExtractor
from app.core.agent.processes.v2.intent_router.modules.normalizer import V2QueryNormalizer
from app.core.agent.processes.v2.intent_router.modules.scope_catalog import DocsScopeCatalog, build_docs_scope_catalog
from app.core.agent.processes.v2.intent_router.modules.scope_resolver import (
plausible_doc_endpoint_paths,
promote_target_terms,
resolve_docs_scope,
)
from app.core.agent.processes.v2.intent_router.modules.target_terms import V2TargetTermsExtractor
from app.core.agent.processes.v2.intent_router.models import QueryFeatures
from app.core.agent.processes.v2.intent_router.routers.confidence import V2ConfidenceAdjuster
@@ -11,8 +20,18 @@ from app.core.agent.processes.v2.intent_router.routers.fallback import V2Fallbac
from app.core.agent.processes.v2.intent_router.routers.llm import V2LlmRouter
from app.core.agent.processes.v2.intent_router.routers.route_catalog import V2RouteCatalog
from app.core.agent.processes.v2.intent_router.routers.validator import V2RouteValidator
from app.core.agent.processes.v2.models import V2RouteResult
from app.core.agent.utils.process_v2.models import V2RouteResult, V2ScopeType
from app.core.agent.utils.llm import AgentLlmService
from app.core.rag.persistence.query_repository import RagQueryRepository
def _scope_candidate_dict(candidate) -> dict[str, object]:
    """Convert a scope candidate into a plain dict for the LLM router payload.

    The dict carries ``value``, ``score``, ``source_layer`` and
    ``match_type``, in that key order.
    """
    exported_fields = ("value", "score", "source_layer", "match_type")
    return {name: getattr(candidate, name) for name in exported_fields}
class V2IntentRouter:
@@ -25,6 +44,7 @@ class V2IntentRouter:
enable_llm_disambiguation: bool = True,
route_catalog: V2RouteCatalog | None = None,
confidence_adjuster: V2ConfidenceAdjuster | None = None,
scope_rows_provider: Callable[[str], list[dict]] | None = None,
) -> None:
self._normalizer = normalizer or V2QueryNormalizer()
self._target_terms_extractor = target_terms_extractor or V2TargetTermsExtractor()
@@ -35,23 +55,48 @@ class V2IntentRouter:
self._confidence_adjuster = confidence_adjuster or V2ConfidenceAdjuster()
self._enable_llm_disambiguation = enable_llm_disambiguation
self._llm_router = V2LlmRouter(llm, catalog=self._catalog) if llm is not None else None
self._scope_rows_provider = scope_rows_provider
def route(self, user_query: str) -> V2RouteResult:
def route(self, user_query: str, *, rag_session_id: str | None = None) -> V2RouteResult:
normalized_query = self._normalizer.normalize(user_query)
target_terms_analysis = self._target_terms_extractor.extract(normalized_query)
anchor_analysis = self._anchor_extractor.extract(normalized_query, target_terms_analysis)
sanitized_eps = plausible_doc_endpoint_paths(list(target_terms_analysis.endpoint_paths))
if sanitized_eps != list(target_terms_analysis.endpoint_paths):
target_terms_analysis = replace(target_terms_analysis, endpoint_paths=sanitized_eps)
allowed_paths = set(sanitized_eps)
target_terms_analysis = replace(
target_terms_analysis,
target_terms=[
t
for t in target_terms_analysis.target_terms
if not str(t).startswith("/") or str(t).lower() in allowed_paths
],
)
raw_target_terms = list(target_terms_analysis.target_terms)
scope_rows = self._load_scope_rows(rag_session_id)
scope_catalog: DocsScopeCatalog | None
if not scope_rows:
scope_catalog = None
else:
scope_catalog = build_docs_scope_catalog(scope_rows)
resolution = resolve_docs_scope(normalized_query, target_terms_analysis, scope_catalog)
promoted_terms = promote_target_terms(raw_target_terms, target_terms_analysis, resolution)
refined_terms = replace(target_terms_analysis, target_terms=promoted_terms)
anchor_analysis = self._anchor_extractor.extract(normalized_query, refined_terms)
self._apply_scope_to_anchors(anchor_analysis.anchors, resolution)
features = QueryFeatures(
normalized_query=normalized_query,
target_terms=list(target_terms_analysis.target_terms),
endpoint_paths=list(target_terms_analysis.endpoint_paths),
target_terms=list(refined_terms.target_terms),
endpoint_paths=list(refined_terms.endpoint_paths),
file_names=list(anchor_analysis.anchors.file_names),
matched_aliases=list(target_terms_analysis.matched_aliases),
matched_aliases=list(refined_terms.matched_aliases),
target_doc_hints=list(anchor_analysis.anchors.target_doc_hints),
file_markers=list(anchor_analysis.file_markers),
architecture_markers=list(anchor_analysis.architecture_markers),
logic_markers=list(anchor_analysis.logic_markers),
domain_markers=list(anchor_analysis.domain_markers),
endpoint_markers=list(anchor_analysis.endpoint_markers),
scope_type=resolution.scope_type,
)
llm_attempted = self._enable_llm_disambiguation and self._llm_router is not None
llm_candidate = self._route_with_llm(
@@ -59,7 +104,6 @@ class V2IntentRouter:
anchors=anchor_analysis.anchors,
)
llm_result = self._validator.validate(llm_candidate)
llm_result = self._apply_deterministic_corrections(llm_result, features)
if llm_result is not None:
confidence = self._confidence_adjuster.adjust(float(llm_result["confidence"]), features)
return V2RouteResult(
@@ -74,14 +118,53 @@ class V2IntentRouter:
routing_mode="llm_default",
llm_router_used=True,
reason_short=str(llm_result["reason_short"]),
scope_type=resolution.scope_type,
)
if llm_attempted:
return self._fallback_router.route_without_deterministic_signals(
user_query=user_query,
features=features,
anchors=anchor_analysis.anchors,
scope_type=resolution.scope_type,
)
return self._fallback_router.route(
user_query=user_query,
features=features,
anchors=anchor_analysis.anchors,
llm_attempted=llm_attempted,
scope_type=resolution.scope_type,
)
def _load_scope_rows(self, rag_session_id: str | None) -> list[dict]:
    """Load the docs scope index rows for a RAG session.

    Args:
        rag_session_id: Session identifier; blank or ``None`` yields no rows.

    Returns:
        Rows from the injected ``scope_rows_provider`` when one is
        configured, otherwise rows read through ``RagQueryRepository``.
        An empty list is returned when the session id is blank or the
        repository lookup fails.
    """
    sid = str(rag_session_id or "").strip()
    if not sid:
        return []
    if self._scope_rows_provider is not None:
        return self._scope_rows_provider(sid)
    try:
        return RagQueryRepository().list_docs_scope_index_rows(sid)
    except Exception:
        # Scope rows are best-effort, so the failure is not propagated;
        # it is logged with the traceback instead of being silently dropped.
        import logging

        logging.getLogger(__name__).warning(
            "Failed to load docs scope rows for RAG session %s",
            sid,
            exc_info=True,
        )
        return []
def _apply_scope_to_anchors(self, anchors, resolution) -> None:
    """Copy the scope resolution results onto the extracted anchors, in place.

    Candidate lists are always copied. When the catalog was loaded, strong
    endpoint paths and entity names are additionally merged in front of the
    anchors' own values (duplicates dropped, first occurrence kept), and a
    non-empty strong domain / subdomain overrides the anchors' process
    domain / subdomain.

    Args:
        anchors: Anchor container mutated by this method.
        resolution: Scope resolution to read candidates and strong values from.
    """
    anchors.candidate_domains = list(resolution.candidate_domains)
    anchors.candidate_subdomains = list(resolution.candidate_subdomains)
    anchors.candidate_entities = list(resolution.candidate_entities)
    anchors.candidate_apis = list(resolution.candidate_apis)
    if not resolution.catalog_loaded:
        return

    # dict.fromkeys de-duplicates while preserving order, so strong values
    # come first.
    anchors.endpoint_paths = list(
        dict.fromkeys([*resolution.strong_endpoint_paths, *anchors.endpoint_paths])
    )
    anchors.entity_names = list(
        dict.fromkeys([*resolution.strong_entity_names, *anchors.entity_names])
    )

    # These two assignments also cover the SUBDOMAIN scope case (both values
    # present), so no separate branch is needed for it.
    if resolution.strong_domain:
        anchors.process_domain = resolution.strong_domain
    if resolution.strong_subdomain:
        anchors.process_subdomain = resolution.strong_subdomain
def _route_with_llm(self, *, features: QueryFeatures, anchors) -> dict | None:
if not self._enable_llm_disambiguation or self._llm_router is None:
return None
@@ -89,6 +172,7 @@ class V2IntentRouter:
return self._llm_router.classify(
normalized_query=features.normalized_query,
target_terms=features.target_terms,
scope_type=features.scope_type,
anchors={
"entity_names": anchors.entity_names,
"file_names": anchors.file_names,
@@ -97,22 +181,11 @@ class V2IntentRouter:
"matched_aliases": anchors.matched_aliases,
"process_domain": anchors.process_domain,
"process_subdomain": anchors.process_subdomain,
"candidate_domains": [_scope_candidate_dict(c) for c in anchors.candidate_domains],
"candidate_subdomains": [_scope_candidate_dict(c) for c in anchors.candidate_subdomains],
"candidate_entities": [_scope_candidate_dict(c) for c in anchors.candidate_entities],
"candidate_apis": [_scope_candidate_dict(c) for c in anchors.candidate_apis],
},
)
except Exception:
return None
def _apply_deterministic_corrections(self, candidate: dict | None, features: QueryFeatures) -> dict | None:
    """Override the subintent of a DOCS route when the query clearly asks for files.

    Returns ``None`` for a missing candidate. A DOCS candidate for which
    ``_should_force_find_files`` holds is returned as a copy with
    ``subintent`` set to ``"FIND_FILES"``; any other candidate is returned
    unchanged (same object).
    """
    if candidate is None:
        return None
    is_docs_route = candidate.get("routing_domain") == "DOCS"
    if not (is_docs_route and self._should_force_find_files(features)):
        return candidate
    return {**candidate, "subintent": "FIND_FILES"}
def _should_force_find_files(self, features: QueryFeatures) -> bool:
    """Decide whether the query unambiguously asks to locate a file or document.

    True when file markers or file names were extracted, or when the
    lowercased normalized query contains one of the phrases "show doc",
    "show file" or "doc for".
    """
    if features.file_markers or features.file_names:
        return True
    lowered = features.normalized_query.lower()
    return any(phrase in lowered for phrase in ("show doc", "show file", "doc for"))
@@ -1,7 +1,7 @@
from __future__ import annotations
from app.core.agent.processes.v2.intent_router.models import QueryFeatures
from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2RouteResult, V2Subintent
from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2RouteResult, V2Subintent
from app.core.agent.processes.v2.intent_router.routers.docs_subintent_resolver import DocsSubintentResolver
@@ -1,13 +1,33 @@
from __future__ import annotations
from app.core.agent.processes.v2.intent_router.models import QueryFeatures
from app.core.agent.processes.v2.models import V2Subintent
from app.core.agent.utils.process_v2.models import V2Subintent
class DocsSubintentResolver:
# Whole phrases (Russian and English) that on their own mark a request to
# enumerate APIs/endpoints; matched as substrings of the lowercased query.
_API_ENUM_MARKERS = (
"какие api",
"какие эндпоинты",
"какие endpoint",
"список api",
"список эндпоинтов",
"список endpoint",
"все api",
"все эндпоинты",
"перечисли api",
"перечисли эндпоинты",
"доступные api",
"available endpoints",
"exposed api",
)
# Looser fallback: the query must contain at least one API-like word AND at
# least one listing word. Both tuples are matched as substrings.
_API_WORD_MARKERS = ("api", "эндпоинт", "endpoint", "роут", "route", "метод")
_LIST_WORD_MARKERS = ("какие", "список", "перечисли", "все", "доступные", "list", "available", "exposed")
def resolve(self, features: QueryFeatures) -> str | None:
if features.file_markers or self._has_file_like_anchor(features):
return V2Subintent.FIND_FILES
if self._is_api_exposed_request(features):
return V2Subintent.API_EXPOSED
if any(
(
features.endpoint_paths,
@@ -26,3 +46,13 @@ class DocsSubintentResolver:
hint.endswith((".md", ".yaml", ".yml", ".json"))
for hint in features.target_doc_hints
) or any(token.endswith((".md", ".yaml", ".yml", ".json")) for token in features.file_names)
def _is_api_exposed_request(self, features: QueryFeatures) -> bool:
    """Detect a request to list the APIs/endpoints that are exposed.

    A query that already names concrete endpoint paths is never an
    enumeration request. Otherwise the lowercased query must contain one of
    the enumeration phrases, or combine an API word with a listing word.
    """
    lowered = features.normalized_query.lower()
    if features.endpoint_paths:
        return False

    def mentions(markers) -> bool:
        return any(marker in lowered for marker in markers)

    if mentions(self._API_ENUM_MARKERS):
        return True
    return mentions(self._API_WORD_MARKERS) and mentions(self._LIST_WORD_MARKERS)
@@ -1,10 +1,33 @@
from __future__ import annotations
from app.core.agent.processes.v2.intent_router.models import QueryFeatures
from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2RouteResult, V2Subintent
from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2RouteResult, V2ScopeType, V2Subintent
class V2FallbackRouter:
def route_without_deterministic_signals(
    self,
    *,
    user_query: str,
    features: QueryFeatures,
    anchors,
    scope_type: str = V2ScopeType.UNKNOWN,
) -> V2RouteResult:
    """Build the GENERAL / GENERAL_QA / SUMMARY result used when the LLM route is unresolved.

    No deterministic signals are consulted: the result always has zero
    confidence, routing mode ``"llm_fallback"`` and ``llm_router_used=True``.
    The query, target terms, anchors and scope type are passed through.
    """
    route_fields = {
        "routing_domain": V2Domain.GENERAL,
        "intent": V2Intent.GENERAL_QA,
        "subintent": V2Subintent.SUMMARY,
        "user_query": user_query,
        "normalized_query": features.normalized_query,
        "target_terms": features.target_terms,
        "anchors": anchors,
        "confidence": 0.0,
        "routing_mode": "llm_fallback",
        "llm_router_used": True,
        "reason_short": "llm route unresolved",
        "scope_type": scope_type,
    }
    return V2RouteResult(**route_fields)
def route(
self,
*,
@@ -12,6 +35,7 @@ class V2FallbackRouter:
features: QueryFeatures,
anchors,
llm_attempted: bool,
scope_type: str = V2ScopeType.UNKNOWN,
) -> V2RouteResult:
if features.file_markers:
return self._build_docs_result(
@@ -21,6 +45,32 @@ class V2FallbackRouter:
subintent=V2Subintent.FIND_FILES,
llm_attempted=llm_attempted,
reason="fallback file markers",
scope_type=scope_type,
)
if self._has_docs_update_signal(features):
return V2RouteResult(
routing_domain=V2Domain.DOCS,
intent=V2Intent.DOC_UPDATE,
subintent=V2Subintent.FROM_FEATURE,
user_query=user_query,
normalized_query=features.normalized_query,
target_terms=features.target_terms,
anchors=anchors,
confidence=0.0,
routing_mode=self._routing_mode(llm_attempted),
llm_router_used=llm_attempted,
reason_short="fallback docs update from feature",
scope_type=scope_type,
)
if self._has_api_exposed_signal(features):
return self._build_docs_result(
user_query=user_query,
features=features,
anchors=anchors,
subintent=V2Subintent.API_EXPOSED,
llm_attempted=llm_attempted,
reason="fallback docs api exposed",
scope_type=scope_type,
)
if self._has_docs_signal(features):
return self._build_docs_result(
@@ -30,6 +80,7 @@ class V2FallbackRouter:
subintent=V2Subintent.SUMMARY,
llm_attempted=llm_attempted,
reason="fallback docs summary",
scope_type=scope_type,
)
return V2RouteResult(
routing_domain=V2Domain.GENERAL,
@@ -43,6 +94,7 @@ class V2FallbackRouter:
routing_mode=self._routing_mode(llm_attempted),
llm_router_used=llm_attempted,
reason_short="fallback general summary",
scope_type=scope_type,
)
def _build_docs_result(
@@ -54,6 +106,7 @@ class V2FallbackRouter:
subintent: str,
llm_attempted: bool,
reason: str,
scope_type: str = V2ScopeType.UNKNOWN,
) -> V2RouteResult:
return V2RouteResult(
routing_domain=V2Domain.DOCS,
@@ -67,6 +120,7 @@ class V2FallbackRouter:
routing_mode=self._routing_mode(llm_attempted),
llm_router_used=llm_attempted,
reason_short=reason,
scope_type=scope_type,
)
def _has_docs_signal(self, features: QueryFeatures) -> bool:
@@ -82,5 +136,30 @@ class V2FallbackRouter:
)
)
def _has_api_exposed_signal(self, features: QueryFeatures) -> bool:
    """Detect an "enumerate the APIs" request from the query text alone.

    The lowercased query must contain both an API-like word and a listing
    word, and the features must carry neither endpoint paths nor file
    markers.
    """
    lowered = features.normalized_query.lower()
    api_words = ("api", "эндпоинт", "endpoint", "роут", "route", "метод")
    listing_words = ("какие", "список", "перечисли", "все", "available", "list")
    mentions_api = any(word in lowered for word in api_words)
    mentions_listing = any(word in lowered for word in listing_words)
    if not (mentions_api and mentions_listing):
        return False
    return not (features.endpoint_paths or features.file_markers)
def _has_docs_update_signal(self, features: QueryFeatures) -> bool:
    """Detect a "update the documentation from a feature spec" request.

    The lowercased query must contain at least one update marker and at
    least one feature-source marker; both are substring checks.
    """
    lowered = features.normalized_query.lower()
    # NOTE(review): "документац" counts as an update marker, so merely
    # mentioning documentation satisfies the first half - confirm this is intended.
    update_markers = (
        "обнов",
        "измен",
        "внести правк",
        "docs update",
        "update documentation",
        "документац",
    )
    feature_markers = ("системной аналитик", "feature", ".md", "confluence", "from feature")
    mentions_update = any(marker in lowered for marker in update_markers)
    mentions_feature = any(marker in lowered for marker in feature_markers)
    return mentions_update and mentions_feature
def _routing_mode(self, llm_attempted: bool) -> str:
    """Name the routing mode of a fallback result, depending on whether the LLM was tried."""
    if llm_attempted:
        return "llm_fallback"
    return "deterministic_fallback"
@@ -17,10 +17,18 @@ class V2LlmRouter:
self._prompt_name = prompt_name
self._catalog = catalog or V2RouteCatalog()
def classify(self, *, normalized_query: str, target_terms: list[str], anchors: dict) -> dict | None:
def classify(
self,
*,
normalized_query: str,
target_terms: list[str],
anchors: dict,
scope_type: str = "unknown",
) -> dict | None:
payload = {
"normalized_query": normalized_query,
"target_terms": target_terms,
"scope_type": scope_type,
"anchors": anchors,
"allowed_routes": self._catalog.allowed_routes(),
}
@@ -3,9 +3,12 @@ namespace: v2_intent_router
prompts:
route: |
Ты выбираешь маршрут для узкого процесса v2.
Поле `scope_type` и блок `anchors` с `candidate_*` — это предварительная привязка к каталогу документации текущей RAG-сессии (детерминированно извлечённые кандидаты). Не выдумывай домены, сущности и API, которых нет в этих полях; используй их для снятия неоднозначности.
Основной принцип:
- DOCS / DOC_EXPLAIN / FIND_FILES: запрос просит найти файл, документ или путь.
- DOCS / DOC_EXPLAIN / API_EXPOSED: запрос просит перечислить доступные API-методы/эндпоинты.
- DOCS / DOC_EXPLAIN / SUMMARY: запрос просит объяснить документацию, endpoint, архитектуру, процесс или сущность.
- DOCS / DOC_UPDATE / FROM_FEATURE: запрос просит обновить документацию по системной аналитике (feature markdown/confluence).
- GENERAL / GENERAL_QA / SUMMARY: общий обзорный вопрос без явного запроса к документации.
Используй только маршруты из поля `allowed_routes`.
@@ -17,8 +20,8 @@ prompts:
Ответь только JSON-объектом вида:
{
"routing_domain": "GENERAL" | "DOCS",
"intent": "GENERAL_QA" | "DOC_EXPLAIN",
"subintent": "SUMMARY" | "FIND_FILES",
"intent": "GENERAL_QA" | "DOC_EXPLAIN" | "DOC_UPDATE",
"subintent": "SUMMARY" | "FIND_FILES" | "API_EXPOSED" | "FROM_FEATURE",
"confidence": 0.0-1.0,
"reason_short": "короткая причина"
}
@@ -1,12 +1,14 @@
from __future__ import annotations
from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2Subintent
from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2Subintent
class V2RouteCatalog:
_ALLOWED_ROUTES = (
(V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.FIND_FILES),
(V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.API_EXPOSED),
(V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.SUMMARY),
(V2Domain.DOCS, V2Intent.DOC_UPDATE, V2Subintent.FROM_FEATURE),
(V2Domain.GENERAL, V2Intent.GENERAL_QA, V2Subintent.SUMMARY),
)
-304
View File
@@ -1,304 +0,0 @@
"""Процесс v2: роутинг, план retrieval, вызов rag API, сборка evidence и workflow."""
from __future__ import annotations
from app.core.agent.processes.v2.anchor_signals import route_anchor_summary
from app.core.agent.processes.v2.evidence.assembler import DocsEvidenceAssembler
from app.core.agent.processes.v2.evidence.gate import DocsEvidenceGate
from app.core.agent.processes.v2.intent_router import V2IntentRouter
from app.core.agent.processes.v2.models import V2Intent, V2Subintent
from app.core.agent.processes.v2.retrieval import DocsMetadataLookupIndex
from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
from app.core.agent.processes.v2.retrieval.target_doc_seeding import (
RagRowIndex,
merge_row_lists,
normalize_doc_path,
normalized_path_set,
row_path,
seed_candidates_from_target_hints,
)
from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter
from app.core.agent.processes.v2.workflows.docs_explain_find_files.context import DocsExplainFindFilesContext
from app.core.agent.processes.v2.workflows.docs_explain_find_files.graph import DocsExplainFindFilesGraph
from app.core.agent.processes.v2.workflows.docs_explain_summary.context import DocsExplainSummaryContext
from app.core.agent.processes.v2.workflows.docs_explain_summary.graph import DocsExplainSummaryGraph
from app.core.agent.processes.v2.workflows.general_summary.context import GeneralSummaryContext
from app.core.agent.processes.v2.workflows.general_summary.graph import GeneralSummaryGraph
from app.core.agent.processes.base import AgentProcess, ProcessResult
from app.core.agent.utils.llm import AgentLlmService
class V2Process(AgentProcess):
version = "v2"
def __init__(
    self,
    llm: AgentLlmService,
    policy_resolver: V2RetrievalPolicyResolver,
    rag_adapter: V2RagRetrievalAdapter,
    evidence_assembler: DocsEvidenceAssembler,
    evidence_gate: DocsEvidenceGate | None = None,
    router: V2IntentRouter | None = None,
    docs_summary_prompt_name: str = "v2_docs_explain.summary_answer",
    general_summary_prompt_name: str = "v2_general.summary_answer",
    workflow_llm_enabled: bool = True,
) -> None:
    """Wire the collaborators of the v2 process.

    A missing (falsy) *router* or *evidence_gate* is replaced with a
    default-constructed instance. The three workflow graphs are created
    here; the summary graphs receive *llm*, the find-files graph takes no
    arguments.
    """
    # Optional collaborators with defaults.
    self._router = router or V2IntentRouter()
    self._evidence_gate = evidence_gate or DocsEvidenceGate()

    # Required collaborators.
    self._policy_resolver = policy_resolver
    self._rag_adapter = rag_adapter
    self._evidence_assembler = evidence_assembler

    # Workflow configuration.
    self._docs_summary_prompt_name = docs_summary_prompt_name
    self._general_summary_prompt_name = general_summary_prompt_name
    self._workflow_llm_enabled = workflow_llm_enabled

    # Workflow graphs.
    self._summary_graph = DocsExplainSummaryGraph(llm)
    self._find_files_graph = DocsExplainFindFilesGraph()
    self._general_summary_graph = GeneralSummaryGraph(llm)
# Entry point of the v2 process: route the user message, fetch and merge RAG
# rows, assemble evidence, check the evidence gate, and run the workflow graph
# that matches the route. Every stage is written to the trace. Returns a
# ProcessResult that carries only the answer text.
async def run(self, context) -> ProcessResult:
# Stage 1: classify the request. The router receives only the message text.
route = self._router.route(context.request.message)
rag_session_id = context.session.active_rag_session_id
context.trace.module("process.v2").log(
"intent_routed",
{
"routing_domain": route.routing_domain,
"intent": route.intent,
"subintent": route.subintent,
"normalized_query": route.normalized_query,
"target_terms": route.target_terms,
"anchors": route_anchor_summary(route),
"confidence": route.confidence,
"routing_mode": route.routing_mode,
"llm_router_used": route.llm_router_used,
"reason_short": route.reason_short,
"rag_session_id": rag_session_id,
},
)
self._log_step(
context,
"router_resolved",
{
"domain": route.routing_domain,
"intent": route.intent,
"subintent": route.subintent,
"confidence": route.confidence,
},
)
self._log_step(
context,
"anchors_extracted",
{
"signal_types": route_anchor_summary(route)["signal_types"],
"endpoint_paths": route.anchors.endpoint_paths,
"target_doc_hints": route.anchors.target_doc_hints,
"matched_aliases": route.anchors.matched_aliases,
"target_terms": route.target_terms,
},
)
self._log_step(
context,
"alias_resolution",
{
"resolved_aliases": route.anchors.matched_aliases,
"target_doc_hints": route.anchors.target_doc_hints,
},
)
# Without an active RAG session nothing can be retrieved: answer with a fixed
# message. Only the GENERAL_QA branch also records the failed gate in the trace.
if not rag_session_id:
if route.intent == V2Intent.GENERAL_QA:
answer = "Не могу собрать grounded summary без активной RAG-сессии с проиндексированной документацией."
self._log_step(context, "evidence_gate_checked", {"passed": False, "reason": "missing_rag_session"})
self._log_step(context, "answer_generated", {"answer_mode": "insufficient_evidence"})
return ProcessResult(answer=answer)
return ProcessResult(answer="Для процесса v2 нужна активная RAG-сессия проекта с проиндексированной документацией.")
# Stage 2: resolve the retrieval plan for the route.
plan = self._policy_resolver.resolve(route)
context.trace.module("process.v2.retrieval_policy").log(
"retrieval_plan_resolved",
{"profile": plan.profile, "layers": plan.layers, "limit": plan.limit, "filters": plan.filters},
)
self._log_step(
context,
"retrieval_profile_selected",
{"profile": plan.profile, "layers": plan.layers, "filters": plan.filters},
)
# Stage 3: build the candidate rows. Retrieved rows are merged with metadata
# lookup hits, then seeded from the target doc hints.
retrieved_rows = await self._rag_adapter.fetch_rows(rag_session_id, route.normalized_query, plan)
metadata_rows = self._metadata_lookup_candidates(retrieved_rows, route)
rows = self._merge_candidate_rows(retrieved_rows, metadata_rows)
rows = seed_candidates_from_target_hints(rows, route.anchors.target_doc_hints, RagRowIndex(rows))
self._print_missing_target_hints(route, rows)
context.trace.module("process.v2.rag_retrieval").log(
"rag_rows_fetched",
{
"profile": plan.profile,
"row_count": len(rows),
"rows": [self._trace_row(row) for row in rows],
},
)
self._log_step(
context,
"candidate_generation",
{
"query": route.user_query,
"profile": plan.profile,
"details": {
"target_doc_hints": list(route.anchors.target_doc_hints),
"candidates_before_ranking": [row_path(row) for row in rows if row_path(row)],
},
"resolved_aliases": route.anchors.matched_aliases,
"target_doc_hints": route.anchors.target_doc_hints,
"candidate_docs_before_ranking": [self._trace_row(row) for row in rows[:8]],
"sources": {
"seeded": [self._trace_row(row) for row in retrieved_rows[:5] if row_path(row) in {normalize_doc_path(h) for h in route.anchors.target_doc_hints}],
"metadata_lookup": [self._trace_row(row) for row in metadata_rows[:5]],
"semantic": [self._trace_row(row) for row in retrieved_rows[:5]],
},
},
)
self._log_step(
context,
"retrieval_executed",
{
"query": route.user_query,
"profile": plan.profile,
"row_count": len(rows),
"target_doc_hints": route.anchors.target_doc_hints,
"top_results": [self._trace_row(row) for row in rows[:5]],
},
)
# Stage 4a: FIND_FILES routes assemble file evidence and run the find-files graph.
if route.subintent == V2Subintent.FIND_FILES:
files = self._evidence_assembler.assemble_files(rows, route)
gate = self._evidence_gate.check_files(route, files)
context.trace.module("process.v2.evidence").log(
"evidence_assembled",
{"mode": "find_files", "file_count": len(files), "files": [file.path for file in files]},
)
self._log_step(
context,
"evidence_assembled",
{"mode": "find_files", "primary_file": files[0].path if files else None, "file_count": len(files)},
)
self._log_ranking(context, files)
self._log_step(
context,
"evidence_gate_checked",
{"passed": gate.passed, "reason": gate.reason, "answer_mode": gate.answer_mode},
)
flow_context = DocsExplainFindFilesContext(
runtime=context,
route=route,
rag_session_id=rag_session_id,
files=files,
gate_decision=gate,
)
flow_context = await self._find_files_graph.run(flow_context)
self._log_step(context, "answer_generated", {"answer_mode": gate.answer_mode, "answer_length": len(flow_context.answer)})
return ProcessResult(answer=flow_context.answer)
# Stage 4b: every other route assembles summary documents.
documents = self._evidence_assembler.assemble_summaries(rows, route)
gate = self._evidence_gate.check_summaries(route, documents)
context.trace.module("process.v2.evidence").log(
"evidence_assembled",
{"mode": "summary", "document_count": len(documents), "documents": [item.path for item in documents]},
)
self._log_step(
context,
"evidence_assembled",
{"mode": "summary", "primary_doc": documents[0].path if documents else None, "document_count": len(documents)},
)
self._log_ranking(context, documents)
self._log_step(
context,
"evidence_gate_checked",
{"passed": gate.passed, "reason": gate.reason, "answer_mode": gate.answer_mode},
)
# GENERAL_QA goes through the general summary graph with its own prompt.
if route.intent == V2Intent.GENERAL_QA:
flow_context = GeneralSummaryContext(
runtime=context,
route=route,
prompt_name=self._general_summary_prompt_name,
workflow_llm_enabled=self._workflow_llm_enabled,
documents=documents,
gate_decision=gate,
)
flow_context = await self._general_summary_graph.run(flow_context)
self._log_step(context, "answer_generated", {"answer_mode": gate.answer_mode, "answer_length": len(flow_context.answer)})
return ProcessResult(answer=flow_context.answer)
# Remaining routes use the docs-explain summary graph.
flow_context = DocsExplainSummaryContext(
runtime=context,
route=route,
rag_session_id=rag_session_id,
prompt_name=self._docs_summary_prompt_name,
workflow_llm_enabled=self._workflow_llm_enabled,
documents=documents,
gate_decision=gate,
)
flow_context = await self._summary_graph.run(flow_context)
self._log_step(context, "answer_generated", {"answer_mode": gate.answer_mode, "answer_length": len(flow_context.answer)})
return ProcessResult(answer=flow_context.answer)
def _trace_row(self, row: dict) -> dict[str, object]:
    """Reduce a RAG row to a compact, string-only dict suitable for tracing.

    Missing or falsy fields become empty strings. The document id falls
    back from ``metadata["document_id"]`` to ``metadata["doc_id"]``. The
    summary text and the trimmed content preview are cut to 400 characters.
    """
    meta = row.get("metadata") or {}

    def text(source: dict, key: str) -> str:
        return str(source.get(key) or "")

    return {
        "layer": text(row, "layer"),
        "path": text(row, "path"),
        "title": text(row, "title"),
        "document_id": str(meta.get("document_id") or meta.get("doc_id") or ""),
        "entity_name": text(meta, "entity_name"),
        "summary_text": text(meta, "summary_text")[:400],
        "section_path": text(meta, "section_path"),
        "content_preview": text(row, "content").strip()[:400],
    }
def _log_step(self, context, step: str, payload: dict[str, object]) -> None:
context.trace.module("process.v2.pipeline").log(step, payload)
def _print_missing_target_hints(self, route, rows: list[dict]) -> None:
    """Print an error line for each concrete target-doc hint missing from ``rows``.

    A hint is checked only when its normalized form starts with ``docs/`` and
    its last path segment contains a dot; blank and other hints are ignored.
    """
    hints = route.anchors.target_doc_hints
    if not hints:
        return
    known_paths = normalized_path_set(rows)
    for raw_hint in hints:
        if not str(raw_hint or "").strip():
            continue
        doc_path = normalize_doc_path(raw_hint)
        file_name = doc_path.rsplit("/", 1)[-1]
        looks_like_doc_file = doc_path.startswith("docs/") and "." in file_name
        if looks_like_doc_file and doc_path not in known_paths:
            print("ERROR: target doc missing from candidates:", doc_path)
def _metadata_lookup_candidates(self, rows: list[dict], route) -> list[dict]:
    """Build a ``DocsMetadataLookupIndex`` over ``rows`` and return its lookup result for ``route``."""
    index = DocsMetadataLookupIndex(rows)
    return index.lookup(route)
def _merge_candidate_rows(self, *groups: list[dict]) -> list[dict]:
    """Combine the given row lists by delegating to ``merge_row_lists``."""
    merged = merge_row_lists(*groups)
    return merged
def _log_ranking(self, context, items: list) -> None:
top_docs: list[dict[str, object]] = []
for item in items[:4]:
top_docs.append(
{
"doc": getattr(item, "path", ""),
"score": getattr(item, "score", 0),
"match_reason": getattr(item, "match_reason", ""),
}
)
context.trace.module("process.v2.pipeline").log(
"ranking_explained",
{
"doc": getattr(item, "path", ""),
"score_breakdown": getattr(item, "score_breakdown", {}),
"score": getattr(item, "score", 0),
"match_reason": getattr(item, "match_reason", ""),
},
)
context.trace.module("process.v2.pipeline").log(
"ranking_explained",
{
"top_docs_after_ranking": top_docs,
"ranking_score_breakdown": [
{
"doc": getattr(item, "path", ""),
"score_breakdown": getattr(item, "score_breakdown", {}),
}
for item in items[:4]
],
},
)
@@ -1,17 +0,0 @@
from app.core.agent.processes.v2.retrieval.metadata_lookup import DocsMetadataLookupIndex
from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
from app.core.agent.processes.v2.retrieval.target_doc_seeding import (
RagRowIndex,
normalize_doc_path,
seed_candidates_from_target_hints,
)
from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter
__all__ = [
"V2RetrievalPolicyResolver",
"V2RagRetrievalAdapter",
"DocsMetadataLookupIndex",
"normalize_doc_path",
"RagRowIndex",
"seed_candidates_from_target_hints",
]
@@ -1,270 +0,0 @@
"""Intent-aware retrieval policy resolver for process v2."""
from __future__ import annotations
from app.core.agent.processes.v2.anchor_signals import anchor_signal_types
from app.core.agent.processes.v2.models import V2AnchorType, V2Intent, V2RouteResult, V2Subintent
from app.core.rag.contracts.enums import RagLayer
from app.core.rag.retrieval.session_retriever import RetrievalPlan
class _AnchorTermCollector:
def prefer_like_patterns(self, route: V2RouteResult) -> list[str]:
terms = self._hint_basenames(route)
terms.extend(route.anchors.endpoint_paths)
terms.extend(route.target_terms)
terms.extend(route.anchors.file_names)
terms.extend(route.anchors.entity_names)
terms.extend(route.anchors.matched_aliases)
terms.extend(self._process_terms(route))
return [f"%{term.lower()}%" for term in _unique_terms(terms)]
def find_files_patterns(self, route: V2RouteResult) -> list[str]:
if route.anchors.target_doc_hints:
return [f"%{name.lower()}%" for name in self._hint_basenames(route)]
return self.prefer_like_patterns(route)
def api_method_patterns(self, route: V2RouteResult) -> list[str]:
terms = self._hint_basenames(route)
terms.extend(route.anchors.target_doc_hints)
terms.extend(route.anchors.endpoint_paths)
terms.extend(route.target_terms)
patterns: list[str] = []
for term in _unique_terms(terms):
lowered = term.lower()
stripped = lowered.strip("/")
if stripped:
patterns.append(f"%{stripped}%")
if lowered:
patterns.append(f"%{lowered}%")
return _unique_terms(patterns)
def _hint_basenames(self, route: V2RouteResult) -> list[str]:
return [hint.rsplit("/", 1)[-1] for hint in route.anchors.target_doc_hints if str(hint).strip()]
def _process_terms(self, route: V2RouteResult) -> list[str]:
terms: list[str] = []
if route.anchors.process_domain:
terms.append(route.anchors.process_domain)
if route.anchors.process_subdomain:
terms.append(route.anchors.process_subdomain)
return terms
class _RouteFilterBuilder:
_API_DOC_PREFIXES = [
"docs/api/",
"docs/endpoints/",
"docs/methods/",
"api/",
"endpoints/",
"methods/",
]
def __init__(self) -> None:
self._terms = _AnchorTermCollector()
def general_filters(self, route: V2RouteResult) -> dict[str, object]:
return {
"prefer_path_prefixes": ["docs/architecture/", "docs/"],
"prefer_like_patterns": ["%readme.md%", "%overview%"],
"target_doc_hints": list(route.anchors.target_doc_hints),
}
def summary_filters(self, route: V2RouteResult) -> dict[str, object]:
if _is_api_method_explain(route):
return self.api_method_filters(route)
filters = self._base_filters(route)
filters["prefer_path_prefixes"] = self._summary_prefixes(route)
filters["prefer_like_patterns"] = self._terms.prefer_like_patterns(route)
if V2AnchorType.API_ENDPOINT in anchor_signal_types(route):
filters["path_prefixes"] = ["docs/api/", "docs/"]
return filters
def api_method_filters(self, route: V2RouteResult) -> dict[str, object]:
filters = self._base_filters(route)
filters["path_prefixes"] = list(self._API_DOC_PREFIXES)
filters["prefer_path_prefixes"] = list(self._API_DOC_PREFIXES)
filters["prefer_like_patterns"] = self._terms.api_method_patterns(route)
return filters
def find_files_filters(self, route: V2RouteResult) -> dict[str, object]:
filters = self._base_filters(route)
prefixes = self._find_files_prefixes(route)
if prefixes:
filters["path_prefixes"] = prefixes
filters["prefer_path_prefixes"] = self._find_files_prefer_prefixes(route, prefixes)
filters["prefer_like_patterns"] = self._terms.find_files_patterns(route)
return filters
def _base_filters(self, route: V2RouteResult) -> dict[str, object]:
filters: dict[str, object] = {
"target_doc_hints": list(route.anchors.target_doc_hints),
}
if route.anchors.process_domain:
filters["metadata.domain"] = route.anchors.process_domain
if route.anchors.process_subdomain:
filters["metadata.subdomain"] = route.anchors.process_subdomain
return filters
def _find_files_prefixes(self, route: V2RouteResult) -> list[str]:
hint_prefixes = _prefixes_from_paths(route.anchors.target_doc_hints)
if hint_prefixes:
return hint_prefixes
file_prefixes = [name for name in route.anchors.file_names if str(name).strip().startswith("docs/")]
derived = _prefixes_from_paths(file_prefixes)
if derived:
return derived
signals = anchor_signal_types(route)
if V2AnchorType.API_ENDPOINT in signals:
return ["docs/api/", "docs/"]
if V2AnchorType.ARCHITECTURE in signals:
return ["docs/architecture/", "docs/"]
if V2AnchorType.LOGIC_FLOW in signals:
return ["docs/logic/", "docs/"]
if V2AnchorType.DOMAIN_ENTITY in signals:
return ["docs/domains/", "docs/"]
return ["docs/"]
def _find_files_prefer_prefixes(self, route: V2RouteResult, prefixes: list[str]) -> list[str]:
preferred = list(prefixes)
if route.anchors.process_domain or route.anchors.process_subdomain:
preferred.extend(["docs/domains/", "docs/logic/"])
return _unique_terms(preferred or ["docs/"])
def _summary_prefixes(self, route: V2RouteResult) -> list[str]:
signals = anchor_signal_types(route)
prefixes: list[str] = []
if V2AnchorType.API_ENDPOINT in signals:
prefixes.extend(["docs/api/", "docs/"])
if V2AnchorType.ARCHITECTURE in signals:
prefixes.extend(["docs/architecture/", "docs/"])
if V2AnchorType.LOGIC_FLOW in signals:
prefixes.extend(["docs/logic/", "docs/architecture/", "docs/"])
if V2AnchorType.DOMAIN_ENTITY in signals:
prefixes.extend(["docs/domains/", "docs/", "docs/api/"])
return _unique_terms(prefixes or ["docs/"])
class V2RetrievalPolicyResolver:
    """Maps a routed v2 request onto a ``RetrievalPlan`` (profile, layers, limit, filters)."""

    _GENERAL_LAYERS = [RagLayer.DOCS_DOCUMENT_CATALOG, RagLayer.DOCS_DOC_CHUNKS]
    _FIND_FILES_LAYERS = [RagLayer.DOCS_DOCUMENT_CATALOG, RagLayer.DOCS_ENTITY_CATALOG]
    # Layers queried per summary profile name.
    _SUMMARY_LAYERS = {
        "docs_api_method_explain": [
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_FACT_INDEX,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
        "docs_summary_api_endpoint": [
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_FACT_INDEX,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
        "docs_summary_logic_flow": [
            RagLayer.DOCS_WORKFLOW_INDEX,
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
        "docs_summary_domain_entity": [
            RagLayer.DOCS_ENTITY_CATALOG,
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
        "docs_summary_architecture": [
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_RELATION_GRAPH,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
        "docs_summary_generic": [
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
    }

    def __init__(self) -> None:
        self._filters = _RouteFilterBuilder()

    def resolve(self, route: V2RouteResult) -> RetrievalPlan:
        """Return the retrieval plan for ``route``.

        General QA and file lookup use fixed profiles; every other route gets
        a summary profile chosen by ``_summary_profile``.
        """
        if route.intent == V2Intent.GENERAL_QA:
            profile = "general_qa_grounded_summary"
            layers = self._GENERAL_LAYERS
            limit = 8
            filters = self._filters.general_filters(route)
        elif route.subintent == V2Subintent.FIND_FILES:
            profile = "file_lookup"
            layers = self._FIND_FILES_LAYERS
            limit = 12
            filters = self._filters.find_files_filters(route)
        else:
            profile = self._summary_profile(route)
            layers = self._SUMMARY_LAYERS[profile]
            limit = 10 if profile == "docs_api_method_explain" else 8
            filters = self._filters.summary_filters(route)
        # Copy the layer list so callers cannot mutate the class-level constant.
        return RetrievalPlan(profile=profile, layers=list(layers), limit=limit, filters=filters)

    def _summary_profile(self, route: V2RouteResult) -> str:
        """Return the summary profile name.

        API-method routes get their own profile. Otherwise a specific profile
        is used only when exactly one anchor signal other than ``FIND_FILES``
        is present.
        """
        if _is_api_method_explain(route):
            return "docs_api_method_explain"
        signals = anchor_signal_types(route) - {V2AnchorType.FIND_FILES}
        if len(signals) != 1:
            return "docs_summary_generic"
        (only_signal,) = signals
        profile_by_signal = {
            V2AnchorType.API_ENDPOINT: "docs_summary_api_endpoint",
            V2AnchorType.ARCHITECTURE: "docs_summary_architecture",
            V2AnchorType.LOGIC_FLOW: "docs_summary_logic_flow",
            V2AnchorType.DOMAIN_ENTITY: "docs_summary_domain_entity",
        }
        return profile_by_signal.get(only_signal, "docs_summary_generic")
def _prefixes_from_paths(paths: list[str]) -> list[str]:
    """Return the unique parent-directory prefixes (with trailing ``/``) of ``paths``.

    Each path is trimmed of whitespace and surrounding slashes first; entries
    that then contain no ``/`` have no parent directory and are skipped.
    """
    trimmed = (str(path).strip().strip("/") for path in paths)
    parents = [value.rsplit("/", 1)[0] + "/" for value in trimmed if "/" in value]
    return _unique_terms(parents)
def _unique_terms(items: list[str]) -> list[str]:
seen: set[str] = set()
unique: list[str] = []
for raw in items:
value = str(raw or "").strip()
if not value or value in seen:
continue
seen.add(value)
unique.append(value)
return unique
def _is_api_method_explain(route: V2RouteResult) -> bool:
    """Tell whether a summary route targets an API method.

    True for the ``SUMMARY`` subintent when the route has endpoint paths,
    API-like target doc hints, or an ``API_ENDPOINT`` anchor signal.
    """
    if route.subintent != V2Subintent.SUMMARY:
        return False
    anchors = route.anchors
    if anchors.endpoint_paths or _has_api_like_hints(anchors.target_doc_hints):
        return True
    return V2AnchorType.API_ENDPOINT in anchor_signal_types(route)
def _has_api_like_hints(hints: list[str]) -> bool:
for hint in hints:
value = str(hint or "").strip().lower()
if not value:
continue
if value.startswith("/"):
return True
if value.startswith(("docs/api/", "docs/endpoints/", "docs/methods/")):
return True
if "endpoint" in value or "method" in value:
return True
return False
@@ -0,0 +1,194 @@
"""Процесс v2: роутинг запроса и dispatch в workflow."""
from __future__ import annotations
from typing import Any
from app.core.agent.processes.base import AgentProcess, ProcessResult
from app.core.agent.processes.v2.intent_router import V2IntentRouter
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context import (
DocExplainApiExposedContext,
)
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.graph import DocExplainApiExposedGraph
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context import DocExplainFindFilesContext
from app.core.agent.processes.v2.workflows.doc_explain_find_files.graph import DocExplainFindFilesGraph
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context import DocExplainSummaryContext
from app.core.agent.processes.v2.workflows.doc_explain_summary.graph import DocExplainSummaryGraph
from app.core.agent.processes.v2.workflows.doc_update_from_feature.graph import DocUpdateFromFeatureGraph
from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.context import (
DocUpdateFromFeatureContext,
)
from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.context import GeneralQaSummaryContext
from app.core.agent.processes.v2.workflows.general_qa_summary.graph import GeneralQaSummaryGraph
from app.core.agent.utils.llm import AgentLlmService
from app.core.agent.utils.process_v2.anchor_signals import route_anchor_summary
from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2Subintent
from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
class V2Process(AgentProcess):
    """Agent process v2: routes the request and dispatches it to a workflow graph.

    Workflow graphs are registered per ``(domain, intent, subintent)`` key at
    construction time.
    """

    version = "v2"

    def __init__(
        self,
        llm: AgentLlmService,
        policy_resolver: RetrievalPlanResolver,
        rag_adapter: V2RagRetrievalAdapter,
        evidence_assembler: DocsEvidenceAssembler,
        evidence_gate: DocsEvidenceGate | None = None,
        router: V2IntentRouter | None = None,
        docs_summary_prompt_name: str = "v2_docs_explain.summary_answer",
        general_summary_prompt_name: str = "v2_general.summary_answer",
        workflow_llm_enabled: bool = True,
        doc_rules_enabled: bool = True,
    ) -> None:
        """Store configuration and build the workflow registry.

        A default ``V2IntentRouter`` / ``DocsEvidenceGate`` is created when the
        corresponding argument is not supplied.
        """
        self._router = router or V2IntentRouter()
        gate = evidence_gate or DocsEvidenceGate()
        self._docs_summary_prompt_name = docs_summary_prompt_name
        self._general_summary_prompt_name = general_summary_prompt_name
        self._workflow_llm_enabled = workflow_llm_enabled
        self._doc_rules_enabled = doc_rules_enabled
        # Keyword arguments shared by several graph constructors.
        retrieval_deps = {"policy_resolver": policy_resolver, "rag_adapter": rag_adapter}
        evidence_deps = {
            **retrieval_deps,
            "evidence_assembler": evidence_assembler,
            "evidence_gate": gate,
        }
        self._workflows: dict[tuple[str, str, str], Any] = {
            (V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.SUMMARY): DocExplainSummaryGraph(
                llm, **evidence_deps
            ),
            (V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.FIND_FILES): DocExplainFindFilesGraph(
                **evidence_deps
            ),
            (V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.API_EXPOSED): DocExplainApiExposedGraph(
                **retrieval_deps
            ),
            (V2Domain.DOCS, V2Intent.DOC_UPDATE, V2Subintent.FROM_FEATURE): DocUpdateFromFeatureGraph(
                llm=llm,
                doc_rules_enabled=doc_rules_enabled,
            ),
            (V2Domain.GENERAL, V2Intent.GENERAL_QA, V2Subintent.SUMMARY): GeneralQaSummaryGraph(
                llm, **evidence_deps
            ),
        }

    async def run(self, context) -> ProcessResult:
        """Route the request message, trace the routing, run the workflow and wrap its result.

        ``changeset`` / ``apply_changeset`` are read from the workflow context
        when present and default to an empty list / ``False`` otherwise.
        """
        rag_session_id = context.session.active_rag_session_id or ""
        route = self._router.route(context.request.message, rag_session_id=rag_session_id or None)
        anchors = route.anchors

        routed_payload = {
            "routing_domain": route.routing_domain,
            "intent": route.intent,
            "subintent": route.subintent,
            "normalized_query": route.normalized_query,
            "target_terms": route.target_terms,
            "anchors": route_anchor_summary(route),
            "confidence": route.confidence,
            "routing_mode": route.routing_mode,
            "llm_router_used": route.llm_router_used,
            "reason_short": route.reason_short,
            "rag_session_id": rag_session_id,
        }
        context.trace.module("process.v2").log("intent_routed", routed_payload)

        self._log_step(
            context,
            "router_resolved",
            {
                "domain": route.routing_domain,
                "intent": route.intent,
                "subintent": route.subintent,
                "confidence": route.confidence,
            },
        )
        self._log_step(
            context,
            "anchors_extracted",
            {
                "signal_types": route_anchor_summary(route)["signal_types"],
                "endpoint_paths": anchors.endpoint_paths,
                "target_doc_hints": anchors.target_doc_hints,
                "matched_aliases": anchors.matched_aliases,
                "target_terms": route.target_terms,
            },
        )
        self._log_step(
            context,
            "alias_resolution",
            {
                "resolved_aliases": anchors.matched_aliases,
                "target_doc_hints": anchors.target_doc_hints,
            },
        )

        flow_context = await self._run_workflow(context, route, rag_session_id)
        generated = flow_context.answer_generated_payload
        if generated is not None:
            self._log_step(context, "answer_generated", dict(generated))
        return ProcessResult(
            answer=flow_context.answer,
            changeset=list(getattr(flow_context, "changeset", []) or []),
            apply_changeset=bool(getattr(flow_context, "apply_changeset", False)),
        )

    def _log_step(self, context, step: str, payload: dict[str, object]) -> None:
        """Log ``payload`` under event name ``step`` in the ``process.v2.pipeline`` trace module."""
        pipeline_trace = context.trace.module("process.v2.pipeline")
        pipeline_trace.log(step, payload)

    async def _run_workflow(self, runtime_context, route, rag_session_id: str):
        """Look up the workflow registered for ``route``, build its context and run it.

        Raises:
            ValueError: if no workflow is registered for the route key.
        """
        route_key = (route.routing_domain, route.intent, route.subintent)
        workflow = self._workflows.get(route_key)
        if workflow is None:
            raise ValueError(f"Unsupported v2 workflow route: {route_key!r}")

        # Fields accepted by every workflow context.
        common = {"runtime": runtime_context, "route": route, "rag_session_id": rag_session_id}
        if route.intent == V2Intent.GENERAL_QA:
            flow_context = GeneralQaSummaryContext(
                **common,
                prompt_name=self._general_summary_prompt_name,
                workflow_llm_enabled=self._workflow_llm_enabled,
            )
        elif route.subintent == V2Subintent.FIND_FILES:
            flow_context = DocExplainFindFilesContext(**common)
        elif route.subintent == V2Subintent.API_EXPOSED:
            flow_context = DocExplainApiExposedContext(**common)
        elif route.intent == V2Intent.DOC_UPDATE and route.subintent == V2Subintent.FROM_FEATURE:
            flow_context = DocUpdateFromFeatureContext(
                **common,
                doc_rules_enabled=self._doc_rules_enabled,
            )
        else:
            flow_context = DocExplainSummaryContext(
                **common,
                prompt_name=self._docs_summary_prompt_name,
                workflow_llm_enabled=self._workflow_llm_enabled,
            )
        return await workflow.run(flow_context)
@@ -0,0 +1,17 @@
# DOC_EXPLAIN / API_EXPOSED Workflow
## Контракт сабинтента
| Поле | Значение |
|---|---|
| `domain` | `DOCS` |
| `intent` | `DOC_EXPLAIN` |
| `subintent` | `API_EXPOSED` |
| `workflow_id` | `v2.docs_explain.api_exposed` |
| `source` | `workflow.v2.api_exposed` |
## Выходной формат
Ответ формируется детерминированно как отсортированный список эндпоинтов в формате `METHOD /path` (например, `GET /users`), по одному на строку.
Scope учитывается через retrieval-policy фильтры `metadata.domain`/`metadata.subdomain` и path-префиксы API-документации.
@@ -0,0 +1,4 @@
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.graph import DocExplainApiExposedGraph
__all__ = ["DocExplainApiExposedGraph"]
@@ -0,0 +1,48 @@
from __future__ import annotations
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.build_api_exposed_evidence_step import (
BuildApiExposedEvidenceStep,
)
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.fetch_rag_rows_step import FetchRagRowsStep
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.finalize_api_exposed_answer_step import (
FinalizeApiExposedAnswerStep,
)
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.require_rag_session_step import (
RequireRagSessionStep,
)
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.resolve_retrieval_plan_step import (
ResolveRetrievalPlanStep,
)
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.retrieval.api_endpoint_collector import (
ApiEndpointCollector,
)
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.buffered_graph import (
DocExplainApiExposedWorkflowGraph,
)
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context import (
DocExplainApiExposedContext,
)
from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
class DocExplainApiExposedGraph(DocExplainApiExposedWorkflowGraph[DocExplainApiExposedContext]):
    """Workflow graph for DOC_EXPLAIN / API_EXPOSED.

    Steps, in order: require a RAG session, resolve the retrieval plan, fetch
    rows, collect endpoints, finalize the answer.
    """

    def __init__(
        self,
        policy_resolver: RetrievalPlanResolver,
        rag_adapter: V2RagRetrievalAdapter,
    ) -> None:
        pipeline = [
            RequireRagSessionStep(
                missing_message="Для процесса v2 нужна активная RAG-сессия проекта с проиндексированной документацией."
            ),
            ResolveRetrievalPlanStep(policy_resolver),
            FetchRagRowsStep(rag_adapter),
            BuildApiExposedEvidenceStep(ApiEndpointCollector()),
            FinalizeApiExposedAnswerStep(),
        ]
        super().__init__(
            workflow_id="v2.docs_explain.api_exposed",
            source="workflow.v2.api_exposed",
            steps=pipeline,
        )
@@ -0,0 +1,2 @@
"""Steps for DOC_EXPLAIN/API_EXPOSED workflow."""
@@ -0,0 +1,39 @@
from __future__ import annotations
from typing import Generic, TypeVar
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.retrieval.api_endpoint_collector import (
ApiEndpointCollector,
)
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context_protocols import ApiWorkflowContext
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.pipeline_logging import log_pipeline_step
from app.core.agent.utils.workflow import WorkflowStep
TContext = TypeVar("TContext", bound=ApiWorkflowContext)
class BuildApiExposedEvidenceStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that collects API endpoints from the retrieved rows."""

    step_id = "build_api_exposed_evidence"
    title = "Сборка списка API"

    def __init__(self, collector: ApiEndpointCollector) -> None:
        self._collector = collector

    async def run(self, context: TContext) -> TContext:
        """Fill ``context.endpoints`` and trace the result; no-op if an answer is already set."""
        if context.answer:
            return context
        context.endpoints = self._collector.collect(context.retrieved_rows)
        endpoint_count = len(context.endpoints)
        evidence_trace = context.runtime.trace.module("process.v2.evidence")
        evidence_trace.log(
            "evidence_assembled",
            {"mode": "api_exposed", "endpoint_count": endpoint_count, "endpoints": context.endpoints},
        )
        # The pipeline log carries only the count, not the endpoint list itself.
        log_pipeline_step(
            context.runtime,
            "evidence_assembled",
            {"mode": "api_exposed", "endpoint_count": endpoint_count},
        )
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Return the number of collected endpoints for the workflow trace."""
        return {"endpoint_count": len(context.endpoints)}
@@ -0,0 +1,31 @@
from __future__ import annotations
from typing import Generic, TypeVar
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context_protocols import RetrievalWorkflowContext
from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
from app.core.agent.utils.workflow import WorkflowStep
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
class FetchRagRowsStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that loads rows from RAG according to the resolved retrieval plan."""

    step_id = "fetch_rag_rows"
    title = "Получение строк из RAG"

    def __init__(self, rag_adapter: V2RagRetrievalAdapter) -> None:
        self._rag_adapter = rag_adapter

    async def run(self, context: TContext) -> TContext:
        """Store fetched rows on the context; skipped when an answer exists or no plan was resolved."""
        if context.answer or context.retrieval_plan is None:
            return context
        rows = await self._rag_adapter.fetch_rows(
            context.rag_session_id,
            context.route.normalized_query,
            context.retrieval_plan,
        )
        context.retrieved_rows = rows
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Return the number of retrieved rows for the workflow trace."""
        return {"retrieved_row_count": len(context.retrieved_rows)}
@@ -0,0 +1,30 @@
from __future__ import annotations
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context import DocExplainApiExposedContext
from app.core.agent.utils.workflow import WorkflowStep
class FinalizeApiExposedAnswerStep(WorkflowStep[DocExplainApiExposedContext]):
    """Workflow step that turns the collected endpoints into the final answer text."""

    step_id = "finalize_api_exposed_answer"
    title = "Формирование ответа со списком API"

    async def run(self, context: DocExplainApiExposedContext) -> DocExplainApiExposedContext:
        """Set ``answer`` and ``answer_generated_payload`` unless an answer already exists.

        Endpoints are joined one per line; with no endpoints a fixed
        "nothing found" message is used instead.
        """
        if context.answer:
            return context
        if context.endpoints:
            text = "\n".join(context.endpoints)
            mode = "deterministic"
        else:
            text = "Не нашёл задокументированных API-эндпоинтов в выбранном scope."
            mode = "insufficient_evidence"
        context.answer = text
        context.answer_generated_payload = {
            "answer_mode": mode,
            "answer_length": len(context.answer),
        }
        return context

    def trace_output(self, context: DocExplainApiExposedContext) -> dict[str, object]:
        """Return the answer length for the workflow trace."""
        return {"answer_length": len(context.answer)}
@@ -0,0 +1,30 @@
from __future__ import annotations
from typing import Generic, TypeVar
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context_protocols import RetrievalWorkflowContext
from app.core.agent.utils.workflow import WorkflowStep
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
class RequireRagSessionStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that answers with a fixed message when no RAG session id is set."""

    step_id = "require_rag_session"
    title = "Проверка RAG-сессии"

    def __init__(self, *, missing_message: str) -> None:
        self._missing_message = missing_message

    async def run(self, context: TContext) -> TContext:
        """Leave the context untouched when a session id exists; otherwise set the fallback answer."""
        if not context.rag_session_id:
            context.answer = self._missing_message
            context.answer_generated_payload = {
                "answer_mode": "insufficient_evidence",
                "answer_length": len(context.answer),
            }
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Return whether a RAG session id is present, for the workflow trace."""
        return {"has_rag_session": bool(context.rag_session_id)}
@@ -0,0 +1,38 @@
from __future__ import annotations
from typing import Generic, TypeVar
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context_protocols import RetrievalWorkflowContext
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.pipeline_logging import log_pipeline_step
from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
from app.core.agent.utils.workflow import WorkflowStep
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
class ResolveRetrievalPlanStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that resolves the retrieval plan for the current route."""

    step_id = "resolve_retrieval_plan"
    title = "Выбор retrieval-плана"

    def __init__(self, resolver: RetrievalPlanResolver) -> None:
        self._resolver = resolver

    async def run(self, context: TContext) -> TContext:
        """Store the resolved plan on the context and trace it; no-op if an answer is already set."""
        if context.answer:
            return context
        plan = self._resolver.resolve(context.route)
        context.retrieval_plan = plan
        policy_trace = context.runtime.trace.module("process.v2.retrieval_policy")
        policy_trace.log(
            "retrieval_plan_resolved",
            {"profile": plan.profile, "layers": plan.layers, "limit": plan.limit, "filters": plan.filters},
        )
        # The pipeline log omits the limit.
        log_pipeline_step(
            context.runtime,
            "retrieval_profile_selected",
            {"profile": plan.profile, "layers": plan.layers, "filters": plan.filters},
        )
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Return the plan's profile (empty string when it has none) for the workflow trace."""
        return {"profile": getattr(context.retrieval_plan, "profile", "")}
@@ -0,0 +1,2 @@
"""Retrieval helpers for DOC_EXPLAIN/API_EXPOSED workflow."""
@@ -0,0 +1,77 @@
from __future__ import annotations
import re
class ApiEndpointCollector:
    """Extracts ``METHOD /path`` endpoint strings from retrieved rows.

    Per row, endpoints are taken from ``metadata["endpoint"]`` and from
    ``METHOD /path`` pairs found in ``metadata["name"]``,
    ``metadata["summary_text"]`` and ``title``. Only when none of these yields
    an endpoint is a bare ``/path`` in the title accepted, with ``GET`` as the
    assumed method.
    """

    _METHODS = ("GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS")
    # One or more ``|``-separated methods followed by a path, e.g. "GET | POST /items".
    _ENDPOINT_VALUE_RE = re.compile(
        r"\b((?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)(?:\s*\|\s*(?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS))*)\s+(/[-a-zA-Z0-9_./{}]+)"
    )
    _METHOD_PATH_RE = re.compile(r"\b(GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)\s+(/[-a-zA-Z0-9_./{}]+)")
    _PATH_RE = re.compile(r"(/[-a-zA-Z0-9_./{}]+)")
    # Paths ending in these suffixes are document file names, not endpoints.
    _DOC_EXTS = (".md", ".yaml", ".yml", ".json")

    def collect(self, rows: list[dict]) -> list[str]:
        """Return the sorted, de-duplicated ``METHOD /path`` strings found in ``rows``.

        The title fallback (bare path, assumed ``GET``) is applied per row and
        only when that row produced no explicit ``METHOD /path``. Otherwise a
        row titled ``POST /users`` would also report a fabricated
        ``GET /users``.
        """
        found: set[str] = set()
        for row in rows or []:
            row_endpoints: list[str] = []
            self._append_from_endpoint_metadata(row_endpoints, row)
            for raw in self._row_candidates(row):
                self._append_from_text(row_endpoints, raw)
            if not row_endpoints:
                self._append_from_title_fallback(row_endpoints, row)
            found.update(row_endpoints)
        return sorted(found)

    def _append_from_title_fallback(self, out: list[str], row: dict) -> None:
        """Append every bare path found in the row title, using the default method."""
        title = str(row.get("title") or "").strip()
        if not title:
            return
        for match in self._PATH_RE.findall(title):
            self._append_default(out, match)

    def _append_from_endpoint_metadata(self, out: list[str], row: dict) -> None:
        """Append endpoints parsed from the ``metadata["endpoint"]`` value, if any."""
        metadata = dict(row.get("metadata") or {})
        endpoint_value = str(metadata.get("endpoint") or "").strip()
        if not endpoint_value:
            return
        for methods, path in self._ENDPOINT_VALUE_RE.findall(endpoint_value):
            self._append_methods_with_path(out, methods, path)

    def _row_candidates(self, row: dict) -> list[str]:
        """Return the non-blank texts (name, summary text, title) to scan for ``METHOD /path``."""
        metadata = dict(row.get("metadata") or {})
        values = [
            metadata.get("name"),
            metadata.get("summary_text"),
            row.get("title"),
        ]
        return [str(value or "") for value in values if str(value or "").strip()]

    def _append_from_text(self, out: list[str], text: str) -> None:
        """Append every ``METHOD /path`` pair found in ``text``."""
        for method, path in self._METHOD_PATH_RE.findall(text):
            self._append_with_method(out, method, path)

    def _append_methods_with_path(self, out: list[str], methods_raw: str, path_raw: str) -> None:
        """Append ``path_raw`` once per known method in the ``|``-separated ``methods_raw``.

        Falls back to the default method when no known method is present.
        """
        methods = [
            part.strip().upper()
            for part in str(methods_raw or "").split("|")
            if part.strip().upper() in self._METHODS
        ]
        if not methods:
            self._append_default(out, path_raw)
            return
        for method in methods:
            self._append_with_method(out, method, path_raw)

    def _append_default(self, out: list[str], raw: str) -> None:
        """Append ``raw`` with ``GET`` as the assumed method."""
        self._append_with_method(out, "GET", raw)

    def _append_with_method(self, out: list[str], method: str, raw: str) -> None:
        """Normalize ``raw`` and append ``"METHOD /path"`` to ``out`` if valid and not yet present.

        The path is trimmed of surrounding punctuation and lowercased. Values
        that do not start with ``/`` or that end in a document extension are
        rejected.
        """
        value = str(raw or "").strip().strip("`'\"()[].,:;!?").lower()
        if not value.startswith("/"):
            return
        if value.endswith(self._DOC_EXTS):
            return
        endpoint = f"{method.upper()} {value}"
        if endpoint not in out:
            out.append(endpoint)
@@ -0,0 +1,64 @@
from __future__ import annotations
from app.core.agent.utils.process_v2.models import V2Intent, V2RouteResult, V2Subintent
from app.core.rag.contracts.enums import RagLayer
from app.core.rag.retrieval.session_retriever import RetrievalPlan
class DocExplainApiExposedRetrievalPolicy:
    """Retrieval policy for the DOC_EXPLAIN / API_EXPOSED route."""

    _LAYERS = [RagLayer.DOCS_DOCUMENT_CATALOG]
    _API_PREFIXES = ["docs/api/", "docs/endpoints/", "docs/methods/", "api/", "endpoints/", "methods/"]

    def supports(self, route: V2RouteResult) -> bool:
        """Return True for the DOC_EXPLAIN intent combined with the API_EXPOSED subintent."""
        is_doc_explain = route.intent == V2Intent.DOC_EXPLAIN
        return is_doc_explain and route.subintent == V2Subintent.API_EXPOSED

    def resolve(self, route: V2RouteResult) -> RetrievalPlan:
        """Return the ``api_exposed`` plan: document-catalog layer, limit 400, route-derived filters."""
        filters = self._filters(route)
        return RetrievalPlan(
            profile="api_exposed",
            layers=list(self._LAYERS),
            limit=400,
            filters=filters,
        )

    def _filters(self, route: V2RouteResult) -> dict[str, object]:
        """Build the filter dict; query signals and domain/subdomain are added only when set."""
        signals = self._query_signals(route)
        anchors = route.anchors
        result: dict[str, object] = {
            "metadata.type": "api_method",
            "prefer_path_prefixes": list(self._API_PREFIXES),
            "target_doc_hints": list(anchors.target_doc_hints),
            "prefer_like_patterns": self._like_patterns(route),
        }
        optional = (
            ("query_signals", signals),
            ("metadata.domain", anchors.process_domain),
            ("metadata.subdomain", anchors.process_subdomain),
        )
        for key, value in optional:
            if value:
                result[key] = value
        return result

    def _like_patterns(self, route: V2RouteResult) -> list[str]:
        """Return lowercase ``%term%`` patterns for the fixed API words plus the route's terms and anchors."""
        terms: list[str] = [
            "api",
            "endpoint",
            "method",
            "эндпоинт",
            "метод",
            *route.target_terms,
            *route.anchors.endpoint_paths,
            *route.anchors.target_doc_hints,
            *(candidate.value for candidate in route.anchors.candidate_apis),
        ]
        return [f"%{item.lower()}%" for item in _unique(terms)]

    def _query_signals(self, route: V2RouteResult) -> list[str]:
        """Return unique target terms and endpoint paths, excluding the generic API words."""
        generic_words = {"api", "endpoint", "method", "эндпоинт", "метод"}
        candidates = [*route.target_terms, *route.anchors.endpoint_paths]
        return [item for item in _unique(candidates) if item.lower() not in generic_words]
def _unique(items: list[str]) -> list[str]:
out: list[str] = []
seen: set[str] = set()
for item in items:
value = str(item or "").strip()
if not value or value in seen:
continue
seen.add(value)
out.append(value)
return out
@@ -0,0 +1,2 @@
"""Runtime helpers for the DOC_EXPLAIN/API_EXPOSED workflow."""
@@ -0,0 +1,42 @@
"""Buffered graph for DOC_EXPLAIN/API_EXPOSED workflow."""
from __future__ import annotations
from typing import TypeVar
from app.core.agent.utils.workflow.context import WorkflowContext
from app.core.agent.utils.workflow.graph import WorkflowGraph
TContext = TypeVar("TContext", bound=WorkflowContext)
class DocExplainApiExposedWorkflowGraph(WorkflowGraph[TContext]):
    """Workflow graph that traces each step and also flushes all step traces in one final event."""

    async def run(self, context: TContext) -> TContext:
        """Run the steps in order and return the final context.

        For each step: publish a status message, run it, log a
        ``workflow_step_traced`` event and buffer the same data. After the
        last step the buffer is logged as ``workflow_trace_flushed``, followed
        by ``workflow_completed``.
        """
        workflow_id = self._workflow_id
        source = self._source
        trace = context.runtime.trace.module(source)
        trace.log("workflow_started", {"workflow_id": workflow_id})
        buffered: list[dict[str, object]] = []
        for step in self._steps:
            # Capture the input view before the step changes the context.
            step_input = step.trace_input(context)
            request_id = context.runtime.request.request_id
            await context.runtime.publisher.publish_status(
                request_id,
                source,
                f"Шаг workflow: {step.title}.",
                {"workflow_id": workflow_id, "step_id": step.step_id},
            )
            context = await step.run(context)
            step_output = step.trace_output(context)
            trace.log(
                "workflow_step_traced",
                {
                    "workflow_id": workflow_id,
                    "step": {"id": step.step_id, "title": step.title},
                    "input": step_input,
                    "output": step_output,
                },
            )
            buffered.append(
                {"step_id": step.step_id, "title": step.title, "input": step_input, "output": step_output}
            )
        trace.log("workflow_trace_flushed", {"workflow_id": workflow_id, "steps": buffered})
        trace.log("workflow_completed", {"workflow_id": workflow_id})
        return context
@@ -0,0 +1,20 @@
from __future__ import annotations
from dataclasses import dataclass, field
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
from app.core.agent.utils.process_v2.models import V2RouteResult
from app.core.rag.retrieval.session_retriever import RetrievalPlan
@dataclass(slots=True)
class DocExplainApiExposedContext:
runtime: RuntimeExecutionContext
route: V2RouteResult
rag_session_id: str
retrieval_plan: RetrievalPlan | None = None
retrieved_rows: list[dict] = field(default_factory=list)
endpoints: list[str] = field(default_factory=list)
answer: str = ""
answer_generated_payload: dict[str, object] | None = None
@@ -0,0 +1,24 @@
"""Context protocols for the DOC_EXPLAIN/API_EXPOSED workflow."""
from __future__ import annotations
from typing import Protocol
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
from app.core.agent.utils.process_v2.models import V2RouteResult
from app.core.rag.retrieval.session_retriever import RetrievalPlan
class RetrievalWorkflowContext(Protocol):
    """Structural type listing the context attributes the retrieval steps use."""

    # Request-scoped inputs.
    runtime: RuntimeExecutionContext
    route: V2RouteResult
    rag_session_id: str

    # Retrieval state.
    retrieval_plan: RetrievalPlan | None
    retrieved_rows: list[dict]

    # Answer state.
    answer: str
    answer_generated_payload: dict[str, object] | None
class ApiWorkflowContext(RetrievalWorkflowContext, Protocol):
    """Retrieval context protocol extended with an ``endpoints`` list."""

    endpoints: list[str]
@@ -0,0 +1,8 @@
"""Pipeline logging helpers for DOC_EXPLAIN/API_EXPOSED."""
from __future__ import annotations
def log_pipeline_step(runtime, step: str, payload: dict[str, object]) -> None:
    """Log *payload* under event name *step* on the ``process.v2.pipeline`` trace module."""
    pipeline_trace = runtime.trace.module("process.v2.pipeline")
    pipeline_trace.log(step, payload)
@@ -0,0 +1,159 @@
# DOC_EXPLAIN / FIND_FILES Workflow
## Контракт сабинтента
| Поле | Значение |
|---|---|
| `domain` | `DOCS` |
| `intent` | `DOC_EXPLAIN` |
| `subintent` | `FIND_FILES` |
| `workflow_id` | `v2.docs_explain.find_files` |
| `source` | `workflow.v2.find_files` |
## Диаграмма флоу
```mermaid
flowchart TD
A["RequireRagSessionStep"] --> B["ResolveRetrievalPlanStep"]
B --> C["FetchRagRowsStep"]
C --> D["PrepareCandidateRowsStep"]
D --> E["BuildFilesEvidenceStep"]
E --> F["ApplyFilesEvidenceGateStep"]
F --> G["FinalizeFindFilesAnswerStep"]
```
## Шаги процесса
### 1) `RequireRagSessionStep`
Шаг проверяет, есть ли активная RAG-сессия. Если `rag_session_id` пустой, workflow останавливает дальнейший retrieval и пишет пользовательское сообщение в `answer`. Для `find_files` gate-решение на этом шаге обычно не ставится, но механизм поддержан.
**Входные параметры**
| Параметр | Откуда берётся | Описание |
|---|---|---|
| `context.rag_session_id` | `V2Process` -> `DocExplainFindFilesContext` | Идентификатор активной RAG-сессии |
| `self._missing_message` | Конфигурация в `graph.py` | Текст ответа, если сессии нет |
| `self._missing_gate` | Конфигурация шага | Опциональный gate для раннего выхода |
**Выходные параметры**
| Параметр | Как формируется |
|---|---|
| `context.answer` | Заполняется `missing_message`, если `rag_session_id` пустой |
| `context.gate_decision` | Заполняется `missing_gate`, если он передан и сессии нет |
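| `context.answer_generated_payload` | Формируется как `{"answer_mode", "answer_length"}` при раннем ответе, но только если передан `missing_gate` (`answer_mode` берётся из него); без gate поле не заполняется |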
### 2) `ResolveRetrievalPlanStep`
Шаг превращает route в retrieval-план через `RetrievalPlanResolver`. Профиль для этого сабинтента — `file_lookup`, с подходящими слоями и фильтрами. Параллельно пишет trace-событие `retrieval_plan_resolved`.
**Входные параметры**
| Параметр | Откуда берётся | Описание |
|---|---|---|
| `context.route` | Результат `intent_router` | Route с `anchors`, `target_terms`, `scope_type` |
| `self._resolver` | DI из `graph.py` | Реализация policy-резолвера |
| `context.answer` | Предыдущие шаги | Если уже есть ответ, шаг пропускается |
**Выходные параметры**
| Параметр | Как формируется |
|---|---|
| `context.retrieval_plan` | `self._resolver.resolve(context.route)` |
| `process.v2.retrieval_policy.retrieval_plan_resolved` | Лог с `profile`, `layers`, `limit`, `filters` |
### 3) `FetchRagRowsStep`
Шаг выполняет retrieval через `V2RagRetrievalAdapter`. Внутри адаптера объединяются seed-строки по `target_doc_hints` и основной retrieval по эмбеддингам/фильтрам плана. Если план не сформирован или уже есть готовый ответ, шаг ничего не делает.
**Входные параметры**
| Параметр | Откуда берётся | Описание |
|---|---|---|
| `context.rag_session_id` | Контекст workflow | Сессия для поиска в `rag_chunks` |
| `context.route.normalized_query` | Route | Нормализованный текст запроса |
| `context.retrieval_plan` | Предыдущий шаг | План retrieval |
**Выходные параметры**
| Параметр | Как формируется |
|---|---|
| `context.retrieved_rows` | `await rag_adapter.fetch_rows(rag_session_id, normalized_query, retrieval_plan)` |
### 4) `PrepareCandidateRowsStep`
Шаг собирает итоговые candidate rows для ранжирования файлов. Он добавляет metadata-lookup кандидаты и подмешивает seed по `target_doc_hints`, затем сохраняет merged-список в `context.rows`. Дополнительно пишет детальный retrieval-trace.
**Входные параметры**
| Параметр | Откуда берётся | Описание |
|---|---|---|
| `context.retrieved_rows` | `FetchRagRowsStep` | Строки после retrieval |
| `context.route` | Route | Нужен для hints/aliases/terms |
| `self._builder` | `CandidateRowsBuilder()` | Логика merge и metadata lookup |
**Выходные параметры**
| Параметр | Как формируется |
|---|---|
| `context.rows` | `prepared.rows` из `CandidateRowsBuilder.build(...)` |
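| `process.v2.rag_retrieval.rag_rows_fetched` | Лог с `profile`, `row_count` и деталями rows; источники и top-результаты пишутся в события `process.v2.pipeline` (`candidate_generation`, `retrieval_executed`) |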
### 5) `BuildFilesEvidenceStep`
Шаг ранжирует candidate rows в список файлов через `DocsEvidenceAssembler.assemble_files`. На выходе формируется shortlist `RetrievedFile` с оценками и причинами совпадения. Этот shortlist становится опорой для gate и финального ответа.
**Входные параметры**
| Параметр | Откуда берётся | Описание |
|---|---|---|
| `context.rows` | `PrepareCandidateRowsStep` | Подготовленные кандидаты |
| `context.route` | Route | Сигналы маршрута для ranking |
| `self._assembler` | DI из `graph.py` | Сборщик evidence |
**Выходные параметры**
| Параметр | Как формируется |
|---|---|
| `context.files` | `assemble_files(context.rows, context.route)` |
| `process.v2.evidence.evidence_assembled` | Лог file-count и путей |
### 6) `ApplyFilesEvidenceGateStep`
Шаг проверяет качество shortlist через `DocsEvidenceGate.check_files`. Решение gate определяет, можно ли отвечать детерминированно или нужно более осторожное поведение. Для прозрачности пишет pipeline-лог с полями `passed/reason/answer_mode`.
**Входные параметры**
| Параметр | Откуда берётся | Описание |
|---|---|---|
| `context.route` | Route | Сигналы запроса для gate |
| `context.files` | `BuildFilesEvidenceStep` | Ранжированные файлы |
| `self._gate` | DI из `graph.py` | Правила оценки evidence |
**Выходные параметры**
| Параметр | Как формируется |
|---|---|
| `context.gate_decision` | `self._gate.check_files(context.route, context.files)` |
| `process.v2.pipeline.evidence_gate_checked` | Лог результата gate |
### 7) `FinalizeFindFilesAnswerStep`
Шаг собирает финальный текстовый ответ без LLM. Если файлов нет, возвращается `insufficient_evidence`; если файл один — отдаётся один путь; если несколько — до 4 путей. Если gate вернул `low_confidence_shortlist`, также возвращается ограниченный список путей.
**Входные параметры**
| Параметр | Откуда берётся | Описание |
|---|---|---|
| `context.files` | `BuildFilesEvidenceStep` | Список найденных файлов |
| `context.gate_decision` | `ApplyFilesEvidenceGateStep` | Режим и причина ответа |
| `context.answer` | Предыдущие шаги | Если уже заполнен, шаг пропускается |
**Выходные параметры**
| Параметр | Как формируется |
|---|---|
| `context.answer` | Детерминированно: сообщение об отсутствии подходящих файлов / 1 путь / до 4 путей |
| `context.answer_generated_payload` | `{"answer_mode", "answer_length"}` по ветке формирования |
@@ -0,0 +1,3 @@
from app.core.agent.processes.v2.workflows.doc_explain_find_files.graph import DocExplainFindFilesGraph
__all__ = ["DocExplainFindFilesGraph"]
@@ -0,0 +1,49 @@
from __future__ import annotations
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.buffered_graph import DocExplainFindFilesWorkflowGraph
from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.retrieval.candidate_rows import CandidateRowsBuilder
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context import DocExplainFindFilesContext
from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.apply_files_evidence_gate_step import (
ApplyFilesEvidenceGateStep,
)
from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.build_files_evidence_step import BuildFilesEvidenceStep
from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.fetch_rag_rows_step import FetchRagRowsStep
from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.finalize_find_files_answer_step import (
FinalizeFindFilesAnswerStep,
)
from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.prepare_candidate_rows_step import (
PrepareCandidateRowsStep,
)
from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.require_rag_session_step import RequireRagSessionStep
from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.resolve_retrieval_plan_step import (
ResolveRetrievalPlanStep,
)
from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
class DocExplainFindFilesGraph(DocExplainFindFilesWorkflowGraph[DocExplainFindFilesContext]):
    """Workflow graph for DOC_EXPLAIN/FIND_FILES built from the seven find-files steps."""

    def __init__(
        self,
        policy_resolver: RetrievalPlanResolver,
        rag_adapter: V2RagRetrievalAdapter,
        evidence_assembler: DocsEvidenceAssembler,
        evidence_gate: DocsEvidenceGate,
    ) -> None:
        """Wire the injected collaborators into the ordered step pipeline."""
        session_check = RequireRagSessionStep(
            missing_message="Для процесса v2 нужна активная RAG-сессия проекта с проиндексированной документацией."
        )
        # Order matters: each step reads what the previous ones wrote to the context.
        pipeline = [
            session_check,
            ResolveRetrievalPlanStep(policy_resolver),
            FetchRagRowsStep(rag_adapter),
            PrepareCandidateRowsStep(CandidateRowsBuilder()),
            BuildFilesEvidenceStep(evidence_assembler),
            ApplyFilesEvidenceGateStep(evidence_gate),
            FinalizeFindFilesAnswerStep(),
        ]
        super().__init__(
            workflow_id="v2.docs_explain.find_files",
            source="workflow.v2.find_files",
            steps=pipeline,
        )
@@ -0,0 +1,36 @@
from __future__ import annotations
from typing import Generic, TypeVar
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import FindFilesWorkflowContext
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.pipeline_logging import log_pipeline_step
from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
from app.core.agent.utils.workflow import WorkflowStep
TContext = TypeVar("TContext", bound=FindFilesWorkflowContext)
class ApplyFilesEvidenceGateStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that runs the evidence gate over the assembled file list."""

    step_id = "apply_files_evidence_gate"
    title = "Проверка file evidence"

    def __init__(self, gate: DocsEvidenceGate) -> None:
        """Keep the gate used to evaluate ``context.files``."""
        self._gate = gate

    async def run(self, context: TContext) -> TContext:
        """Store the gate decision on the context and log it; no-op when an answer exists."""
        if not context.answer:
            decision = self._gate.check_files(context.route, context.files)
            context.gate_decision = decision
            summary = {
                "passed": decision.passed,
                "reason": decision.reason,
                "answer_mode": decision.answer_mode,
            }
            log_pipeline_step(context.runtime, "evidence_gate_checked", summary)
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report whether a gate decision exists and passed."""
        decision = context.gate_decision
        passed = bool(decision.passed) if decision else False
        return {"passed": passed}
@@ -0,0 +1,45 @@
from __future__ import annotations
from typing import Generic, TypeVar
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import FindFilesWorkflowContext
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.pipeline_logging import log_pipeline_step, log_ranking
from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
from app.core.agent.utils.workflow import WorkflowStep
TContext = TypeVar("TContext", bound=FindFilesWorkflowContext)
class BuildFilesEvidenceStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that turns candidate rows into the file list and logs it."""

    step_id = "build_files_evidence"
    title = "Сборка file evidence"

    def __init__(self, assembler: DocsEvidenceAssembler) -> None:
        """Keep the assembler that builds files from rows."""
        self._assembler = assembler

    async def run(self, context: TContext) -> TContext:
        """Fill ``context.files`` and emit evidence, pipeline and ranking logs.

        Does nothing when an answer is already present.
        """
        if context.answer:
            return context
        assembled = self._assembler.assemble_files(context.rows, context.route)
        context.files = assembled
        total = len(assembled)
        evidence_trace = context.runtime.trace.module("process.v2.evidence")
        evidence_trace.log(
            "evidence_assembled",
            {
                "mode": "find_files",
                "file_count": total,
                "files": [entry.path for entry in assembled],
            },
        )
        first_path = assembled[0].path if assembled else None
        log_pipeline_step(
            context.runtime,
            "evidence_assembled",
            {"mode": "find_files", "primary_file": first_path, "file_count": total},
        )
        log_ranking(context.runtime, assembled)
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report how many files are on the context."""
        return {"file_count": len(context.files)}
@@ -0,0 +1,30 @@
from __future__ import annotations
from typing import Generic, TypeVar
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import RetrievalWorkflowContext
from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
from app.core.agent.utils.workflow import WorkflowStep
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
class FetchRagRowsStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that fetches rows through the RAG adapter."""

    step_id = "fetch_rag_rows"
    title = "Получение строк из RAG"

    def __init__(self, rag_adapter: V2RagRetrievalAdapter) -> None:
        """Keep the adapter used to fetch rows."""
        self._rag_adapter = rag_adapter

    async def run(self, context: TContext) -> TContext:
        """Fill ``context.retrieved_rows``; skip when answered or when no plan exists."""
        plan = context.retrieval_plan
        already_answered = bool(context.answer)
        if not already_answered and plan is not None:
            fetched = await self._rag_adapter.fetch_rows(
                context.rag_session_id,
                context.route.normalized_query,
                plan,
            )
            context.retrieved_rows = fetched
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report the number of retrieved rows."""
        return {"retrieved_row_count": len(context.retrieved_rows)}
@@ -0,0 +1,34 @@
from __future__ import annotations
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context import DocExplainFindFilesContext
from app.core.agent.utils.workflow import WorkflowStep
class FinalizeFindFilesAnswerStep(WorkflowStep[DocExplainFindFilesContext]):
    """Workflow step that builds the final plain-text answer from the file list."""

    step_id = "finalize_find_files_answer"
    title = "Сборка списка файлов"

    async def run(self, context: DocExplainFindFilesContext) -> DocExplainFindFilesContext:
        """Set ``context.answer`` and its payload unless an answer already exists.

        No files gives the fixed "not found" message with mode
        ``insufficient_evidence``. A gate reason of
        ``low_confidence_shortlist`` gives up to four paths with the gate's
        own answer mode. Otherwise the mode is ``deterministic``, with a
        single path or up to four newline-joined paths.
        """
        if context.answer:
            return context
        files = context.files
        if files:
            decision = context.gate_decision
            low_confidence = decision is not None and decision.reason == "low_confidence_shortlist"
            mode = decision.answer_mode if low_confidence else "deterministic"
            if low_confidence or len(files) != 1:
                context.answer = "\n".join(entry.path for entry in files[:4])
            else:
                context.answer = files[0].path
        else:
            mode = "insufficient_evidence"
            context.answer = "Не нашёл файлов документации, которые уверенно соответствуют запросу."
        context.answer_generated_payload = {
            "answer_mode": mode,
            "answer_length": len(context.answer),
        }
        return context

    def trace_output(self, context: DocExplainFindFilesContext) -> dict[str, object]:
        """Report the length of the current answer."""
        return {"answer_length": len(context.answer)}
@@ -0,0 +1,36 @@
from __future__ import annotations
from typing import Generic, TypeVar
from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.retrieval.candidate_rows import CandidateRowsBuilder
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import RetrievalWorkflowContext
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.pipeline_logging import log_retrieval_trace
from app.core.agent.utils.workflow import WorkflowStep
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
class PrepareCandidateRowsStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that builds the merged candidate rows and logs the retrieval trace."""

    step_id = "prepare_candidate_rows"
    title = "Подготовка candidate rows"

    def __init__(self, builder: CandidateRowsBuilder) -> None:
        """Keep the builder that produces candidate rows."""
        self._builder = builder

    async def run(self, context: TContext) -> TContext:
        """Fill ``context.rows``; skip when answered or when no plan exists."""
        plan = context.retrieval_plan
        if plan is None or context.answer:
            return context
        result = self._builder.build(context.retrieved_rows, context.route)
        context.rows = result.rows
        log_retrieval_trace(
            context.runtime,
            context.route,
            plan,
            context.retrieved_rows,
            result.metadata_rows,
            result.rows,
        )
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report the number of candidate rows."""
        return {"row_count": len(context.rows)}
@@ -0,0 +1,43 @@
from __future__ import annotations
from typing import Generic, TypeVar
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import RetrievalWorkflowContext
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.pipeline_logging import log_pipeline_step
from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
from app.core.agent.utils.workflow import WorkflowStep
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
class RequireRagSessionStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that answers early when the context has no RAG session id."""

    step_id = "require_rag_session"
    title = "Проверка RAG-сессии"

    def __init__(self, *, missing_message: str, missing_gate: EvidenceGateDecision | None = None) -> None:
        """Keep the early-exit message and the optional gate decision that goes with it."""
        self._missing_message = missing_message
        self._missing_gate = missing_gate

    async def run(self, context: TContext) -> TContext:
        """Write the missing-session answer when ``rag_session_id`` is falsy.

        The gate decision, answer payload and pipeline log are written only
        when a ``missing_gate`` was configured.
        """
        if context.rag_session_id:
            return context
        context.answer = self._missing_message
        gate = self._missing_gate
        if gate is None:
            return context
        context.gate_decision = gate
        context.answer_generated_payload = {
            "answer_mode": gate.answer_mode,
            "answer_length": len(context.answer),
        }
        gate_summary = {
            "passed": gate.passed,
            "reason": gate.reason,
            "answer_mode": gate.answer_mode,
        }
        log_pipeline_step(context.runtime, "evidence_gate_checked", gate_summary)
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report whether a RAG session id is present."""
        return {"has_rag_session": bool(context.rag_session_id)}
@@ -0,0 +1,37 @@
from __future__ import annotations
from typing import Generic, TypeVar
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import RetrievalWorkflowContext
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.pipeline_logging import log_pipeline_step
from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
from app.core.agent.utils.workflow import WorkflowStep
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
class ResolveRetrievalPlanStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that resolves the retrieval plan for the route and logs it."""

    step_id = "resolve_retrieval_plan"
    title = "Выбор retrieval-плана"

    def __init__(self, resolver: RetrievalPlanResolver) -> None:
        """Keep the resolver that maps a route to a plan."""
        self._resolver = resolver

    async def run(self, context: TContext) -> TContext:
        """Fill ``context.retrieval_plan`` and log it; no-op when an answer exists."""
        if not context.answer:
            resolved = self._resolver.resolve(context.route)
            context.retrieval_plan = resolved
            policy_trace = context.runtime.trace.module("process.v2.retrieval_policy")
            policy_trace.log(
                "retrieval_plan_resolved",
                {
                    "profile": resolved.profile,
                    "layers": resolved.layers,
                    "limit": resolved.limit,
                    "filters": resolved.filters,
                },
            )
            log_pipeline_step(
                context.runtime,
                "retrieval_profile_selected",
                {
                    "profile": resolved.profile,
                    "layers": resolved.layers,
                    "filters": resolved.filters,
                },
            )
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report the plan profile, or an empty string when there is no plan."""
        plan = context.retrieval_plan
        return {"profile": getattr(plan, "profile", "")}
@@ -0,0 +1,2 @@
"""Retrieval-related step helpers for the doc-explain find-files workflow."""
@@ -0,0 +1,43 @@
"""Сборка candidate rows для doc-explain find-files (метаданные + сиды по hints)."""
from __future__ import annotations
from dataclasses import dataclass
from app.core.agent.utils.process_v2.models import V2RouteResult
from app.core.agent.utils.process_v2.rag_retrieval import DocsMetadataLookupIndex
from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import (
RagRowIndex,
merge_row_lists,
normalize_doc_path,
normalized_path_set,
seed_candidates_from_target_hints,
)
@dataclass(slots=True)
class CandidateRowsResult:
metadata_rows: list[dict]
rows: list[dict]
class CandidateRowsBuilder:
    """Builds the candidate rows for find-files from retrieved rows and route hints."""

    def build(self, retrieved_rows: list[dict], route: V2RouteResult) -> CandidateRowsResult:
        """Merge metadata-lookup rows into *retrieved_rows*, then seed from target doc hints.

        Returns both the metadata rows alone and the final candidate list.
        """
        lookup_rows = DocsMetadataLookupIndex(retrieved_rows).lookup(route)
        merged = merge_row_lists(retrieved_rows, lookup_rows)
        candidates = seed_candidates_from_target_hints(
            merged,
            route.anchors.target_doc_hints,
            RagRowIndex(merged),
        )
        self._print_missing_target_hints(route, candidates)
        return CandidateRowsResult(metadata_rows=lookup_rows, rows=candidates)

    def _print_missing_target_hints(self, route: V2RouteResult, rows: list[dict]) -> None:
        """Print an error line for each hinted docs file that is absent from *rows*.

        Only hints that normalize to a ``docs/`` path whose last segment
        contains a dot are checked.
        """
        # NOTE(review): this writes to stdout via print; consider a logger.
        hints = route.anchors.target_doc_hints
        if not hints:
            return
        known_paths = normalized_path_set(rows)
        for hint in hints:
            if not str(hint or "").strip():
                continue
            normalized = normalize_doc_path(hint)
            looks_like_doc_file = normalized.startswith("docs/") and "." in normalized.rsplit("/", 1)[-1]
            if looks_like_doc_file and normalized not in known_paths:
                print("ERROR: target doc missing from candidates:", normalized)
@@ -0,0 +1,99 @@
from __future__ import annotations
from app.core.agent.utils.process_v2.anchor_signals import anchor_signal_types
from app.core.agent.utils.process_v2.models import V2AnchorType, V2RouteResult, V2Subintent
from app.core.rag.contracts.enums import RagLayer
from app.core.rag.retrieval.session_retriever import RetrievalPlan
class DocExplainFindFilesRetrievalPolicy:
    """Retrieval policy for the FIND_FILES subintent (``file_lookup`` profile)."""

    _LAYERS = [RagLayer.DOCS_DOCUMENT_CATALOG, RagLayer.DOCS_ENTITY_CATALOG]

    def supports(self, route: V2RouteResult) -> bool:
        """Return True when the route's subintent is FIND_FILES."""
        return route.subintent == V2Subintent.FIND_FILES

    def resolve(self, route: V2RouteResult) -> RetrievalPlan:
        """Build the ``file_lookup`` plan: the two catalog layers, limit 12, route filters."""
        return RetrievalPlan(
            profile="file_lookup",
            layers=list(self._LAYERS),
            limit=12,
            filters=self._build_filters(route),
        )

    def _build_filters(self, route: V2RouteResult) -> dict[str, object]:
        """Assemble the filter mapping for the plan.

        ``target_doc_hints``, ``prefer_path_prefixes`` and
        ``prefer_like_patterns`` are always present. The domain, subdomain
        and ``path_prefixes`` entries are added only when non-empty.
        """
        filters: dict[str, object] = {"target_doc_hints": list(route.anchors.target_doc_hints)}
        if route.anchors.process_domain:
            filters["metadata.domain"] = route.anchors.process_domain
        if route.anchors.process_subdomain:
            filters["metadata.subdomain"] = route.anchors.process_subdomain
        prefixes = self._path_prefixes(route)
        if prefixes:
            filters["path_prefixes"] = prefixes
        filters["prefer_path_prefixes"] = self._prefer_prefixes(route, prefixes)
        filters["prefer_like_patterns"] = self._like_patterns(route)
        return filters

    def _path_prefixes(self, route: V2RouteResult) -> list[str]:
        """Pick directory prefixes for the search.

        Priority: directories of the target doc hints, then directories of
        ``docs/`` file names, then a prefix chosen by anchor signal type,
        and finally plain ``docs/``.
        """
        hint_prefixes = _prefixes_from_paths(route.anchors.target_doc_hints)
        if hint_prefixes:
            return hint_prefixes
        file_prefixes = [item for item in route.anchors.file_names if str(item).strip().startswith("docs/")]
        derived = _prefixes_from_paths(file_prefixes)
        if derived:
            return derived
        signals = anchor_signal_types(route)
        if V2AnchorType.API_ENDPOINT in signals:
            return ["docs/api/", "docs/"]
        if V2AnchorType.ARCHITECTURE in signals:
            return ["docs/architecture/", "docs/"]
        if V2AnchorType.LOGIC_FLOW in signals:
            return ["docs/logic/", "docs/"]
        if V2AnchorType.DOMAIN_ENTITY in signals:
            return ["docs/domains/", "docs/"]
        return ["docs/"]

    def _prefer_prefixes(self, route: V2RouteResult, prefixes: list[str]) -> list[str]:
        """Return *prefixes*, plus domain and logic directories when a process domain is set."""
        preferred = list(prefixes)
        if route.anchors.process_domain or route.anchors.process_subdomain:
            preferred.extend(["docs/domains/", "docs/logic/"])
        return _unique_terms(preferred or ["docs/"])

    def _like_patterns(self, route: V2RouteResult) -> list[str]:
        """Return lower-cased ``%term%`` patterns used to prefer matching rows.

        With target doc hints, only the hinted file names are used.
        Otherwise the patterns come from the route's terms, endpoint paths,
        file names, entity names, aliases and process domain/subdomain.
        """
        hints = route.anchors.target_doc_hints
        if hints:
            # Trim each hint and take its last path segment. _unique_terms
            # then drops empty names and duplicates: a hint ending in "/"
            # has an empty file name, and "%%" would match every row.
            # str(hint or "") also tolerates None entries.
            names = [str(hint or "").strip().rsplit("/", 1)[-1] for hint in hints]
            return [f"%{name.lower()}%" for name in _unique_terms(names)]
        terms = list(route.target_terms)
        terms.extend(route.anchors.endpoint_paths)
        terms.extend(route.anchors.file_names)
        terms.extend(route.anchors.entity_names)
        terms.extend(route.anchors.matched_aliases)
        if route.anchors.process_domain:
            terms.append(route.anchors.process_domain)
        if route.anchors.process_subdomain:
            terms.append(route.anchors.process_subdomain)
        return [f"%{term.lower()}%" for term in _unique_terms(terms)]
def _prefixes_from_paths(paths: list[str]) -> list[str]:
    """Return the unique parent-directory prefixes of *paths*, each ending in ``/``.

    Paths without a ``/`` (after trimming whitespace and outer slashes) are
    skipped.
    """
    collected: list[str] = []
    for raw_path in paths:
        trimmed = str(raw_path).strip().strip("/")
        parent, separator, _leaf = trimmed.rpartition("/")
        if not separator:
            continue
        collected.append(f"{parent}/")
    return _unique_terms(collected)
def _unique_terms(items: list[str]) -> list[str]:
seen: set[str] = set()
unique: list[str] = []
for raw in items:
value = str(raw or "").strip()
if not value or value in seen:
continue
seen.add(value)
unique.append(value)
return unique
@@ -0,0 +1,2 @@
"""Runtime helpers for the doc-explain find-files workflow."""
@@ -0,0 +1,46 @@
"""Граф workflow doc-explain find-files: буфер шагов и один сброс в trace (на базе utils.workflow)."""
from __future__ import annotations
from typing import TypeVar
from app.core.agent.utils.workflow.context import WorkflowContext
from app.core.agent.utils.workflow.graph import WorkflowGraph
TContext = TypeVar("TContext", bound=WorkflowContext)
class DocExplainFindFilesWorkflowGraph(WorkflowGraph[TContext]):
    """Workflow graph that buffers per-step records and flushes them in one event.

    Each step is logged as ``workflow_step_traced``; the buffered records are
    then written once as ``workflow_trace_flushed``.
    """

    async def run(self, context: TContext) -> TContext:
        """Run every step in order and return the final context.

        For each step a status message is published, the step is awaited, and
        its trace input/output are logged and buffered. After the loop the
        buffer is flushed and ``workflow_completed`` is logged.
        """
        workflow_id = self._workflow_id
        tracer = context.runtime.trace.module(self._source)
        tracer.log("workflow_started", {"workflow_id": workflow_id})
        collected: list[dict[str, object]] = []
        for step in self._steps:
            # Capture the input view before the step can change the context.
            step_input = step.trace_input(context)
            await context.runtime.publisher.publish_status(
                context.runtime.request.request_id,
                self._source,
                f"Шаг workflow: {step.title}.",
                {"workflow_id": workflow_id, "step_id": step.step_id},
            )
            context = await step.run(context)
            step_output = step.trace_output(context)
            tracer.log(
                "workflow_step_traced",
                {
                    "workflow_id": workflow_id,
                    "step": {"id": step.step_id, "title": step.title},
                    "input": step_input,
                    "output": step_output,
                },
            )
            collected.append(
                {
                    "step_id": step.step_id,
                    "title": step.title,
                    "input": step_input,
                    "output": step_output,
                }
            )
        tracer.log("workflow_trace_flushed", {"workflow_id": workflow_id, "steps": collected})
        tracer.log("workflow_completed", {"workflow_id": workflow_id})
        return context
@@ -0,0 +1,22 @@
from __future__ import annotations
from dataclasses import dataclass, field
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
from app.core.agent.utils.process_v2.models import RetrievedFile, V2RouteResult
from app.core.rag.retrieval.session_retriever import RetrievalPlan
@dataclass(slots=True)
class DocExplainFindFilesContext:
runtime: RuntimeExecutionContext
route: V2RouteResult
rag_session_id: str
retrieval_plan: RetrievalPlan | None = None
retrieved_rows: list[dict] = field(default_factory=list)
rows: list[dict] = field(default_factory=list)
files: list[RetrievedFile] = field(default_factory=list)
gate_decision: EvidenceGateDecision | None = None
answer: str = ""
answer_generated_payload: dict[str, object] | None = None
@@ -0,0 +1,26 @@
"""Протоколы контекста для workflow doc-explain find-files."""
from __future__ import annotations
from typing import Protocol
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
from app.core.agent.utils.process_v2.models import RetrievedFile, V2RouteResult
from app.core.rag.retrieval.session_retriever import RetrievalPlan
class RetrievalWorkflowContext(Protocol):
    """Structural type listing the context attributes the retrieval steps use."""

    # Request-scoped inputs.
    runtime: RuntimeExecutionContext
    route: V2RouteResult
    rag_session_id: str

    # Retrieval state.
    retrieval_plan: RetrievalPlan | None
    retrieved_rows: list[dict]
    rows: list[dict]

    # Gate and answer state.
    gate_decision: EvidenceGateDecision | None
    answer: str
    answer_generated_payload: dict[str, object] | None
class FindFilesWorkflowContext(RetrievalWorkflowContext, Protocol):
    """Retrieval context protocol extended with a ``files`` list."""

    files: list[RetrievedFile]
@@ -0,0 +1,106 @@
"""Логирование retrieval/pipeline/ranking для doc-explain find-files."""
from __future__ import annotations
from app.core.agent.utils.process_v2.models import V2RouteResult
from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import normalize_doc_path, row_path
def log_pipeline_step(runtime, step: str, payload: dict[str, object]) -> None:
    """Log *payload* under event name *step* on the ``process.v2.pipeline`` trace module."""
    pipeline_trace = runtime.trace.module("process.v2.pipeline")
    pipeline_trace.log(step, payload)
def log_retrieval_trace(
    runtime,
    route: V2RouteResult,
    plan,
    retrieved_rows: list[dict],
    metadata_rows: list[dict],
    rows: list[dict],
) -> None:
    """Emit the three retrieval trace events for the find-files workflow.

    Logs ``rag_rows_fetched`` on ``process.v2.rag_retrieval``, then
    ``candidate_generation`` and ``retrieval_executed`` on the pipeline trace.
    """
    anchors = route.anchors
    profile = plan.profile
    runtime.trace.module("process.v2.rag_retrieval").log(
        "rag_rows_fetched",
        {
            "profile": profile,
            "row_count": len(rows),
            "rows": [trace_row(entry) for entry in rows],
        },
    )
    hinted_paths = {
        normalize_doc_path(hint)
        for hint in anchors.target_doc_hints
        if str(hint or "").strip()
    }
    # NOTE(review): assumes row_path is a pure accessor, so one call per row is equivalent.
    candidate_paths = [row_path(entry) for entry in rows]
    # The "seeded" source only looks at the first five retrieved rows.
    leading_rows = retrieved_rows[:5]
    sources = {
        "seeded": [trace_row(entry) for entry in leading_rows if row_path(entry) in hinted_paths],
        "metadata_lookup": [trace_row(entry) for entry in metadata_rows[:5]],
        "semantic": [trace_row(entry) for entry in leading_rows],
    }
    candidate_payload = {
        "query": route.user_query,
        "profile": profile,
        "details": {
            "target_doc_hints": list(anchors.target_doc_hints),
            "candidates_before_ranking": [path for path in candidate_paths if path],
        },
        "resolved_aliases": anchors.matched_aliases,
        "target_doc_hints": anchors.target_doc_hints,
        "candidate_docs_before_ranking": [trace_row(entry) for entry in rows[:8]],
        "sources": sources,
    }
    log_pipeline_step(runtime, "candidate_generation", candidate_payload)
    executed_payload = {
        "query": route.user_query,
        "profile": profile,
        "row_count": len(rows),
        "target_doc_hints": anchors.target_doc_hints,
        "top_results": [trace_row(entry) for entry in rows[:5]],
    }
    log_pipeline_step(runtime, "retrieval_executed", executed_payload)
def log_ranking(runtime, items: list) -> None:
    """Log ranking details for the first four *items* on the pipeline trace.

    One ``ranking_explained`` event is emitted per item, followed by one
    aggregate ``ranking_explained`` event with the summary list and the
    score breakdowns. Missing attributes fall back to defaults.
    """
    leading = items[:4]
    summaries: list[dict[str, object]] = []
    for entry in leading:
        doc_path = getattr(entry, "path", "")
        score = getattr(entry, "score", 0)
        reason = getattr(entry, "match_reason", "")
        summaries.append({"doc": doc_path, "score": score, "match_reason": reason})
        log_pipeline_step(
            runtime,
            "ranking_explained",
            {
                "doc": doc_path,
                "score_breakdown": getattr(entry, "score_breakdown", {}),
                "score": score,
                "match_reason": reason,
            },
        )
    breakdowns = [
        {
            "doc": getattr(entry, "path", ""),
            "score_breakdown": getattr(entry, "score_breakdown", {}),
        }
        for entry in leading
    ]
    log_pipeline_step(
        runtime,
        "ranking_explained",
        {"top_docs_after_ranking": summaries, "ranking_score_breakdown": breakdowns},
    )
def trace_row(row: dict) -> dict[str, object]:
    """Project a RAG row onto a compact, string-only dict for trace output.

    Missing or falsy values become empty strings. ``summary_text`` and the
    stripped ``content`` preview are truncated to 400 characters.

    Args:
        row: Row dict; ``metadata`` may be absent or falsy.

    Returns:
        Dict with the keys ``layer``, ``path``, ``title``, ``document_id``,
        ``entity_name``, ``summary_text``, ``section_path``,
        ``metadata_domain``, ``metadata_subdomain`` and ``content_preview``.
    """

    def as_text(value: object) -> str:
        return str(value or "")

    meta = row.get("metadata") or {}
    # Document id fallback order: metadata.document_id, metadata.doc_id, row.document_id.
    document_id = meta.get("document_id") or meta.get("doc_id") or row.get("document_id")
    preview = as_text(row.get("content")).strip()[:400]

    traced: dict[str, object] = {}
    traced["layer"] = as_text(row.get("layer"))
    traced["path"] = as_text(row.get("path"))
    traced["title"] = as_text(row.get("title"))
    traced["document_id"] = as_text(document_id)
    traced["entity_name"] = as_text(meta.get("entity_name"))
    traced["summary_text"] = as_text(meta.get("summary_text"))[:400]
    traced["section_path"] = as_text(meta.get("section_path"))
    traced["metadata_domain"] = as_text(meta.get("domain"))
    traced["metadata_subdomain"] = as_text(meta.get("subdomain"))
    traced["content_preview"] = preview
    return traced
@@ -0,0 +1,162 @@
# DOC_EXPLAIN / SUMMARY Workflow
## Контракт сабинтента
| Поле | Значение |
|---|---|
| `domain` | `DOCS` |
| `intent` | `DOC_EXPLAIN` |
| `subintent` | `SUMMARY` |
| `workflow_id` | `v2.docs_explain.summary` |
| `source` | `workflow.v2.summary` |
## Диаграмма флоу
```mermaid
flowchart TD
A["RequireRagSessionStep"] --> B["ResolveRetrievalPlanStep"]
B --> C["FetchRagRowsStep"]
C --> D["PrepareCandidateRowsStep"]
D --> E["BuildSummaryEvidenceStep"]
E --> F["ApplySummaryEvidenceGateStep"]
F --> G["GenerateSummaryAnswerStep"]
```
## Шаги процесса
### 1) `RequireRagSessionStep`
Шаг валидирует наличие активной RAG-сессии до retrieval. Если `rag_session_id` отсутствует, workflow завершает обработку и записывает сообщение об отсутствии опоры. Это предотвращает запуск последующих шагов без индекса проекта.
**Входные параметры**
| Параметр | Откуда берётся | Описание |
|---|---|---|
| `context.rag_session_id` | `V2Process` -> `DocExplainSummaryContext` | Идентификатор RAG-сессии |
| `self._missing_message` | Конфигурация в `graph.py` | Ответ при отсутствии сессии |
| `self._missing_gate` | Параметр конструктора шага (по умолчанию `None`) | Опциональный gate для раннего выхода |
**Выходные параметры**
| Параметр | Как формируется |
|---|---|
| `context.answer` | Заполняется `missing_message` при пустом `rag_session_id` |
| `context.gate_decision` | Заполняется, если передан `missing_gate` |
| `context.answer_generated_payload` | Формируется как `{"answer_mode", "answer_length"}` при раннем ответе, только если передан `missing_gate` |
### 2) `ResolveRetrievalPlanStep`
Шаг вызывает policy-резолвер и строит `RetrievalPlan` для doc-summary сценария. Профиль и фильтры зависят от сигналов route: endpoint/architecture/logic/domain. Результат сохраняется в контекст и логируется в trace.
**Входные параметры**
| Параметр | Откуда берётся | Описание |
|---|---|---|
| `context.route` | `intent_router` | Route с `target_terms` и `anchors` |
| `self._resolver` | DI из `graph.py` | Реализация `RetrievalPlanResolver` |
| `context.answer` | Предыдущие шаги | При наличии ответа шаг пропускается |
**Выходные параметры**
| Параметр | Как формируется |
|---|---|
| `context.retrieval_plan` | `self._resolver.resolve(context.route)` |
| `process.v2.retrieval_policy.retrieval_plan_resolved` | Лог профиля, слоёв, лимита и фильтров |
### 3) `FetchRagRowsStep`
Шаг выполняет retrieval по сформированному плану. Внутри адаптера объединяются seed-результаты по `target_doc_hints` и основной retrieval. Это даёт более устойчивую выдачу как по явным hints, так и по семантическому совпадению.
**Входные параметры**
| Параметр | Откуда берётся | Описание |
|---|---|---|
| `context.rag_session_id` | Контекст workflow | Сессия для поиска |
| `context.route.normalized_query` | Route | Нормализованный запрос |
| `context.retrieval_plan` | `ResolveRetrievalPlanStep` | План retrieval |
**Выходные параметры**
| Параметр | Как формируется |
|---|---|
| `context.retrieved_rows` | `await rag_adapter.fetch_rows(...)` |
### 4) `PrepareCandidateRowsStep`
Шаг обогащает найденные строки через metadata lookup и подмешивает кандидатов из `target_doc_hints`. На выходе формируется `context.rows`, который уже используется для построения summary-evidence. Шаг также пишет подробные retrieval-логи для анализа ранжирования.
**Входные параметры**
| Параметр | Откуда берётся | Описание |
|---|---|---|
| `context.retrieved_rows` | `FetchRagRowsStep` | Строки retrieval |
| `context.route` | Route | Сигналы hints/aliases/terms |
| `self._builder` | `CandidateRowsBuilder()` | Логика merge и metadata поиска |
**Выходные параметры**
| Параметр | Как формируется |
|---|---|
| `context.rows` | `prepared.rows` из `CandidateRowsBuilder.build(...)` |
| `process.v2.rag_retrieval.rag_rows_fetched` | Лог выборки и источников кандидатов |
### 5) `BuildSummaryEvidenceStep`
Шаг преобразует candidate rows в список `RetrievedSummary` через `DocsEvidenceAssembler.assemble_summaries`. Здесь формируется shortlist документов с оценками и причинами совпадения, который затем проверяется gate. Дополнительно логируются ranking-разборы.
**Входные параметры**
| Параметр | Откуда берётся | Описание |
|---|---|---|
| `context.rows` | `PrepareCandidateRowsStep` | Подготовленные кандидаты |
| `context.route` | Route | Сигналы для ranking |
| `self._assembler` | DI из `graph.py` | Сборщик summary evidence |
**Выходные параметры**
| Параметр | Как формируется |
|---|---|
| `context.documents` | `assemble_summaries(context.rows, context.route)` |
| `process.v2.evidence.evidence_assembled` | Лог `document_count` и списка путей |
### 6) `ApplySummaryEvidenceGateStep`
Шаг оценивает достаточность и надёжность собранного summary-evidence. Решение gate влияет на дальнейшее формирование ответа: либо прямой отказ/ограничение, либо генерация summary. Результат логируется в pipeline.
**Входные параметры**
| Параметр | Откуда берётся | Описание |
|---|---|---|
| `context.route` | Route | Контекст запроса |
| `context.documents` | `BuildSummaryEvidenceStep` | Кандидаты summary |
| `self._gate` | DI из `graph.py` | Правила валидации evidence |
**Выходные параметры**
| Параметр | Как формируется |
|---|---|
| `context.gate_decision` | `self._gate.check_summaries(context.route, context.documents)` |
| `process.v2.pipeline.evidence_gate_checked` | Лог `passed/reason/answer_mode` |
### 7) `GenerateSummaryAnswerStep`
Шаг формирует финальный ответ: gate-message, детерминированный ответ или LLM-генерация. При LLM-ветке собирается `prompt_input` из пользовательского запроса, route-сигналов и найденных summary-блоков. Итог всегда записывается в `context.answer` и сопровождается `answer_generated_payload`.
**Входные параметры**
| Параметр | Откуда берётся | Описание |
|---|---|---|
| `context.documents` | `BuildSummaryEvidenceStep` | Опорные summary-документы |
| `context.gate_decision` | `ApplySummaryEvidenceGateStep` | Режим допуска к генерации |
| `context.workflow_llm_enabled` | `V2Process` | Флаг LLM/детерминизм |
| `context.prompt_name` | `V2Process` | Имя prompt-шаблона |
| `self._llm` | DI из `graph.py` | LLM-сервис |
**Выходные параметры**
| Параметр | Как формируется |
|---|---|
| `context.prompt_input` | Склеивается из route + shortlist документов |
| `context.answer` | Gate-message / deterministic summary / результат `llm.generate(...)` |
| `context.answer_generated_payload` | `{"answer_mode", "answer_length"}` по выбранной ветке |
@@ -0,0 +1,3 @@
from app.core.agent.processes.v2.workflows.doc_explain_summary.graph import DocExplainSummaryGraph
__all__ = ["DocExplainSummaryGraph"]
@@ -0,0 +1,51 @@
from __future__ import annotations
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.buffered_graph import DocExplainSummaryWorkflowGraph
from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.retrieval.candidate_rows import CandidateRowsBuilder
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context import DocExplainSummaryContext
from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.apply_summary_evidence_gate_step import (
ApplySummaryEvidenceGateStep,
)
from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.build_summary_evidence_step import BuildSummaryEvidenceStep
from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.fetch_rag_rows_step import FetchRagRowsStep
from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.generate_summary_answer_step import (
GenerateSummaryAnswerStep,
)
from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.prepare_candidate_rows_step import (
PrepareCandidateRowsStep,
)
from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.require_rag_session_step import RequireRagSessionStep
from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.resolve_retrieval_plan_step import (
ResolveRetrievalPlanStep,
)
from app.core.agent.utils.llm import AgentLlmService
from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
class DocExplainSummaryGraph(DocExplainSummaryWorkflowGraph[DocExplainSummaryContext]):
    """Workflow graph for DOC_EXPLAIN / SUMMARY with its seven steps wired in order."""

    def __init__(
        self,
        llm: AgentLlmService,
        policy_resolver: RetrievalPlanResolver,
        rag_adapter: V2RagRetrievalAdapter,
        evidence_assembler: DocsEvidenceAssembler,
        evidence_gate: DocsEvidenceGate,
    ) -> None:
        """Build the step pipeline from the injected collaborators.

        Args:
            llm: LLM service handed to the answer-generation step.
            policy_resolver: Resolver handed to the retrieval-plan step.
            rag_adapter: Adapter handed to the row-fetching step.
            evidence_assembler: Assembler handed to the evidence-building step.
            evidence_gate: Gate handed to the evidence-gate step.
        """
        session_guard = RequireRagSessionStep(
            missing_message="Для процесса v2 нужна активная RAG-сессия проекта с проиндексированной документацией."
        )
        pipeline = [
            session_guard,
            ResolveRetrievalPlanStep(policy_resolver),
            FetchRagRowsStep(rag_adapter),
            PrepareCandidateRowsStep(CandidateRowsBuilder()),
            BuildSummaryEvidenceStep(evidence_assembler),
            ApplySummaryEvidenceGateStep(evidence_gate),
            GenerateSummaryAnswerStep(llm),
        ]
        super().__init__(
            workflow_id="v2.docs_explain.summary",
            source="workflow.v2.summary",
            steps=pipeline,
        )
@@ -0,0 +1,36 @@
from __future__ import annotations
from typing import Generic, TypeVar
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context_protocols import SummaryWorkflowContext
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.pipeline_logging import log_pipeline_step
from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
from app.core.agent.utils.workflow import WorkflowStep
TContext = TypeVar("TContext", bound=SummaryWorkflowContext)
class ApplySummaryEvidenceGateStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that runs the evidence gate over the assembled summary documents."""

    step_id = "apply_summary_evidence_gate"
    title = "Проверка summary evidence"

    def __init__(self, gate: DocsEvidenceGate) -> None:
        self._gate = gate

    async def run(self, context: TContext) -> TContext:
        """Store the gate decision on the context and log it.

        Does nothing when an earlier step has already produced an answer.
        """
        if not context.answer:
            verdict = self._gate.check_summaries(context.route, context.documents)
            context.gate_decision = verdict
            gate_payload = {
                "passed": verdict.passed,
                "reason": verdict.reason,
                "answer_mode": verdict.answer_mode,
            }
            log_pipeline_step(context.runtime, "evidence_gate_checked", gate_payload)
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report whether a gate decision exists and passed."""
        verdict = context.gate_decision
        return {"passed": bool(verdict and verdict.passed)}
@@ -0,0 +1,45 @@
from __future__ import annotations
from typing import Generic, TypeVar
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context_protocols import SummaryWorkflowContext
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.pipeline_logging import log_pipeline_step, log_ranking
from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
from app.core.agent.utils.workflow import WorkflowStep
TContext = TypeVar("TContext", bound=SummaryWorkflowContext)
class BuildSummaryEvidenceStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that turns candidate rows into summary evidence documents."""

    step_id = "build_summary_evidence"
    title = "Сборка summary evidence"

    def __init__(self, assembler: DocsEvidenceAssembler) -> None:
        self._assembler = assembler

    async def run(self, context: TContext) -> TContext:
        """Assemble summaries from ``context.rows`` and log the outcome.

        Does nothing when an earlier step has already produced an answer.
        Writes ``context.documents``, then logs to the evidence trace module,
        the pipeline trace, and the ranking trace.
        """
        if context.answer:
            return context

        documents = self._assembler.assemble_summaries(context.rows, context.route)
        context.documents = documents
        total = len(documents)

        evidence_trace = context.runtime.trace.module("process.v2.evidence")
        evidence_trace.log(
            "evidence_assembled",
            {
                "mode": "summary",
                "document_count": total,
                "documents": [doc.path for doc in documents],
            },
        )

        primary_path = documents[0].path if documents else None
        log_pipeline_step(
            context.runtime,
            "evidence_assembled",
            {"mode": "summary", "primary_doc": primary_path, "document_count": total},
        )
        log_ranking(context.runtime, documents)
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report how many summary documents were assembled."""
        return {"document_count": len(context.documents)}
@@ -0,0 +1,30 @@
from __future__ import annotations
from typing import Generic, TypeVar
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context_protocols import RetrievalWorkflowContext
from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
from app.core.agent.utils.workflow import WorkflowStep
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
class FetchRagRowsStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that fetches rows from RAG according to the resolved plan."""

    step_id = "fetch_rag_rows"
    title = "Получение строк из RAG"

    def __init__(self, rag_adapter: V2RagRetrievalAdapter) -> None:
        self._rag_adapter = rag_adapter

    async def run(self, context: TContext) -> TContext:
        """Fill ``context.retrieved_rows`` from the RAG adapter.

        Skipped when an answer already exists or no retrieval plan was resolved.
        """
        plan = context.retrieval_plan
        should_fetch = not context.answer and plan is not None
        if should_fetch:
            session_id = context.rag_session_id
            query = context.route.normalized_query
            context.retrieved_rows = await self._rag_adapter.fetch_rows(session_id, query, plan)
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report how many rows retrieval returned."""
        return {"retrieved_row_count": len(context.retrieved_rows)}
@@ -3,28 +3,36 @@ from __future__ import annotations
import asyncio
import json
from app.core.agent.processes.v2.anchor_signals import route_anchor_summary
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context import DocExplainSummaryContext
from app.core.agent.utils.llm import AgentLlmService
from app.core.agent.processes.v2.workflows.docs_explain_summary.context import DocsExplainSummaryContext
from app.core.agent.utils.process_v2.anchor_signals import route_anchor_summary
from app.core.agent.utils.workflow import WorkflowStep
class GenerateSummaryAnswerStep(WorkflowStep[DocsExplainSummaryContext]):
class GenerateSummaryAnswerStep(WorkflowStep[DocExplainSummaryContext]):
step_id = "generate_summary_answer"
title = "Сборка ответа по summary"
def __init__(self, llm: AgentLlmService) -> None:
self._llm = llm
async def run(self, context: DocsExplainSummaryContext) -> DocsExplainSummaryContext:
async def run(self, context: DocExplainSummaryContext) -> DocExplainSummaryContext:
if context.answer:
return context
if context.gate_decision is not None and not context.gate_decision.passed:
context.answer = context.gate_decision.message
context.answer_generated_payload = {
"answer_mode": context.gate_decision.answer_mode,
"answer_length": len(context.answer),
}
return context
if not context.workflow_llm_enabled:
context.answer = self._build_deterministic_answer(context)
context.answer_generated_payload = {"answer_mode": "grounded_summary", "answer_length": len(context.answer)}
return context
if not context.documents:
context.answer = "Не нашёл подходящих SUMMARY-блоков в документации по этому запросу."
context.answer_generated_payload = {"answer_mode": "insufficient_evidence", "answer_length": len(context.answer)}
return context
context.prompt_input = self._build_prompt_input(context)
request_id = context.runtime.request.request_id
@@ -35,9 +43,10 @@ class GenerateSummaryAnswerStep(WorkflowStep[DocsExplainSummaryContext]):
log_context=f"agent:{request_id}",
trace=context.runtime.trace.module("workflow.v2.summary.llm"),
)
context.answer_generated_payload = {"answer_mode": "grounded_summary", "answer_length": len(context.answer)}
return context
def _build_prompt_input(self, context: DocsExplainSummaryContext) -> str:
def _build_prompt_input(self, context: DocExplainSummaryContext) -> str:
blocks = [
f"Запрос пользователя:\n{context.route.user_query}",
"Сигналы запроса:\n" + json.dumps(route_anchor_summary(context.route), ensure_ascii=False, indent=2),
@@ -52,17 +61,13 @@ class GenerateSummaryAnswerStep(WorkflowStep[DocsExplainSummaryContext]):
)
return "\n\n".join(blocks)
def _build_deterministic_answer(self, context: DocsExplainSummaryContext) -> str:
def _build_deterministic_answer(self, context: DocExplainSummaryContext) -> str:
if not context.documents:
return "Не нашёл подходящих SUMMARY-блоков в документации по этому запросу."
lines = []
primary = context.documents[0]
lines.append(primary.summary)
lines.append("")
lines.append("Файлы-источники:")
lines = [context.documents[0].summary, "", "Файлы-источники:"]
for item in context.documents:
lines.append(f"- {item.path}")
return "\n".join(lines)
def trace_output(self, context: DocsExplainSummaryContext) -> dict[str, object]:
def trace_output(self, context: DocExplainSummaryContext) -> dict[str, object]:
return {"answer_length": len(context.answer)}
@@ -0,0 +1,36 @@
from __future__ import annotations
from typing import Generic, TypeVar
from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.retrieval.candidate_rows import CandidateRowsBuilder
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context_protocols import RetrievalWorkflowContext
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.pipeline_logging import log_retrieval_trace
from app.core.agent.utils.workflow import WorkflowStep
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
class PrepareCandidateRowsStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that builds the final candidate rows from retrieved rows."""

    step_id = "prepare_candidate_rows"
    title = "Подготовка candidate rows"

    def __init__(self, builder: CandidateRowsBuilder) -> None:
        self._builder = builder

    async def run(self, context: TContext) -> TContext:
        """Write ``context.rows`` from the builder result and log the retrieval trace.

        Skipped when an answer already exists or no retrieval plan was resolved.
        """
        plan = context.retrieval_plan
        if context.answer or plan is None:
            return context

        route = context.route
        retrieved = context.retrieved_rows
        result = self._builder.build(retrieved, route)
        context.rows = result.rows
        log_retrieval_trace(context.runtime, route, plan, retrieved, result.metadata_rows, result.rows)
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report how many candidate rows were prepared."""
        return {"row_count": len(context.rows)}
@@ -0,0 +1,43 @@
from __future__ import annotations
from typing import Generic, TypeVar
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context_protocols import RetrievalWorkflowContext
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.pipeline_logging import log_pipeline_step
from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
from app.core.agent.utils.workflow import WorkflowStep
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
class RequireRagSessionStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that answers early when the context has no RAG session id."""

    step_id = "require_rag_session"
    title = "Проверка RAG-сессии"

    def __init__(self, *, missing_message: str, missing_gate: EvidenceGateDecision | None = None) -> None:
        """Configure the early-exit behaviour.

        Args:
            missing_message: Answer text used when the session id is empty.
            missing_gate: Optional gate decision stored and logged on early exit.
        """
        self._missing_message = missing_message
        self._missing_gate = missing_gate

    async def run(self, context: TContext) -> TContext:
        """Set the fallback answer when ``context.rag_session_id`` is falsy.

        The gate decision, the answer payload and the pipeline log are only
        produced when a ``missing_gate`` was configured.
        """
        if context.rag_session_id:
            return context

        context.answer = self._missing_message
        gate = self._missing_gate
        if gate is None:
            return context

        context.gate_decision = gate
        context.answer_generated_payload = {
            "answer_mode": gate.answer_mode,
            "answer_length": len(context.answer),
        }
        gate_payload = {
            "passed": gate.passed,
            "reason": gate.reason,
            "answer_mode": gate.answer_mode,
        }
        log_pipeline_step(context.runtime, "evidence_gate_checked", gate_payload)
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report whether the context carries a RAG session id."""
        return {"has_rag_session": bool(context.rag_session_id)}
@@ -0,0 +1,37 @@
from __future__ import annotations
from typing import Generic, TypeVar
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context_protocols import RetrievalWorkflowContext
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.pipeline_logging import log_pipeline_step
from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
from app.core.agent.utils.workflow import WorkflowStep
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
class ResolveRetrievalPlanStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that resolves the retrieval plan for the current route."""

    step_id = "resolve_retrieval_plan"
    title = "Выбор retrieval-плана"

    def __init__(self, resolver: RetrievalPlanResolver) -> None:
        self._resolver = resolver

    async def run(self, context: TContext) -> TContext:
        """Store the resolved plan on the context and log it.

        Does nothing when an earlier step has already produced an answer.
        """
        if not context.answer:
            resolved = self._resolver.resolve(context.route)
            context.retrieval_plan = resolved

            policy_trace = context.runtime.trace.module("process.v2.retrieval_policy")
            policy_payload = {
                "profile": resolved.profile,
                "layers": resolved.layers,
                "limit": resolved.limit,
                "filters": resolved.filters,
            }
            policy_trace.log("retrieval_plan_resolved", policy_payload)

            pipeline_payload = {
                "profile": resolved.profile,
                "layers": resolved.layers,
                "filters": resolved.filters,
            }
            log_pipeline_step(context.runtime, "retrieval_profile_selected", pipeline_payload)
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report the resolved profile, or an empty string when no plan is set."""
        plan = context.retrieval_plan
        return {"profile": getattr(plan, "profile", "")}
@@ -0,0 +1,2 @@
"""Retrieval-related step helpers for the doc-explain summary workflow."""
@@ -0,0 +1,43 @@
"""Сборка candidate rows для doc-explain summary (метаданные + сиды по hints)."""
from __future__ import annotations
from dataclasses import dataclass
from app.core.agent.utils.process_v2.models import V2RouteResult
from app.core.agent.utils.process_v2.rag_retrieval import DocsMetadataLookupIndex
from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import (
RagRowIndex,
merge_row_lists,
normalize_doc_path,
normalized_path_set,
seed_candidates_from_target_hints,
)
@dataclass(slots=True)
class CandidateRowsResult:
    """Result returned by ``CandidateRowsBuilder.build``."""

    # Rows produced by the metadata lookup over the retrieved rows.
    metadata_rows: list[dict]
    # Final candidate rows: retrieved rows merged with the metadata rows,
    # then passed through target-hint seeding.
    rows: list[dict]
class CandidateRowsBuilder:
    """Builds the candidate rows for the doc-explain summary workflow."""

    def build(self, retrieved_rows: list[dict], route: V2RouteResult) -> CandidateRowsResult:
        """Combine retrieved rows with metadata-lookup rows and hint seeds.

        Args:
            retrieved_rows: Rows returned by retrieval.
            route: Routing result; its anchors drive the metadata lookup and
                the target-hint seeding.

        Returns:
            The metadata rows and the final candidate rows.
        """
        metadata_rows = DocsMetadataLookupIndex(retrieved_rows).lookup(route)
        rows = merge_row_lists(retrieved_rows, metadata_rows)
        rows = seed_candidates_from_target_hints(rows, route.anchors.target_doc_hints, RagRowIndex(rows))
        self._print_missing_target_hints(route, rows)
        return CandidateRowsResult(metadata_rows=metadata_rows, rows=rows)

    def _print_missing_target_hints(self, route: V2RouteResult, rows: list[dict]) -> None:
        """Report hinted document paths that are absent from the candidates.

        Only hints that normalize to a ``docs/`` path whose last segment
        contains a dot (a file name with an extension) are checked. Each
        missing one is reported at ERROR level through the module logger
        rather than printed to stdout, so the diagnostic follows the
        application's logging configuration.
        """
        import logging  # local import keeps this block self-contained

        hints = route.anchors.target_doc_hints
        if not hints:
            return
        logger = logging.getLogger(__name__)
        candidate_paths = normalized_path_set(rows)
        for hint in hints:
            if not str(hint or "").strip():
                continue
            normalized = normalize_doc_path(hint)
            file_name = normalized.rsplit("/", 1)[-1]
            if not normalized.startswith("docs/") or "." not in file_name:
                continue
            if normalized not in candidate_paths:
                logger.error("target doc missing from candidates: %s", normalized)
@@ -0,0 +1,168 @@
from __future__ import annotations
from app.core.agent.utils.process_v2.anchor_signals import anchor_signal_types
from app.core.agent.utils.process_v2.models import V2AnchorType, V2Intent, V2RouteResult, V2Subintent
from app.core.rag.contracts.enums import RagLayer
from app.core.rag.retrieval.session_retriever import RetrievalPlan
class DocExplainSummaryRetrievalPolicy:
    """Retrieval policy for DOC_EXPLAIN / SUMMARY routes.

    Picks a retrieval profile from the route's anchor signals and builds the
    matching ``RetrievalPlan`` (layers, row limit and filters).
    """

    # Layers queried for each profile, in the order handed to the plan.
    _SUMMARY_LAYERS = {
        "docs_api_method_explain": [
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_FACT_INDEX,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
        "docs_summary_api_endpoint": [
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_FACT_INDEX,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
        "docs_summary_logic_flow": [
            RagLayer.DOCS_WORKFLOW_INDEX,
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
        "docs_summary_domain_entity": [
            RagLayer.DOCS_ENTITY_CATALOG,
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
        "docs_summary_architecture": [
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_RELATION_GRAPH,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
        "docs_summary_generic": [
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
    }
    # Path prefixes used as both hard and preferred filters for the API-method profile.
    _API_DOC_PREFIXES = ["docs/api/", "docs/endpoints/", "docs/methods/", "api/", "endpoints/", "methods/"]

    def supports(self, route: V2RouteResult) -> bool:
        """Return True for routes with intent DOC_EXPLAIN and subintent SUMMARY."""
        return route.intent == V2Intent.DOC_EXPLAIN and route.subintent == V2Subintent.SUMMARY

    def resolve(self, route: V2RouteResult) -> RetrievalPlan:
        """Build the retrieval plan for ``route``.

        The API-method profile uses a limit of 10 rows; every other profile uses 8.
        """
        profile = self._profile(route)
        return RetrievalPlan(
            profile=profile,
            layers=list(self._SUMMARY_LAYERS[profile]),
            limit=10 if profile == "docs_api_method_explain" else 8,
            filters=self._filters(route),
        )

    def _filters(self, route: V2RouteResult) -> dict[str, object]:
        """Return the plan filters: API-method filters or generic summary filters."""
        if self._is_api_method_explain(route):
            return self._api_method_filters(route)
        filters = self._base_filters(route)
        filters["prefer_path_prefixes"] = self._summary_prefixes(route)
        filters["prefer_like_patterns"] = self._prefer_like_patterns(route)
        # _is_api_method_explain already returns True for API_ENDPOINT signals on
        # SUMMARY routes, so this branch only applies to non-SUMMARY routes.
        if V2AnchorType.API_ENDPOINT in anchor_signal_types(route):
            filters["path_prefixes"] = ["docs/api/", "docs/"]
        return filters

    def _base_filters(self, route: V2RouteResult) -> dict[str, object]:
        """Return filters shared by all profiles: doc hints plus optional domain/subdomain."""
        filters: dict[str, object] = {"target_doc_hints": list(route.anchors.target_doc_hints)}
        if route.anchors.process_domain:
            filters["metadata.domain"] = route.anchors.process_domain
        if route.anchors.process_subdomain:
            filters["metadata.subdomain"] = route.anchors.process_subdomain
        return filters

    def _api_method_filters(self, route: V2RouteResult) -> dict[str, object]:
        """Return filters for the API-method profile, restricted to API doc prefixes."""
        filters = self._base_filters(route)
        filters["path_prefixes"] = list(self._API_DOC_PREFIXES)
        filters["prefer_path_prefixes"] = list(self._API_DOC_PREFIXES)
        filters["prefer_like_patterns"] = self._api_method_patterns(route)
        return filters

    def _profile(self, route: V2RouteResult) -> str:
        """Choose the profile name for ``route``.

        API-method routes get their own profile. Otherwise a profile is chosen
        only when exactly one anchor signal remains after ignoring FIND_FILES;
        any other case falls back to the generic profile.
        """
        if self._is_api_method_explain(route):
            return "docs_api_method_explain"
        meaningful = anchor_signal_types(route) - {V2AnchorType.FIND_FILES}
        if len(meaningful) != 1:
            return "docs_summary_generic"
        mapping = {
            V2AnchorType.API_ENDPOINT: "docs_summary_api_endpoint",
            V2AnchorType.ARCHITECTURE: "docs_summary_architecture",
            V2AnchorType.LOGIC_FLOW: "docs_summary_logic_flow",
            V2AnchorType.DOMAIN_ENTITY: "docs_summary_domain_entity",
        }
        return mapping.get(next(iter(meaningful)), "docs_summary_generic")

    def _summary_prefixes(self, route: V2RouteResult) -> list[str]:
        """Return preferred path prefixes for the route's signals, de-duplicated in order."""
        signals = anchor_signal_types(route)
        prefixes: list[str] = []
        if V2AnchorType.API_ENDPOINT in signals:
            prefixes.extend(["docs/api/", "docs/"])
        if V2AnchorType.ARCHITECTURE in signals:
            prefixes.extend(["docs/architecture/", "docs/"])
        if V2AnchorType.LOGIC_FLOW in signals:
            prefixes.extend(["docs/logic/", "docs/architecture/", "docs/"])
        if V2AnchorType.DOMAIN_ENTITY in signals:
            prefixes.extend(["docs/domains/", "docs/", "docs/api/"])
        return _unique_terms(prefixes or ["docs/"])

    @staticmethod
    def _hint_file_names(route: V2RouteResult) -> list[str]:
        """Return the last path segment of every non-blank target doc hint.

        ``None`` and blank hints are skipped, matching the ``str(hint or "")``
        guard used elsewhere for hints.
        """
        return [
            str(hint).rsplit("/", 1)[-1]
            for hint in route.anchors.target_doc_hints
            if str(hint or "").strip()
        ]

    def _prefer_like_patterns(self, route: V2RouteResult) -> list[str]:
        """Return lowercase ``%term%`` patterns built from the route's anchors and terms."""
        anchors = route.anchors
        terms = self._hint_file_names(route)
        terms.extend(anchors.endpoint_paths)
        terms.extend(route.target_terms)
        terms.extend(anchors.file_names)
        terms.extend(anchors.entity_names)
        terms.extend(anchors.matched_aliases)
        if anchors.process_domain:
            terms.append(anchors.process_domain)
        if anchors.process_subdomain:
            terms.append(anchors.process_subdomain)
        patterns = [f"%{term.lower()}%" for term in _unique_terms(terms)]
        # De-duplicate again after lowercasing so case variants of one term
        # ("Users" / "users") yield a single pattern.
        return _unique_terms(patterns)

    def _api_method_patterns(self, route: V2RouteResult) -> list[str]:
        """Return lowercase ``%term%`` patterns for the API-method profile.

        Each term contributes a pattern with surrounding slashes stripped
        (when non-empty) and one for the term as given.
        """
        terms = self._hint_file_names(route)
        terms.extend(route.anchors.target_doc_hints)
        terms.extend(route.anchors.endpoint_paths)
        terms.extend(route.target_terms)
        patterns: list[str] = []
        for term in _unique_terms(terms):
            # _unique_terms only yields non-empty values, so ``lowered`` is never empty.
            lowered = term.lower()
            stripped = lowered.strip("/")
            if stripped:
                patterns.append(f"%{stripped}%")
            patterns.append(f"%{lowered}%")
        return _unique_terms(patterns)

    def _is_api_method_explain(self, route: V2RouteResult) -> bool:
        """Return True when a SUMMARY route has endpoint paths, API-like hints or an API_ENDPOINT signal."""
        if route.subintent != V2Subintent.SUMMARY:
            return False
        if route.anchors.endpoint_paths:
            return True
        if _has_api_like_hints(route.anchors.target_doc_hints):
            return True
        return V2AnchorType.API_ENDPOINT in anchor_signal_types(route)
def _unique_terms(items: list[str]) -> list[str]:
    """Return the stripped, non-empty string forms of ``items`` without duplicates.

    Falsy items become empty strings and are dropped. The first occurrence of
    each value wins and input order is kept; comparison is case-sensitive.
    """
    cleaned = (str(raw or "").strip() for raw in items)
    # dict keys keep insertion order, which gives an ordered de-duplication.
    return list(dict.fromkeys(value for value in cleaned if value))
def _has_api_like_hints(hints: list[str]) -> bool:
    """Return True when any hint looks like an API path or API document.

    A hint matches, after trimming and lowercasing, when it starts with ``/``
    or one of the API doc prefixes, or contains ``endpoint`` or ``method``.
    Blank and falsy hints are ignored.
    """
    api_prefixes = ("/", "docs/api/", "docs/endpoints/", "docs/methods/")
    api_keywords = ("endpoint", "method")

    def looks_like_api(raw: object) -> bool:
        candidate = str(raw or "").strip().lower()
        if not candidate:
            return False
        return candidate.startswith(api_prefixes) or any(word in candidate for word in api_keywords)

    return any(looks_like_api(raw) for raw in hints)
@@ -0,0 +1,2 @@
"""Runtime helpers for the doc-explain summary workflow."""
@@ -0,0 +1,46 @@
"""Граф workflow doc-explain summary: буфер шагов и один сброс в trace (на базе utils.workflow)."""
from __future__ import annotations
from typing import TypeVar
from app.core.agent.utils.workflow.context import WorkflowContext
from app.core.agent.utils.workflow.graph import WorkflowGraph
TContext = TypeVar("TContext", bound=WorkflowContext)
class DocExplainSummaryWorkflowGraph(WorkflowGraph[TContext]):
    """Workflow graph that buffers per-step trace data and flushes it once.

    Each step yields a single ``workflow_step_traced`` event instead of
    separate started/completed events. After the last step the buffered
    entries are emitted together as ``workflow_trace_flushed``.
    """

    async def run(self, context: TContext) -> TContext:
        """Run every step in order and return the final context.

        For each step, the trace input is captured before the step runs, a
        status message is published, and the trace output is captured after.
        """
        workflow_id = self._workflow_id
        trace = context.runtime.trace.module(self._source)
        trace.log("workflow_started", {"workflow_id": workflow_id})

        traced_steps: list[dict[str, object]] = []
        for step in self._steps:
            step_input = step.trace_input(context)
            await context.runtime.publisher.publish_status(
                context.runtime.request.request_id,
                self._source,
                f"Шаг workflow: {step.title}.",
                {"workflow_id": workflow_id, "step_id": step.step_id},
            )
            # A step may return a different context object; continue with what it returns.
            context = await step.run(context)
            step_output = step.trace_output(context)
            step_event = {
                "workflow_id": workflow_id,
                "step": {"id": step.step_id, "title": step.title},
                "input": step_input,
                "output": step_output,
            }
            trace.log("workflow_step_traced", step_event)
            buffered_entry = {
                "step_id": step.step_id,
                "title": step.title,
                "input": step_input,
                "output": step_output,
            }
            traced_steps.append(buffered_entry)

        trace.log("workflow_trace_flushed", {"workflow_id": workflow_id, "steps": traced_steps})
        trace.log("workflow_completed", {"workflow_id": workflow_id})
        return context
@@ -0,0 +1,25 @@
from __future__ import annotations
from dataclasses import dataclass, field
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
from app.core.agent.utils.process_v2.models import RetrievedSummary, V2RouteResult
from app.core.rag.retrieval.session_retriever import RetrievalPlan
@dataclass(slots=True)
class DocExplainSummaryContext:
    """Mutable state passed through the steps of the doc-explain summary workflow."""

    # Execution runtime; steps read ``runtime.trace``, ``runtime.request`` and ``runtime.publisher``.
    runtime: RuntimeExecutionContext
    # Routing result for the user query (query text, anchors, target terms).
    route: V2RouteResult
    # RAG session identifier; a falsy value makes RequireRagSessionStep answer early.
    rag_session_id: str
    # Name of the prompt template (presumably consumed by answer generation; confirm there).
    prompt_name: str
    # When False, the answer step builds a deterministic answer instead of calling the LLM.
    workflow_llm_enabled: bool = True
    # Set by ResolveRetrievalPlanStep; None until a plan is resolved.
    retrieval_plan: RetrievalPlan | None = None
    # Set by FetchRagRowsStep.
    retrieved_rows: list[dict] = field(default_factory=list)
    # Set by PrepareCandidateRowsStep.
    rows: list[dict] = field(default_factory=list)
    # Set by BuildSummaryEvidenceStep.
    documents: list[RetrievedSummary] = field(default_factory=list)
    # Set by ApplySummaryEvidenceGateStep, or by RequireRagSessionStep when a missing-gate is configured.
    gate_decision: EvidenceGateDecision | None = None
    # Prompt text assembled by the answer-generation step.
    prompt_input: str = ""
    # Final answer; most steps return immediately once this is non-empty.
    answer: str = ""
    # Payload with "answer_mode" and "answer_length" describing how the answer was produced.
    answer_generated_payload: dict[str, object] | None = None
@@ -0,0 +1,26 @@
"""Протоколы контекста для workflow doc-explain summary."""
from __future__ import annotations
from typing import Protocol
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
from app.core.agent.utils.process_v2.models import RetrievedSummary, V2RouteResult
from app.core.rag.retrieval.session_retriever import RetrievalPlan
class RetrievalWorkflowContext(Protocol):
    """Structural type for contexts accepted by the retrieval-stage workflow steps."""

    # Execution runtime used by the steps for tracing.
    runtime: RuntimeExecutionContext
    # Routing result for the user query.
    route: V2RouteResult
    # RAG session identifier; a falsy value triggers the early answer.
    rag_session_id: str
    # Resolved retrieval plan, or None before resolution.
    retrieval_plan: RetrievalPlan | None
    # Rows returned by retrieval.
    retrieved_rows: list[dict]
    # Candidate rows prepared from the retrieved rows.
    rows: list[dict]
    # Evidence gate decision, if one has been made.
    gate_decision: EvidenceGateDecision | None
    # Final answer text; non-empty means later steps skip their work.
    answer: str
    # Payload describing how the answer was produced, if any.
    answer_generated_payload: dict[str, object] | None
class SummaryWorkflowContext(RetrievalWorkflowContext, Protocol):
    """Retrieval context that additionally carries the assembled summary documents."""

    # Summary evidence documents produced by the evidence-building step.
    documents: list[RetrievedSummary]
@@ -0,0 +1,106 @@
"""Логирование retrieval/pipeline/ranking для doc-explain summary."""
from __future__ import annotations
from app.core.agent.utils.process_v2.models import V2RouteResult
from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import normalize_doc_path, row_path
def log_pipeline_step(runtime, step: str, payload: dict[str, object]) -> None:
    """Record one event on the ``process.v2.pipeline`` trace module.

    Args:
        runtime: Object exposing ``trace.module(name)``; the returned module
            must provide ``log(step, payload)``.
        step: Event name to record.
        payload: Event data, passed through to the trace module unchanged.
    """
    pipeline_trace = runtime.trace.module("process.v2.pipeline")
    pipeline_trace.log(step, payload)
def log_retrieval_trace(runtime, route: V2RouteResult, plan, retrieved_rows: list[dict], metadata_rows: list[dict], rows: list[dict]) -> None:
    """Write the trace events that describe one retrieval pass.

    Three events are emitted, in this order:

    1. ``rag_rows_fetched`` on the ``process.v2.rag_retrieval`` trace module,
       carrying a trace projection of every row in ``rows``.
    2. ``candidate_generation`` pipeline step: the query, the target-doc
       hints and aliases from ``route.anchors``, the candidate paths, the
       first eight candidate rows and a per-source breakdown.
    3. ``retrieval_executed`` pipeline step: the query, the row count and the
       first five rows.

    Args:
        runtime: Runtime object that exposes the trace.
        route: Routing result; ``user_query`` and ``anchors`` are read.
        plan: Retrieval plan; only ``profile`` is read.
        retrieved_rows: Rows reported under the ``semantic`` source (and,
            when their path matches a hint, under ``seeded``).
        metadata_rows: Rows reported under the ``metadata_lookup`` source.
        rows: Rows reported as the candidate set.
    """
    retrieval_trace = runtime.trace.module("process.v2.rag_retrieval")
    retrieval_trace.log(
        "rag_rows_fetched",
        {
            "profile": plan.profile,
            "row_count": len(rows),
            "rows": [trace_row(entry) for entry in rows],
        },
    )

    anchors = route.anchors

    # Blank / None hints are dropped; the rest are normalized so they can be
    # matched against row paths below.
    hinted_paths = set()
    for hint in anchors.target_doc_hints:
        if str(hint or "").strip():
            hinted_paths.add(normalize_doc_path(hint))

    # Rows without a path are left out of the plain path listing.
    candidate_paths = [path for entry in rows if (path := row_path(entry))]

    # Each source list is capped by slicing the first five rows; "seeded" is
    # the subset of that slice whose path is one of the hinted paths.
    sources = {
        "seeded": [trace_row(entry) for entry in retrieved_rows[:5] if row_path(entry) in hinted_paths],
        "metadata_lookup": [trace_row(entry) for entry in metadata_rows[:5]],
        "semantic": [trace_row(entry) for entry in retrieved_rows[:5]],
    }

    candidate_payload = {
        "query": route.user_query,
        "profile": plan.profile,
        "details": {
            "target_doc_hints": list(anchors.target_doc_hints),
            "candidates_before_ranking": candidate_paths,
        },
        "resolved_aliases": anchors.matched_aliases,
        "target_doc_hints": anchors.target_doc_hints,
        "candidate_docs_before_ranking": [trace_row(entry) for entry in rows[:8]],
        "sources": sources,
    }
    log_pipeline_step(runtime, "candidate_generation", candidate_payload)

    executed_payload = {
        "query": route.user_query,
        "profile": plan.profile,
        "row_count": len(rows),
        "target_doc_hints": anchors.target_doc_hints,
        "top_results": [trace_row(entry) for entry in rows[:5]],
    }
    log_pipeline_step(runtime, "retrieval_executed", executed_payload)
def log_ranking(runtime, items: list) -> None:
    """Trace the ranking outcome for the first four ranked items.

    One ``ranking_explained`` pipeline step is logged per item (doc, score
    breakdown, score, match reason), followed by a final ``ranking_explained``
    step that summarizes all of them.  Attributes missing on an item fall back
    to ``""`` (``path``, ``match_reason``), ``0`` (``score``) or ``{}``
    (``score_breakdown``).

    Args:
        runtime: Runtime object that exposes the trace.
        items: Ranked items, best first; only the first four are reported.
    """
    shortlist = items[:4]
    top_docs: list[dict[str, object]] = []

    for ranked in shortlist:
        doc = getattr(ranked, "path", "")
        score = getattr(ranked, "score", 0)
        reason = getattr(ranked, "match_reason", "")
        top_docs.append({"doc": doc, "score": score, "match_reason": reason})
        per_item_payload = {
            "doc": doc,
            "score_breakdown": getattr(ranked, "score_breakdown", {}),
            "score": score,
            "match_reason": reason,
        }
        log_pipeline_step(runtime, "ranking_explained", per_item_payload)

    # The summary is logged even when there are no items (with empty lists).
    breakdowns = [
        {
            "doc": getattr(ranked, "path", ""),
            "score_breakdown": getattr(ranked, "score_breakdown", {}),
        }
        for ranked in shortlist
    ]
    summary_payload = {
        "top_docs_after_ranking": top_docs,
        "ranking_score_breakdown": breakdowns,
    }
    log_pipeline_step(runtime, "ranking_explained", summary_payload)
def trace_row(row: dict) -> dict[str, object]:
    """Project a retrieval row onto a compact, string-only dict for tracing.

    Missing or falsy values become ``""``.  The document id is taken from the
    row metadata (``document_id``, then ``doc_id``) and only then from the row
    itself.  ``summary_text`` and the stripped ``content`` preview are cut to
    400 characters.

    Args:
        row: Row dict; ``metadata`` may be absent or ``None``.

    Returns:
        A new dict with a fixed set of keys and string values.
    """

    def _text(value) -> str:
        # Falsy values (None, "", 0, ...) are rendered as an empty string.
        return str(value or "")

    meta = row.get("metadata") or {}
    preview_source = _text(row.get("content")).strip()
    document_id = meta.get("document_id") or meta.get("doc_id") or row.get("document_id")

    traced: dict[str, object] = {}
    traced["layer"] = _text(row.get("layer"))
    traced["path"] = _text(row.get("path"))
    traced["title"] = _text(row.get("title"))
    traced["document_id"] = _text(document_id)
    traced["entity_name"] = _text(meta.get("entity_name"))
    traced["summary_text"] = _text(meta.get("summary_text"))[:400]
    traced["section_path"] = _text(meta.get("section_path"))
    traced["metadata_domain"] = _text(meta.get("domain"))
    traced["metadata_subdomain"] = _text(meta.get("subdomain"))
    traced["content_preview"] = preview_source[:400]
    return traced
@@ -0,0 +1,2 @@
"""DOC_UPDATE/FROM_FEATURE workflow package."""
@@ -0,0 +1,13 @@
from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.changeset_generator import (
DocRulesChangesetGenerator,
)
from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.loader import DocRulesLoader
from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.models import DocRulesBundle
from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.selector import DocRulesSelector
__all__ = [
"DocRulesBundle",
"DocRulesChangesetGenerator",
"DocRulesLoader",
"DocRulesSelector",
]
@@ -0,0 +1,96 @@
from __future__ import annotations
import hashlib
import json
from pathlib import Path
from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.models import DocRulesBundle
from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.prompt_builder import DocChangePromptBuilder
from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.selector import DocRulesSelector
from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.models import PlannedChange
from app.core.agent.utils.llm import AgentLlmService
from app.infra.observability.module_trace import ModuleTrace
from app.schemas.changeset import ChangeItem
class DocRulesChangesetGenerator:
    """Turn one planned documentation change into a validated ``ChangeItem``.

    The generator selects the doc rules for the change's ``doc_type``, builds
    an LLM prompt from them, asks the LLM for a JSON changeset item and then
    overrides / completes the fields that must come from the plan rather than
    from the model (``op``, ``path``, ``base_hash``).
    """

    def __init__(
        self,
        llm: AgentLlmService,
        selector: DocRulesSelector | None = None,
        prompt_builder: DocChangePromptBuilder | None = None,
    ) -> None:
        """Store collaborators, creating default selector/builder if omitted."""
        self._llm = llm
        self._selector = selector or DocRulesSelector()
        self._prompt_builder = prompt_builder or DocChangePromptBuilder()

    def generate(
        self,
        *,
        bundle: DocRulesBundle,
        item: PlannedChange,
        domain: str,
        subdomain: str,
        project_root: str,
        trace: ModuleTrace | None = None,
    ) -> tuple[ChangeItem | None, str | None]:
        """Generate the changeset item for a single planned change.

        Args:
            bundle: Loaded doc rules to select from.
            item: Planned change; ``doc_type``, ``path``, ``op`` and
                ``reason`` are read.
            domain: Passed through to the prompt builder.
            subdomain: Passed through to the prompt builder.
            project_root: Absolute project directory used to compute
                ``base_hash`` for ``update`` / ``delete`` operations.
            trace: Optional trace; when given, prompt sizes are logged and the
                trace is forwarded to the LLM call.

        Returns:
            ``(change_item, None)`` on success, or ``(None, error_message)``
            when rules are missing, the LLM output is not a JSON object, the
            base hash cannot be computed, or validation fails.
        """
        selected = self._selector.select(bundle, item.doc_type)
        if selected is None:
            return None, f"Для doc_type '{item.doc_type}' не найдено полных doc_rules (artifact/template)."

        prompt_input = self._prompt_builder.build(item=item, rules=selected, domain=domain, subdomain=subdomain)
        if trace is not None:
            trace.log(
                "changeset_prompt_built",
                {
                    "doc_type": item.doc_type,
                    "path": item.path,
                    "prompt_chars": len(prompt_input),
                    "rules_chars": len(selected.render_for_prompt()),
                },
            )

        raw = self._llm.generate(
            "v2_docs_update.build_doc_changeset",
            prompt_input,
            log_context="workflow.v2.docs_update.from_feature.changeset",
            trace=trace,
        )
        payload = self._parse_json(raw)
        if payload is None:
            return None, f"LLM вернул невалидный JSON changeset для {item.path}."

        # The plan, not the model, decides the operation and the target path.
        payload["op"] = item.op
        payload["path"] = item.path
        # Fall back to the planned reason when the model gave none; cap length.
        payload["reason"] = str(payload.get("reason") or item.reason)[:500]

        if item.op in {"update", "delete"}:
            # Both operations touch an existing file, so they need its hash.
            base_hash = self._resolve_base_hash(project_root, item.path)
            if not base_hash:
                return None, f"{item.op.upper()} пропущен: не удалось вычислить base_hash для {item.path}."
            payload["base_hash"] = base_hash
        if item.op == "delete":
            # Drop any content the model may have proposed for a deletion.
            payload.pop("proposed_content", None)

        try:
            return ChangeItem.model_validate(payload), None
        except Exception as exc:  # validation boundary: report instead of raising
            return None, f"Невалидный changeset item для {item.path}: {exc}"

    def _parse_json(self, raw: str) -> dict[str, object] | None:
        """Parse ``raw`` as JSON; return it only if it is a JSON object.

        Returns ``None`` for empty input, invalid JSON, or any JSON value that
        is not an object (list, string, number, ...).
        """
        text = str(raw or "").strip()
        if not text:
            return None
        try:
            value = json.loads(text)
        except json.JSONDecodeError:
            return None
        return value if isinstance(value, dict) else None

    def _resolve_base_hash(self, project_root: str, rel_path: str) -> str:
        """Return the SHA-256 hex digest of a project file, or ``""``.

        ``""`` is returned when ``project_root`` is empty or not absolute, when
        the target is missing or not a regular file, when it cannot be read as
        UTF-8, or when ``rel_path`` points outside ``project_root``.

        Args:
            project_root: Absolute project directory (``~`` is expanded).
            rel_path: File path, expected to be relative to ``project_root``.
        """
        root = Path(project_root or "").expanduser()
        if not root.is_absolute():
            return ""
        try:
            resolved_root = root.resolve()
            # Joining with an absolute ``rel_path`` discards ``root`` entirely
            # and ``..`` segments can climb out of it, so resolve first and
            # verify containment before touching the file.
            target = (root / rel_path).resolve()
        except (OSError, RuntimeError, ValueError):
            return ""
        if not target.is_relative_to(resolved_root):
            return ""
        if not target.is_file():
            return ""
        try:
            # NOTE(review): the digest is taken over the decoded text
            # re-encoded as UTF-8 (text-mode read), not over the raw bytes —
            # presumably this matches how the changeset consumer hashes the
            # file; confirm before changing.
            content = target.read_text(encoding="utf-8")
        except (OSError, ValueError):
            # Unreadable or non-UTF-8 file: treated as "no hash available".
            return ""
        return hashlib.sha256(content.encode("utf-8")).hexdigest()
@@ -0,0 +1,50 @@
from __future__ import annotations
from pathlib import Path
from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.models import DocRulesBundle
class DocRulesLoader:
    """Read a ``doc_rules`` directory tree into a ``DocRulesBundle``.

    Missing files or folders and unreadable files never raise; they simply
    yield empty strings / empty dicts.
    """

    def __init__(self, root: Path | None = None) -> None:
        """Use ``root`` if given, else a ``doc_rules`` folder located via ``parents[3]`` of this module's resolved path."""
        self._root = root or (Path(__file__).resolve().parents[3] / "doc_rules")

    @property
    def root(self) -> Path:
        """Directory the rules are loaded from."""
        return self._root

    def load(self) -> DocRulesBundle:
        """Load every rule group from disk and return them as one bundle."""
        base = self._root
        markdown = ".md"
        return DocRulesBundle(
            documentation_rules=self._read_file(base / "documentation-rules.md"),
            global_rules=self._read_folder(base / "global", suffix=markdown),
            artifact_rules=self._read_folder(base / "artifact-types", suffix=markdown),
            templates=self._read_templates(base / "templates"),
            sections=self._read_folder(base / "sections", suffix=markdown),
        )

    def _read_templates(self, folder: Path) -> dict[str, str]:
        """Map template name (file name minus ``.template.md``) to its text."""
        if not folder.is_dir():
            return {}
        cut = len(".template.md")
        return {
            template.name[:-cut]: self._safe_read(template)
            for template in sorted(folder.glob("*.template.md"))
        }

    def _read_folder(self, folder: Path, suffix: str) -> dict[str, str]:
        """Map file stem to text for every ``*<suffix>`` file directly in ``folder``."""
        if not folder.is_dir():
            return {}
        return {
            entry.stem: self._safe_read(entry)
            for entry in sorted(folder.glob(f"*{suffix}"))
        }

    def _read_file(self, path: Path) -> str:
        """Return the text of ``path``, or ``""`` if it is not an existing regular file."""
        if path.is_file():
            return self._safe_read(path)
        return ""

    def _safe_read(self, path: Path) -> str:
        """Read ``path`` as UTF-8 text; any failure yields ``""`` (best effort)."""
        try:
            text = path.read_text(encoding="utf-8")
        except Exception:
            return ""
        return text
@@ -0,0 +1,61 @@
from __future__ import annotations
from dataclasses import dataclass
from dataclasses import field
@dataclass(slots=True)
class DocRulesBundle:
documentation_rules: str = ""
global_rules: dict[str, str] = field(default_factory=dict)
artifact_rules: dict[str, str] = field(default_factory=dict)
templates: dict[str, str] = field(default_factory=dict)
sections: dict[str, str] = field(default_factory=dict)
@property
def supported_doc_types(self) -> set[str]:
return set(self.artifact_rules.keys()) & set(self.templates.keys())
def has_doc_type(self, doc_type: str) -> bool:
return doc_type in self.supported_doc_types
def render_global(self) -> str:
parts: list[str] = []
if self.documentation_rules.strip():
parts.append("### documentation-rules.md")
parts.append(self.documentation_rules.strip())
for name in sorted(self.global_rules.keys()):
value = self.global_rules.get(name, "").strip()
if not value:
continue
parts.append(f"### global/{name}.md")
parts.append(value)
return "\n\n".join(parts).strip()
@dataclass(slots=True)
class SelectedDocRules:
doc_type: str
global_text: str
artifact_text: str
template_text: str
section_texts: dict[str, str] = field(default_factory=dict)
def render_for_prompt(self) -> str:
parts: list[str] = []
if self.global_text.strip():
parts.append("## Global rules")
parts.append(self.global_text.strip())
if self.artifact_text.strip():
parts.append(f"## Artifact rules ({self.doc_type})")
parts.append(self.artifact_text.strip())
if self.template_text.strip():
parts.append(f"## Template ({self.doc_type})")
parts.append(self.template_text.strip())
for name in sorted(self.section_texts.keys()):
text = self.section_texts.get(name, "").strip()
if not text:
continue
parts.append(f"## Section rule: {name}")
parts.append(text)
return "\n\n".join(parts).strip()

Some files were not shown because too many files have changed in this diff Show More