diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..af98d06
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,5 @@
+{
+ "files.exclude": {
+ "**/__pycache__": true
+ }
+}
diff --git a/_process.zip b/_process.zip
index c954819..ce74e52 100644
Binary files a/_process.zip and b/_process.zip differ
diff --git a/src/app/core/agent/processes/__init__.py b/src/app/core/agent/processes/__init__.py
index 1583712..510c092 100644
--- a/src/app/core/agent/processes/__init__.py
+++ b/src/app/core/agent/processes/__init__.py
@@ -1,6 +1,6 @@
from app.core.agent.processes.base import AgentProcess, ProcessResult
from app.core.agent.processes.v1.process import V1Process
-from app.core.agent.processes.v2.process import V2Process
+from app.core.agent.processes.v2.v2_process import V2Process
__all__ = [
"AgentProcess",
diff --git a/src/app/core/agent/processes/base.py b/src/app/core/agent/processes/base.py
index 6a49017..9b67fb8 100644
--- a/src/app/core/agent/processes/base.py
+++ b/src/app/core/agent/processes/base.py
@@ -2,8 +2,11 @@ from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
+from dataclasses import field
from typing import TYPE_CHECKING
+from app.schemas.changeset import ChangeItem
+
if TYPE_CHECKING:
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
@@ -11,6 +14,8 @@ if TYPE_CHECKING:
@dataclass(slots=True)
class ProcessResult:  # value object with an answer string plus an optional changeset; presumably what an AgentProcess run returns — confirm
answer: str = ""  # answer text; empty string by default
+ changeset: list[ChangeItem] = field(default_factory=list)  # ChangeItem entries; default_factory gives each instance its own empty list
+ apply_changeset: bool = False  # off by default; presumably tells the caller whether `changeset` should be applied — confirm against callers
class AgentProcess(ABC):
diff --git a/src/app/core/agent/processes/v2/__init__.py b/src/app/core/agent/processes/v2/__init__.py
index 3c34915..b2633c3 100644
--- a/src/app/core/agent/processes/v2/__init__.py
+++ b/src/app/core/agent/processes/v2/__init__.py
@@ -1,4 +1,11 @@
-from app.core.agent.processes.v2.process import V2Process
from app.core.agent.processes.v2.intent_router.router import V2IntentRouter
__all__ = ["V2IntentRouter", "V2Process"]
+
+
+def __getattr__(name: str):
+ if name == "V2Process":
+ from app.core.agent.processes.v2.v2_process import V2Process
+
+ return V2Process
+ raise AttributeError(name)
diff --git a/src/app/core/agent/processes/v2/doc_rules.zip b/src/app/core/agent/processes/v2/doc_rules.zip
new file mode 100644
index 0000000..0b5cb5b
Binary files /dev/null and b/src/app/core/agent/processes/v2/doc_rules.zip differ
diff --git a/src/app/core/agent/processes/v2/doc_rules/README.md b/src/app/core/agent/processes/v2/doc_rules/README.md
new file mode 100644
index 0000000..7d5472f
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/README.md
@@ -0,0 +1,53 @@
+# Documentation Rules Index
+
+Этот каталог содержит локализованную проекцию правил построения документации проекта.
+Источником истины для структуры и качества документов являются process-документы:
+- `/Users/alex/Dev_projects_v2/ai driven app process/v2/agent/_process/01. Process.md`
+- `/Users/alex/Dev_projects_v2/ai driven app process/v2/agent/_process/04. Analitycs artefacts.md`
+
+Файлы ниже не должны противоречить этим документам, а лишь конкретизируют их для `test_echo_app`.
+
+## Порядок использования
+
+1. Сначала прочитать `global/documentation-system.md`.
+2. Затем прочитать `global/frontmatter.md` и `global/linking.md`.
+3. Затем выбрать правило из `artifact-types/` по `doc_type`.
+4. Затем использовать шаблон из `templates/`.
+5. Для уточнения отдельных частей документа использовать правила из `sections/`.
+
+## Структура каталога
+
+- `global/` — общие правила системы документации.
+- `artifact-types/` — правила по типам артефактов.
+- `sections/` — правила для отдельных секций документов.
+- `templates/` — шаблоны документов.
+
+## Содержимое
+
+### Global
+- `global/documentation-system.md`
+- `global/frontmatter.md`
+- `global/writing-style.md`
+- `global/linking.md`
+- `global/naming.md`
+
+### Artifact types
+- `artifact-types/api_method.md`
+- `artifact-types/logic_block.md`
+- `artifact-types/architecture_overview.md`
+- `artifact-types/domain_entity.md`
+- `artifact-types/ui_page.md`
+- `artifact-types/integration_doc.md`
+
+### Sections
+- `sections/summary.md`
+- `sections/details.md`
+- `sections/api-scenario.md`
+- `sections/api-contract.md`
+- `sections/requirements-format.md`
+
+### Templates
+- `templates/api_method.template.md`
+- `templates/logic_block.template.md`
+- `templates/architecture_overview.template.md`
+- `templates/domain_entity.template.md`
diff --git a/src/app/core/agent/processes/v2/doc_rules/artifact-types/api_method.md b/src/app/core/agent/processes/v2/doc_rules/artifact-types/api_method.md
new file mode 100644
index 0000000..bfc6428
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/artifact-types/api_method.md
@@ -0,0 +1,39 @@
+# API Method Rules
+
+## Назначение
+
+Этот файл задает правила для документов типа `api_method`.
+
+## Когда использовать
+
+Использовать для описания одного HTTP endpoint или одного отдельного API метода.
+
+## Обязательная структура
+
+Документ должен содержать:
+- YAML frontmatter
+- `# <title>`
+- `## Summary`
+- `## Details`
+
+Внутри `## Details` обязательны:
+- `### Описание`
+- `### Сценарий`
+- `### Функциональные требования`
+- `### Нефункциональные требования`
+- `### Контракт`
+
+## Особые правила
+
+- Сценарий оформляется как технический use case.
+- Функциональные требования маркируются `FR-*`.
+- Нефункциональные требования маркируются `NFR-*`.
+- Контракт должен быть пригоден для последующей сборки OpenAPI.
+- Если у метода есть интеграции, они выносятся в `### Интеграции`.
+- Ошибки и HTTP-коды либо описываются в `### Ошибки`, либо ссылаются на централизованный каталог ошибок.
+
+## Ошибки оформления
+
+- Нельзя заменять контракт общим текстовым описанием.
+- Нельзя смешивать несколько endpoint в одном документе.
+- Нельзя хранить связи и навигацию вне frontmatter.
diff --git a/src/app/core/agent/processes/v2/doc_rules/artifact-types/architecture_overview.md b/src/app/core/agent/processes/v2/doc_rules/artifact-types/architecture_overview.md
new file mode 100644
index 0000000..e4b146e
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/artifact-types/architecture_overview.md
@@ -0,0 +1,31 @@
+# Architecture Overview Rules
+
+## Назначение
+
+Этот файл задает правила для документов типа `architecture_overview`.
+
+## Когда использовать
+
+Использовать как входной документ для понимания системы, модуля или сервиса.
+
+## Обязательная структура
+
+Документ должен содержать:
+- YAML frontmatter
+- `# <title>`
+- `## Summary`
+- `## Details`
+
+## Что описывать в Details
+
+- границы системы
+- основные компоненты
+- ключевые взаимодействия
+- интеграционные сценарии
+- главные ограничения
+- ссылки на дочерние документы по API, logic, domain и другим артефактам
+
+## Ошибки оформления
+
+- Нельзя дублировать в архитектурном обзоре полные API-контракты.
+- Нельзя делать архитектурный обзор единственным документом на всю систему без декомпозиции.
diff --git a/src/app/core/agent/processes/v2/doc_rules/artifact-types/domain_entity.md b/src/app/core/agent/processes/v2/doc_rules/artifact-types/domain_entity.md
new file mode 100644
index 0000000..c533266
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/artifact-types/domain_entity.md
@@ -0,0 +1,30 @@
+# Domain Entity Rules
+
+## Назначение
+
+Этот файл задает правила для документов типа `domain_entity`.
+
+## Когда использовать
+
+Использовать для описания одной доменной сущности, ее смысла, состояния и роли в системе.
+
+## Обязательная структура
+
+Документ должен содержать:
+- YAML frontmatter
+- `# <title>`
+- `## Summary`
+- `## Details`
+
+## Что описывать в Details
+
+- смысл сущности
+- ключевые атрибуты
+- состояния или инварианты
+- использование сущности в системе
+- интеграции с API, workflow или внешними потребителями, если они важны для понимания модели
+
+## Ошибки оформления
+
+- Нельзя смешивать несколько независимых сущностей в одном документе.
+- Нельзя подменять доменную сущность описанием endpoint или workflow.
diff --git a/src/app/core/agent/processes/v2/doc_rules/artifact-types/integration_doc.md b/src/app/core/agent/processes/v2/doc_rules/artifact-types/integration_doc.md
new file mode 100644
index 0000000..7f1fe35
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/artifact-types/integration_doc.md
@@ -0,0 +1,25 @@
+# Integration Doc Rules
+
+## Назначение
+
+Этот файл задает правила для документов типа `integration_doc`.
+
+## Когда использовать
+
+Использовать для описания интеграции между системами, сервисами или внешними провайдерами.
+
+## Обязательная структура
+
+Документ должен содержать:
+- YAML frontmatter
+- `# <title>`
+- `## Summary`
+- `## Details`
+
+## Что описывать в Details
+
+- цель интеграции
+- участвующие стороны
+- направление обмена
+- ключевой сценарий взаимодействия
+- ограничения и риски
diff --git a/src/app/core/agent/processes/v2/doc_rules/artifact-types/logic_block.md b/src/app/core/agent/processes/v2/doc_rules/artifact-types/logic_block.md
new file mode 100644
index 0000000..788e4f7
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/artifact-types/logic_block.md
@@ -0,0 +1,31 @@
+# Logic Block Rules
+
+## Назначение
+
+Этот файл задает правила для документов типа `logic_block`.
+
+## Когда использовать
+
+Использовать для описания одного законченного блока логики, workflow или процесса.
+
+## Обязательная структура
+
+Документ должен содержать:
+- YAML frontmatter
+- `# <title>`
+- `## Summary`
+- `## Details`
+
+## Что описывать в Details
+
+- назначение логического блока
+- входы и выходы
+- последовательность выполнения
+- интеграции
+- ключевые ограничения
+- состояние и ошибки, если они важны для понимания блока
+
+## Ошибки оформления
+
+- Нельзя описывать весь модуль целиком, если логика распадается на несколько независимых блоков.
+- Нельзя превращать документ в пересказ исходного кода построчно.
diff --git a/src/app/core/agent/processes/v2/doc_rules/artifact-types/ui_page.md b/src/app/core/agent/processes/v2/doc_rules/artifact-types/ui_page.md
new file mode 100644
index 0000000..9640671
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/artifact-types/ui_page.md
@@ -0,0 +1,24 @@
+# UI Page Rules
+
+## Назначение
+
+Этот файл задает правила для документов типа `ui_page`.
+
+## Когда использовать
+
+Использовать для описания одной пользовательской страницы, экрана или отдельного UI-сценария.
+
+## Обязательная структура
+
+Документ должен содержать:
+- YAML frontmatter
+- `# <title>`
+- `## Summary`
+- `## Details`
+
+## Что описывать в Details
+
+- назначение страницы
+- пользовательский сценарий
+- основные блоки интерфейса
+- связанные API и сущности
diff --git a/src/app/core/agent/processes/v2/doc_rules/documentation-rules.md b/src/app/core/agent/processes/v2/doc_rules/documentation-rules.md
new file mode 100644
index 0000000..1be0884
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/documentation-rules.md
@@ -0,0 +1,71 @@
+# Documentation Rules
+
+Этот каталог оформляет MVP документации проекта в атомарном формате.
+
+## Базовая структура
+
+- Каждый документ содержит YAML frontmatter.
+- В документе должен быть один `H1`, совпадающий с `title`.
+- Основные разделы оформляются как `## Summary` и `## Details`.
+- Внутри `Details` используются заголовки уровня `###` и ниже.
+- Связи, сущности и навигация описываются во frontmatter через `related_docs`, `links`, `entities`, `parent`, `children`.
+
+## Summary
+
+- Краткий explain-слой быстрого контекста.
+- Должен позволять быстро понять назначение документа без чтения `Details`.
+- Предпочтительный формат: компактный список ключевых фактов без длинных абзацев.
+
+## Details
+
+- Раскрывает полное описание объекта.
+- Структура `Details` зависит от типа документа.
+- Сценарии, ограничения, интеграции, ошибки и кодовые привязки должны быть разнесены по отдельным подразделам.
+
+## API documents
+
+Для `api_method` внутри `## Details` обязательны разделы:
+- `### Описание`
+- `### Сценарий`
+- `### Функциональные требования`
+- `### Нефункциональные требования`
+- `### Контракт`
+
+Если у метода есть интеграции и ошибки, также обязательны:
+- `### Интеграции`
+- `### Ошибки`
+- `### Связанный код`
+- `### История изменений`
+
+### Сценарий
+
+Сценарий оформляется как технический use case и содержит:
+- название
+- предусловия
+- триггер
+- основной сценарий
+- альтернативный сценарий
+- обработку ошибок
+- постусловие
+
+### Требования
+
+- Функциональные требования маркируются как `FR-1`, `FR-2`, ...
+- Нефункциональные требования маркируются как `NFR-1`, `NFR-2`, ...
+- Идентификаторы требований локальны в рамках одного документа.
+
+### Контракт
+
+Контракт должен быть пригоден для последующей сборки OpenAPI-спецификации и включать:
+- входные параметры
+- выходные параметры
+- структуру JSON-сообщений
+- обязательность полей
+- типы и ограничения
+- описание полей
+- правила заполнения
+- примеры данных
+- auth
+- idempotency
+- timeout
+- ошибки и их HTTP-коды
diff --git a/src/app/core/agent/processes/v2/doc_rules/global/documentation-system.md b/src/app/core/agent/processes/v2/doc_rules/global/documentation-system.md
new file mode 100644
index 0000000..656b569
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/global/documentation-system.md
@@ -0,0 +1,38 @@
+# Documentation System
+
+## Назначение
+
+Этот файл задает общую модель документации проекта.
+
+## Базовая модель
+
+Каждый документ должен состоять из двух слоев:
+- YAML frontmatter
+- контент
+
+Контент всегда состоит из двух обязательных разделов:
+- `## Summary`
+- `## Details`
+
+Над ними должен быть один заголовок `# <title>`, совпадающий со значением `title` во frontmatter.
+
+## Принципы
+
+- Документы должны быть атомарными.
+- Один документ описывает одну тему.
+- Вместо дублирования между документами используются явные ссылки.
+- Связи и навигация должны быть формализованы.
+- Документы должны быть пригодны для чтения человеком и для RAG.
+- Документы должны быть пригодны для частичного обновления без деградации структуры.
+
+## Типы документов
+
+На уровне проекта поддерживаются типы:
+- `api_method`
+- `logic_block`
+- `architecture_overview`
+- `domain_entity`
+- `ui_page`
+- `integration_doc`
+- `index_page`
+- `glossary_item`
diff --git a/src/app/core/agent/processes/v2/doc_rules/global/frontmatter.md b/src/app/core/agent/processes/v2/doc_rules/global/frontmatter.md
new file mode 100644
index 0000000..2b4dee5
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/global/frontmatter.md
@@ -0,0 +1,67 @@
+# Frontmatter Rules
+
+## Назначение
+
+Этот файл описывает единый контракт YAML frontmatter для всех документов.
+
+## Обязательные поля
+
+```yaml
+id: string
+title: string
+doc_type: string
+domain: string
+sub_domain: string
+related_docs: []
+status: string
+```
+
+## Поля совместимости и рекомендуемые поля
+
+```yaml
+type: string
+name: string
+module: string
+layer: string
+updated_at: YYYY-MM-DD
+tags: []
+entities: []
+parent: string | null
+children: []
+links: {}
+source_of_truth: string
+related_code: []
+system_analytics_refs: []
+```
+
+## Правила
+
+- `id` должен быть стабильным и уникальным в пределах документации проекта.
+- `title` — человекочитаемый заголовок.
+- `doc_type` — канонический тип документа.
+- `domain` и `sub_domain` определяют бизнес-контекст документа.
+- `related_docs` хранит явные связи с другими markdown-документами.
+- `status` хранит жизненный цикл документа: например `draft`, `approved`, `active`.
+- `type` допустимо дублировать как alias для tooling-совместимости с индексаторами.
+- `name` — короткое системное имя документа.
+- `module` — модуль или подсистема.
+- `layer` — слой системы.
+- `updated_at` хранится в формате `YYYY-MM-DD`.
+
+## Связи и навигация
+
+- `entities` описывает сущности, связанные с документом.
+- `parent` и `children` описывают иерархию.
+- `links` описывает typed graph связей между документами, кодом и интеграциями.
+
+## Формат links
+
+```yaml
+links:
+ called_by:
+ - ext.health_probe
+ uses_logic:
+ - logic.some_flow
+ integrates_with:
+ - ext.some_system
+```
diff --git a/src/app/core/agent/processes/v2/doc_rules/global/linking.md b/src/app/core/agent/processes/v2/doc_rules/global/linking.md
new file mode 100644
index 0000000..6e49171
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/global/linking.md
@@ -0,0 +1,33 @@
+# Linking Rules
+
+## Назначение
+
+Этот файл описывает, как связывать документы между собой.
+
+## Иерархия
+
+- `parent` используется для родительского документа.
+- `children` используется для прямых дочерних документов.
+- Иерархия должна быть осмысленной и стабильной.
+- Для общей точки входа допустим `index_page`.
+
+## Графовые связи
+
+Для `related_docs` используются ссылки на соседние документы.
+
+Для `links` рекомендуется использовать typed-ключи:
+- `called_by`
+- `uses_logic`
+- `reads_db`
+- `writes_db`
+- `integrates_with`
+- `used_by`
+- `exposes_api`
+- `uses_entities`
+
+## Правила использования
+
+- Если документ логически входит в другой, использовать `parent`/`children`.
+- Если связь нужна для навигации между равноправными документами, дублировать ее в `related_docs`.
+- Если связь отражает поведение, интеграции или переиспользование, фиксировать ее в `links`.
+- Детальное описание интеграций хранить в body документа, а не только во frontmatter.
diff --git a/src/app/core/agent/processes/v2/doc_rules/global/naming.md b/src/app/core/agent/processes/v2/doc_rules/global/naming.md
new file mode 100644
index 0000000..c722416
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/global/naming.md
@@ -0,0 +1,24 @@
+# Naming Rules
+
+## Назначение
+
+Этот файл описывает правила именования документов, файлов и идентификаторов.
+
+## Правила для файлов
+
+- Имена файлов должны быть в kebab-case.
+- Имя файла должно отражать одну тему.
+- Для шаблонов использовать суффикс `.template.md`.
+
+## Правила для id
+
+- `id` строится в формате `<prefix>.<name>`.
+- Примеры:
+ - `api.send_message_endpoint`
+ - `logic.telegram_notification_loop`
+ - `architecture.telegram_notify_app`
+
+## Правила для title
+
+- `title` должен быть кратким и человекочитаемым.
+- В `title` допускаются пробелы и естественный язык.
diff --git a/src/app/core/agent/processes/v2/doc_rules/global/writing-style.md b/src/app/core/agent/processes/v2/doc_rules/global/writing-style.md
new file mode 100644
index 0000000..6c1caec
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/global/writing-style.md
@@ -0,0 +1,19 @@
+# Writing Style
+
+## Назначение
+
+Этот файл задает правила стиля для текстового наполнения документации.
+
+## Правила стиля
+
+- Текст должен быть лаконичным.
+- Формулировки должны быть точными и техническими.
+- Summary должен быть кратким explain-слоем.
+- Details должен раскрывать суть без лишней воды.
+- Нежелательно смешивать несколько тем в одном документе.
+- Если детали относятся к другому артефакту, их нужно выносить в отдельный документ.
+
+## Язык
+
+- Основной язык документации — русский.
+- Технические термины, названия классов, API, RAG, OpenAPI, runtime и другие устоявшиеся identifiers можно оставлять на английском.
diff --git a/src/app/core/agent/processes/v2/doc_rules/sections/api-contract.md b/src/app/core/agent/processes/v2/doc_rules/sections/api-contract.md
new file mode 100644
index 0000000..fc313ea
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/sections/api-contract.md
@@ -0,0 +1,24 @@
+# API Contract Rules
+
+## Назначение
+
+Этот файл описывает, как оформлять подраздел `### Контракт` в API-документах.
+
+## Что должно быть описано
+
+- входные параметры
+- выходные параметры
+- JSON-структуры запросов и ответов
+- обязательность полей
+- типы полей
+- ограничения
+- описание назначения полей
+- примеры данных
+- auth
+- idempotency
+- timeout
+- ошибки и их HTTP-коды
+
+## Правило качества
+
+Контракт должен быть достаточно формальным, чтобы по нему можно было собрать OpenAPI-спецификацию.
diff --git a/src/app/core/agent/processes/v2/doc_rules/sections/api-scenario.md b/src/app/core/agent/processes/v2/doc_rules/sections/api-scenario.md
new file mode 100644
index 0000000..c9066af
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/sections/api-scenario.md
@@ -0,0 +1,21 @@
+# API Scenario Rules
+
+## Назначение
+
+Этот файл описывает, как оформлять подраздел `### Сценарий` в API-документах.
+
+## Обязательные части
+
+- название
+- предусловия
+- триггер
+- основной сценарий
+- альтернативный сценарий
+- обработка ошибок
+- постусловие
+
+## Правила
+
+- Сценарий должен быть лаконичным.
+- Сценарий должен отражать суть шага.
+- Сложные технические детали надо выносить в `FR-*`.
diff --git a/src/app/core/agent/processes/v2/doc_rules/sections/details.md b/src/app/core/agent/processes/v2/doc_rules/sections/details.md
new file mode 100644
index 0000000..33ed824
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/sections/details.md
@@ -0,0 +1,13 @@
+# Details Section Rules
+
+## Назначение
+
+Этот файл задает общие правила для секции `## Details`.
+
+## Правила
+
+- `Details` оформляется как `## Details`.
+- Внутри `Details` используются заголовки уровня `###` и ниже.
+- Структура Details зависит от типа документа.
+- В Details не нужно повторно дублировать навигацию и связи, если они уже есть во frontmatter.
+- Интеграции, ошибки и кодовые привязки должны быть выделены в отдельные подразделы, если они существенны для понимания документа.
diff --git a/src/app/core/agent/processes/v2/doc_rules/sections/requirements-format.md b/src/app/core/agent/processes/v2/doc_rules/sections/requirements-format.md
new file mode 100644
index 0000000..14eb0c3
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/sections/requirements-format.md
@@ -0,0 +1,16 @@
+# Requirements Format Rules
+
+## Назначение
+
+Этот файл задает формат для функциональных и нефункциональных требований.
+
+## Функциональные требования
+
+- Использовать коды `FR-1`, `FR-2`, `FR-3` и так далее.
+- Каждое требование должно описывать отдельный обязательный аспект поведения.
+- Идентификаторы локальны в пределах одного документа.
+
+## Нефункциональные требования
+
+- Использовать коды `NFR-1`, `NFR-2`, `NFR-3` и так далее.
+- Требования должны описывать характеристики качества, ограничения и эксплуатационные свойства.
diff --git a/src/app/core/agent/processes/v2/doc_rules/sections/summary.md b/src/app/core/agent/processes/v2/doc_rules/sections/summary.md
new file mode 100644
index 0000000..7f7fa7b
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/sections/summary.md
@@ -0,0 +1,13 @@
+# Summary Section Rules
+
+## Назначение
+
+Этот файл задает правила для секции `## Summary`.
+
+## Правила
+
+- Summary должен быть коротким explain-слоем быстрого контекста.
+- Summary должен объяснять суть документа без лишних деталей.
+- Summary должен быть пригоден для explain и быстрого чтения.
+- Предпочтительный формат: список ключевых фактов `Purpose`, `Actor`, `Trigger`, `Errors`, `Related ...` и т.д.
+- Для крупных документов допустим более длинный summary, если он остается структурированным.
diff --git a/src/app/core/agent/processes/v2/doc_rules/templates/api_method.template.md b/src/app/core/agent/processes/v2/doc_rules/templates/api_method.template.md
new file mode 100644
index 0000000..ba2ac07
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/templates/api_method.template.md
@@ -0,0 +1,84 @@
+---
+id: api.example_method
+type: api_method
+doc_type: api_method
+name: example_method
+title: HTTP API /example
+module: example_module
+layer: application
+domain: example_domain
+sub_domain: example_subdomain
+related_docs: []
+status: draft
+updated_at: 2026-03-20
+source_of_truth: code
+parent: null
+children: []
+tags: []
+entities: []
+links: {}
+---
+
+# HTTP API /example
+
+## Summary
+
+Краткое описание метода.
+
+## Details
+
+### Описание
+
+Короткое описание сути метода.
+
+### Сценарий
+
+**Название:**
+
+**Предусловия:**
+-
+
+**Триггер:**
+-
+
+**Основной сценарий:**
+1.
+
+**Альтернативный сценарий:**
+1.
+
+**Обработка ошибок:**
+1.
+
+**Постусловие:**
+-
+
+### Функциональные требования
+
+**FR-1.**
+
+### Нефункциональные требования
+
+**NFR-1.**
+
+### Контракт
+
+#### Входные параметры
+
+| Параметр | Где передается | Тип | Обязательность | Ограничения | Описание | Пример |
+|---|---|---|---|---|---|---|
+| | | | | | | |
+
+#### Выходные параметры
+
+| Поле | Тип | Обязательность | Ограничения | Описание | Заполнение | Пример |
+|---|---|---|---|---|---|---|
+| | | | | | | |
+
+### Интеграции
+
+### Ошибки
+
+### Связанный код
+
+### История изменений
diff --git a/src/app/core/agent/processes/v2/doc_rules/templates/architecture_overview.template.md b/src/app/core/agent/processes/v2/doc_rules/templates/architecture_overview.template.md
new file mode 100644
index 0000000..21c7319
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/templates/architecture_overview.template.md
@@ -0,0 +1,48 @@
+---
+id: architecture.example_system
+type: architecture_overview
+doc_type: architecture_overview
+name: example_system
+title: Обзор архитектуры Example System
+module: example_module
+layer: system
+domain: example_domain
+sub_domain: example_subdomain
+related_docs: []
+status: draft
+updated_at: 2026-03-20
+source_of_truth: mixed
+parent: null
+children: []
+tags: []
+entities: []
+links: {}
+---
+
+# Обзор архитектуры Example System
+
+## Summary
+
+Краткое описание архитектуры.
+
+## Details
+
+### Описание
+
+### Контекст
+
+### Границы системы
+
+### Компоненты
+
+### Интеграционные сценарии
+
+### Интеграции
+
+### Ограничения
+
+### Связанный код
+
+### Связанные документы
+
+### История изменений
diff --git a/src/app/core/agent/processes/v2/doc_rules/templates/domain_entity.template.md b/src/app/core/agent/processes/v2/doc_rules/templates/domain_entity.template.md
new file mode 100644
index 0000000..f8fd65e
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/templates/domain_entity.template.md
@@ -0,0 +1,48 @@
+---
+id: domain.example_entity
+type: domain_entity
+doc_type: domain_entity
+name: example_entity
+title: Пример доменной сущности
+module: example_module
+layer: domain
+domain: example_domain
+sub_domain: example_subdomain
+related_docs: []
+status: draft
+updated_at: 2026-03-20
+source_of_truth: code
+parent: null
+children: []
+tags: []
+entities: []
+links: {}
+---
+
+# Пример доменной сущности
+
+## Summary
+
+Краткое описание сущности.
+
+## Details
+
+### Описание
+
+### Модель данных
+
+### Состояния и инварианты
+
+### Технический use case
+
+### Функциональные требования
+
+### Нефункциональные требования
+
+### Интеграции
+
+### Связанный код
+
+### Связанные документы
+
+### История изменений
diff --git a/src/app/core/agent/processes/v2/doc_rules/templates/logic_block.template.md b/src/app/core/agent/processes/v2/doc_rules/templates/logic_block.template.md
new file mode 100644
index 0000000..36e1d5d
--- /dev/null
+++ b/src/app/core/agent/processes/v2/doc_rules/templates/logic_block.template.md
@@ -0,0 +1,50 @@
+---
+id: logic.example_block
+type: logic_block
+doc_type: logic_block
+name: example_block
+title: Пример блока логики
+module: example_module
+layer: application
+domain: example_domain
+sub_domain: example_subdomain
+related_docs: []
+status: draft
+updated_at: 2026-03-20
+source_of_truth: code
+parent: null
+children: []
+tags: []
+entities: []
+links: {}
+---
+
+# Пример блока логики
+
+## Summary
+
+Краткое описание блока логики.
+
+## Details
+
+### Описание
+
+### Контекст
+
+### Технический use case
+
+### Функциональные требования
+
+### Нефункциональные требования
+
+### Интеграции
+
+### Ограничения и условия вызова
+
+### Ошибки и деградации
+
+### Связанные API
+
+### Связанный код
+
+### История изменений
diff --git a/src/app/core/agent/processes/v2/intent_router/models.py b/src/app/core/agent/processes/v2/intent_router/models.py
index a054940..91c04d3 100644
--- a/src/app/core/agent/processes/v2/intent_router/models.py
+++ b/src/app/core/agent/processes/v2/intent_router/models.py
@@ -16,3 +16,4 @@ class QueryFeatures:
logic_markers: list[str]
domain_markers: list[str]
endpoint_markers: list[str]
+ scope_type: str = "unknown"
diff --git a/src/app/core/agent/processes/v2/intent_router/modules/anchors.py b/src/app/core/agent/processes/v2/intent_router/modules/anchors.py
index 61d83d5..b9ca68c 100644
--- a/src/app/core/agent/processes/v2/intent_router/modules/anchors.py
+++ b/src/app/core/agent/processes/v2/intent_router/modules/anchors.py
@@ -4,7 +4,7 @@ import re
from dataclasses import dataclass
from app.core.agent.processes.v2.intent_router.modules.target_terms import TargetTermsAnalysis
-from app.core.agent.processes.v2.models import V2RouteAnchors
+from app.core.agent.utils.process_v2.models import V2RouteAnchors
@dataclass(slots=True)
diff --git a/src/app/core/agent/processes/v2/intent_router/modules/scope_catalog.py b/src/app/core/agent/processes/v2/intent_router/modules/scope_catalog.py
new file mode 100644
index 0000000..15e9eb1
--- /dev/null
+++ b/src/app/core/agent/processes/v2/intent_router/modules/scope_catalog.py
@@ -0,0 +1,176 @@
+"""Build an in-memory DOCS scope index from D1/D3 catalog rows (no chunk retrieval).
+
+Parses metadata from ``D1_DOCUMENT_CATALOG`` and ``D3_ENTITY_CATALOG`` rows produced by the
+existing RAG indexer—no additional layers or chunk scans.
+"""
+
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass, field
+
+
+def _norm_text(value: object) -> str:
+ return re.sub(r"\s+", " ", str(value or "").strip().lower())
+
+
+def _split_multi(value: object) -> list[str]:
+ if value is None:
+ return []
+ if isinstance(value, list):
+ raw = value
+ else:
+ raw = re.split(r"[;,|]", str(value))
+ out: list[str] = []
+ for item in raw:
+ s = str(item).strip()
+ if s:
+ out.append(s)
+ return out
+
+
+@dataclass(slots=True)
+class DocsScopeCatalog:
+ """Flattened terms from D1_DOCUMENT_CATALOG and D3_ENTITY_CATALOG for lexical grounding."""
+
+ domain_values: set[str] = field(default_factory=set)  # normalized (trimmed, lower-cased) domain names from catalog rows
+ subdomain_pairs: list[tuple[str, str]] = field(default_factory=list) # (domain, subdomain); appended once per row, so duplicates are possible
+ entity_records: list[dict[str, object]] = field(default_factory=list)  # dicts with name/domain/subdomain/source_layer/path/blob (D3 rows also carry "module")
+ api_records: list[dict[str, object]] = field(default_factory=list)  # dicts with endpoint/domain/source_layer/path/title
+
+
+def build_docs_scope_catalog(rows: list[dict]) -> DocsScopeCatalog:
+ """Derive searchable terms from catalog layers only (existing RAG index rows)."""
+ catalog = DocsScopeCatalog()
+ for row in rows:
+ layer = str(row.get("layer") or "")
+ meta = row.get("metadata")
+ if not isinstance(meta, dict):
+ meta = {}
+ path = str(row.get("path") or "")
+ title = str(row.get("title") or "")
+ content = str(row.get("content") or "")
+
+ if layer == "D1_DOCUMENT_CATALOG":
+ _ingest_d1_row(catalog, path=path, title=title, content=content, metadata=meta)
+ elif layer == "D3_ENTITY_CATALOG":
+ _ingest_d3_row(catalog, path=path, title=title, metadata=meta)
+
+ return catalog
+
+
+def _ingest_d1_row(
+ catalog: DocsScopeCatalog,
+ *,
+ path: str,
+ title: str,
+ content: str,
+ metadata: dict,
+) -> None:
+ doc_type = _norm_text(metadata.get("type") or metadata.get("doc_type"))
+ domain = _norm_text(metadata.get("domain"))
+ subdomain = _norm_text(metadata.get("subdomain"))
+ name = _norm_text(metadata.get("name"))
+ summary = _norm_text(metadata.get("summary_text"))
+ endpoint = _norm_text(metadata.get("endpoint"))
+
+ entities = [_norm_text(e) for e in _split_multi(metadata.get("entities"))]
+ tags = [_norm_text(t) for t in _split_multi(metadata.get("tags"))]
+
+ if domain:
+ catalog.domain_values.add(domain)
+ if domain and subdomain:
+ catalog.subdomain_pairs.append((domain, subdomain))
+
+ blob = " ".join(x for x in (name, title, summary, content) if x)
+ for ent in entities:
+ if ent:
+ catalog.entity_records.append(
+ {
+ "name": ent,
+ "domain": domain or None,
+ "subdomain": subdomain or None,
+ "source_layer": "D1_DOCUMENT_CATALOG",
+ "path": path,
+ "blob": blob,
+ }
+ )
+ for tag in tags:
+ if tag and len(tag) >= 3:
+ catalog.entity_records.append(
+ {
+ "name": tag,
+ "domain": domain or None,
+ "subdomain": subdomain or None,
+ "source_layer": "D1_DOCUMENT_CATALOG",
+ "path": path,
+ "blob": blob,
+ }
+ )
+
+ is_api_method = doc_type == "api_method" or "api_method" in path.lower()
+ if is_api_method or endpoint:
+ ep = endpoint or _endpoint_from_title(title)
+ if ep:
+ catalog.api_records.append(
+ {
+ "endpoint": ep,
+ "domain": domain or None,
+ "source_layer": "D1_DOCUMENT_CATALOG",
+ "path": path,
+ "title": title,
+ }
+ )
+
+
+def _ingest_d3_row(
+ catalog: DocsScopeCatalog,
+ *,
+ path: str,
+ title: str,
+ metadata: dict,
+) -> None:
+ entity_name = str(metadata.get("entity_name") or "").strip()
+ domain = _norm_text(metadata.get("domain"))
+ subdomain = _norm_text(metadata.get("subdomain"))
+ module = _norm_text(metadata.get("module"))
+ source_path = str(metadata.get("source_path") or "").strip()
+ tags = [_norm_text(t) for t in _split_multi(metadata.get("tags"))]
+
+ if domain:
+ catalog.domain_values.add(domain)
+ if domain and subdomain:
+ catalog.subdomain_pairs.append((domain, subdomain))
+
+ blob = " ".join(
+ _norm_text(x)
+ for x in (entity_name, title, module, source_path, " ".join(tags))
+ if x
+ )
+ if entity_name:
+ catalog.entity_records.append(
+ {
+ "name": _norm_text(entity_name),
+ "domain": domain or None,
+ "subdomain": subdomain or None,
+ "module": module or None,
+ "source_layer": "D3_ENTITY_CATALOG",
+ "path": path or source_path,
+ "blob": blob,
+ }
+ )
+
+
+def _endpoint_from_title(title: str) -> str:
+ t = str(title or "").strip()
+ if not t:
+ return ""
+ upper = t.upper()
+ for method in ("GET ", "POST ", "PUT ", "PATCH ", "DELETE "):
+ if method in upper:
+ idx = upper.index(method)
+ rest = t[idx:].split()
+ if len(rest) >= 2 and rest[1].startswith("/"):
+ return _norm_text(rest[1])
+ m = re.search(r"(\/[a-z0-9_./{}-]+)", t, re.IGNORECASE)
+ return _norm_text(m.group(1)) if m else ""
diff --git a/src/app/core/agent/processes/v2/intent_router/modules/scope_resolver.py b/src/app/core/agent/processes/v2/intent_router/modules/scope_resolver.py
new file mode 100644
index 0000000..b61f967
--- /dev/null
+++ b/src/app/core/agent/processes/v2/intent_router/modules/scope_resolver.py
@@ -0,0 +1,443 @@
+"""Deterministic scope resolution from query + derived DOCS catalog (pre-LLM).
+
+Matches the user query against catalog terms (exact / normalized). Optional embedding-based
+retrieval can extend candidates later; final ``scope_type`` never relies on embeddings alone.
+"""
+
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass, field
+
+from app.core.agent.processes.v2.intent_router.modules.scope_catalog import DocsScopeCatalog
+from app.core.agent.processes.v2.intent_router.modules.target_terms import TargetTermsAnalysis
+from app.core.agent.utils.process_v2.models import ScopeCandidate, V2ScopeType
+
+
+# Match scores produced by ``_match_score`` / ``_match_entity`` / ``_phrase_score``.
+_SCORE_EXACT = 1.0
+_SCORE_NORMALIZED = 0.88
+_SCORE_SOFT = 0.72
+# Minimum score for a candidate to decide the final scope in ``resolve_docs_scope``.
+_STRONG_THRESHOLD = 0.85
+
+# Russian question / listing openers ("which ...", "list", "enumerate", "all api", ...).
+# ``_is_global_enumeration`` strips each marker and tests it as a prefix of the query.
+_ENUM_MARKERS_RU = (
+ "какие ",
+ "какие\n",
+ "какой ",
+ "какого ",
+ "список",
+ "перечисли",
+ "перечислить",
+ "все api",
+ "все методы",
+ "какие api",
+ "какие методы",
+ "каких ",
+)
+# One-segment paths that ``plausible_doc_endpoint_paths`` keeps; any other
+# single-segment path is dropped as a likely false positive.
+_SINGLE_SEGMENT_ENDPOINT_ALLOWLIST = frozenset(
+ {
+ "/health",
+ "/send",
+ "/healthz",
+ "/ready",
+ "/live",
+ "/metrics",
+ }
+)
+
+# Substrings ("in the project", "in the system", ...) that, together with a listing
+# word, make ``_is_global_enumeration`` treat the query as project-wide.
+_PROJECT_WIDE_MARKERS = (
+ "в проекте",
+ "в системе",
+ "в приложении",
+ "по проекту",
+ "во всем проекте",
+ "overall",
+ "in the project",
+)
+
+
+@dataclass(slots=True)
+class ScopeResolution:
+ """Result of ``resolve_docs_scope``: the chosen scope type plus all scored candidates."""
+
+ # One of the ``V2ScopeType`` values; stays UNKNOWN when nothing strong matched.
+ scope_type: str = V2ScopeType.UNKNOWN
+ # Scored catalog matches per kind (deduplicated and sorted by ``_dedupe_candidates``).
+ candidate_domains: list[ScopeCandidate] = field(default_factory=list)
+ candidate_subdomains: list[ScopeCandidate] = field(default_factory=list)
+ candidate_entities: list[ScopeCandidate] = field(default_factory=list)
+ candidate_apis: list[ScopeCandidate] = field(default_factory=list)
+ # High-confidence picks that the resolver fills in for the selected scope.
+ strong_domain: str | None = None
+ strong_subdomain: str | None = None
+ strong_entity_names: list[str] = field(default_factory=list)
+ strong_endpoint_paths: list[str] = field(default_factory=list)
+ # True only when a non-empty catalog was available for matching.
+ catalog_loaded: bool = False
+
+
+def _catalog_has_index_terms(catalog: DocsScopeCatalog) -> bool:
+    """Tell whether the catalog holds at least one domain, subdomain pair, entity or API record."""
+    indexed_collections = (
+        catalog.domain_values,
+        catalog.subdomain_pairs,
+        catalog.entity_records,
+        catalog.api_records,
+    )
+    return any(indexed_collections)
+
+
+def plausible_doc_endpoint_paths(paths: list[str]) -> list[str]:
+    """Filter out spurious ``/token`` paths produced by api-like heuristics.
+
+    Each path is trimmed and lower-cased. A path is kept when it starts with
+    ``/`` and either has two or more non-empty segments or is one of the
+    allow-listed single-segment paths (e.g. ``/health``). Order is preserved.
+    """
+
+    def _is_plausible(candidate: str) -> bool:
+        if not candidate.startswith("/"):
+            return False
+        depth = sum(1 for segment in candidate.split("/") if segment)
+        if depth >= 2:
+            return True
+        return depth == 1 and candidate in _SINGLE_SEGMENT_ENDPOINT_ALLOWLIST
+
+    normalized = (str(raw or "").strip().lower() for raw in paths)
+    return [candidate for candidate in normalized if _is_plausible(candidate)]
+
+
+def resolve_docs_scope(
+ normalized_query: str,
+ terms: TargetTermsAnalysis,
+ catalog: DocsScopeCatalog | None,
+) -> ScopeResolution:
+ """Lexical scope resolution; embeddings never set final scope alone (not used here).
+
+ Collects scored domain / subdomain / entity / API candidates for the query and
+ picks ``scope_type`` by the first rule that fires, in this order: plausible
+ endpoint paths from ``terms``, strong API candidate, strong subdomain aligned
+ with the query, strong entity, strong subdomain, strong domain, global
+ enumeration. Falls back to UNKNOWN.
+
+ Returns a default ``ScopeResolution`` (UNKNOWN, ``catalog_loaded=False``) when
+ ``catalog`` is ``None`` or holds no index terms.
+ """
+ resolution = ScopeResolution()
+ if catalog is None:
+ return resolution
+ if not _catalog_has_index_terms(catalog):
+ return resolution
+
+ resolution.catalog_loaded = True
+ query_l = _norm_query(normalized_query)
+ if not query_l:
+ resolution.scope_type = V2ScopeType.UNKNOWN
+ return resolution
+
+ # Gather every candidate kind, then keep the best score per (value, layer).
+ _collect_domain_candidates(query_l, catalog, resolution)
+ _collect_subdomain_candidates(query_l, catalog, resolution)
+ _collect_entity_candidates(query_l, catalog, resolution)
+ _collect_api_candidates(query_l, catalog, resolution)
+
+ _dedupe_candidates(resolution)
+
+ # Endpoint paths come from the query terms, not from the catalog candidates.
+ endpoint_paths = plausible_doc_endpoint_paths(list(terms.endpoint_paths))
+ # Best-scoring candidate of each kind (may still be below the strong threshold).
+ strong_api = _pick_strong(resolution.candidate_apis)
+ strong_entity = _pick_strong(resolution.candidate_entities)
+ strong_sub = _pick_strong(resolution.candidate_subdomains)
+ strong_dom = _pick_strong(resolution.candidate_domains)
+
+ resolution.strong_endpoint_paths = list(dict.fromkeys(endpoint_paths))
+
+ # Rule 1: an explicit endpoint path in the query wins outright.
+ if endpoint_paths:
+ resolution.scope_type = V2ScopeType.ENTITY
+ resolution.strong_entity_names = _merge_unique(resolution.strong_entity_names, _entities_for_endpoints(endpoint_paths, catalog))
+ return resolution
+
+ # Rule 2: a strong API match from the catalog.
+ if strong_api and strong_api.score >= _STRONG_THRESHOLD:
+ resolution.scope_type = V2ScopeType.ENTITY
+ resolution.strong_endpoint_paths = _merge_unique(resolution.strong_endpoint_paths, [strong_api.value])
+ return resolution
+
+ # Rule 3: a strong subdomain whose domain AND subdomain both match the query
+ # takes precedence over entities. Picked from the same list as ``strong_sub``.
+ strong_sub_pre = _pick_strong(resolution.candidate_subdomains)
+ if (
+ strong_sub_pre
+ and strong_sub_pre.score >= _STRONG_THRESHOLD
+ and _subdomain_aligned_with_query(query_l, strong_sub_pre.value)
+ ):
+ resolution.scope_type = V2ScopeType.SUBDOMAIN
+ parts = _split_subdomain_value(strong_sub_pre.value)
+ if parts:
+ resolution.strong_domain = parts[0]
+ resolution.strong_subdomain = parts[1]
+ return resolution
+
+ # Rule 4: a strong entity match.
+ if strong_entity and strong_entity.score >= _STRONG_THRESHOLD:
+ resolution.scope_type = V2ScopeType.ENTITY
+ resolution.strong_entity_names = _merge_unique(
+ resolution.strong_entity_names,
+ [strong_entity.value],
+ )
+ return resolution
+
+ # Rule 5: a strong subdomain that did not pass the alignment check above.
+ if strong_sub and strong_sub.score >= _STRONG_THRESHOLD:
+ resolution.scope_type = V2ScopeType.SUBDOMAIN
+ parts = _split_subdomain_value(strong_sub.value)
+ if parts:
+ resolution.strong_domain = parts[0]
+ resolution.strong_subdomain = parts[1]
+ return resolution
+
+ # Rule 6: a strong domain match.
+ if strong_dom and strong_dom.score >= _STRONG_THRESHOLD:
+ resolution.scope_type = V2ScopeType.DOMAIN
+ resolution.strong_domain = strong_dom.value
+ return resolution
+
+ # Rule 7: project-wide "list all ..." style question.
+ # NOTE(review): every per-kind best candidate was below the threshold by this
+ # point, so ``has_strong_any`` looks like it is always False here - confirm.
+ if _is_global_enumeration(query_l, has_strong_any=bool(_any_strong(resolution))):
+ resolution.scope_type = V2ScopeType.GLOBAL
+ return resolution
+
+ resolution.scope_type = V2ScopeType.UNKNOWN
+ return resolution
+
+
+def promote_target_terms(
+    raw_terms: list[str],
+    terms: TargetTermsAnalysis,
+    resolution: ScopeResolution,
+) -> list[str]:
+    """Keep only high-confidence terms in ``target_terms``; weak matches stay in candidate_* only.
+
+    Without a loaded catalog the raw terms are returned unchanged (as a copy).
+    Otherwise a term survives when it is, case-insensitively, an endpoint path
+    or a matched alias from ``terms``, or when its lower-cased form is a strong
+    (score >= threshold) or exact catalog candidate / strong entity name.
+    Surviving terms are de-duplicated in first-seen order; all are lower-cased
+    except endpoint matches that do not start with ``/``.
+    """
+    if not resolution.catalog_loaded:
+        return list(raw_terms)
+
+    # Everything below is loop-invariant, so build the lookup sets once.
+    strong_values = {
+        c.value
+        for c in _all_candidates(resolution)
+        if c.score >= _STRONG_THRESHOLD or c.match_type == "exact"
+    }
+    strong_entity = set(resolution.strong_entity_names)
+    endpoints = set(terms.endpoint_paths)
+    aliases = set(terms.matched_aliases)
+    endpoints_lower = {e.lower() for e in endpoints}
+    aliases_lower = {a.lower() for a in aliases}
+
+    out: list[str] = []
+    for term in raw_terms:
+        t = str(term or "").strip()
+        if not t:
+            continue
+        tl = t.lower()
+        if t in endpoints or tl in endpoints_lower:
+            _append_unique(out, tl if tl.startswith("/") else t)
+            continue
+        if t in aliases or tl in aliases_lower:
+            _append_unique(out, tl)
+            continue
+        if tl in strong_values or t in strong_entity:
+            _append_unique(out, tl)
+            continue
+        if _is_explicit_identifier(t) and tl in strong_entity:
+            _append_unique(out, tl)
+            continue
+        # Drop weak/ungrounded terms (remain only in candidates on anchors)
+    return out
+
+
+def _all_candidates(resolution: ScopeResolution) -> list[ScopeCandidate]:
+    """Return domain, subdomain, entity and API candidates as one new list, in that order."""
+    merged: list[ScopeCandidate] = []
+    for group in (
+        resolution.candidate_domains,
+        resolution.candidate_subdomains,
+        resolution.candidate_entities,
+        resolution.candidate_apis,
+    ):
+        merged.extend(group)
+    return merged
+
+
+def _any_strong(resolution: ScopeResolution) -> bool:
+    """Tell whether any candidate of any kind reaches the strong-score threshold."""
+    for candidate in _all_candidates(resolution):
+        if candidate.score >= _STRONG_THRESHOLD:
+            return True
+    return False
+
+
+def _pick_strong(candidates: list[ScopeCandidate]) -> ScopeCandidate | None:
+    """Return the best candidate by (score, value length), or ``None`` for an empty list.
+
+    No threshold is applied here; callers compare the score themselves.
+    On ties the earliest candidate wins.
+    """
+    best = None
+    best_rank = None
+    for candidate in candidates:
+        rank = (candidate.score, len(candidate.value))
+        if best is None or rank > best_rank:
+            best, best_rank = candidate, rank
+    return best
+
+
+def _norm_query(q: str) -> str:
+    """Trim, lower-case and collapse every whitespace run in the query to one space."""
+    lowered = str(q or "").strip().lower()
+    return re.sub(r"\s+", " ", lowered)
+
+
+def _append_unique(items: list[str], value: str) -> None:
+    """Append ``value`` to ``items`` in place unless it is empty or already present."""
+    if not value:
+        return
+    if value in items:
+        return
+    items.append(value)
+
+
+def _merge_unique(a: list[str], b: list[str]) -> list[str]:
+    """Concatenate ``a`` and ``b`` into a new list, dropping repeats and keeping first-seen order."""
+    ordered = dict.fromkeys(a)
+    ordered.update(dict.fromkeys(b))
+    return list(ordered)
+
+
+def _is_explicit_identifier(token: str) -> bool:
+    """Tell whether ``token`` is an ASCII letter followed by one or more letters, digits or underscores."""
+    matched = re.fullmatch(r"[A-Za-z][A-Za-z0-9_]+", token)
+    return matched is not None
+
+
+def _split_subdomain_value(value: str) -> tuple[str, str] | None:
+    """Split a ``domain::subdomain`` composite at the first ``::``.
+
+    Returns the two halves trimmed and lower-cased, or ``None`` when the
+    separator is missing or either half is empty.
+    """
+    head, separator, tail = str(value or "").partition("::")
+    if not (separator and head and tail):
+        return None
+    return head.strip().lower(), tail.strip().lower()
+
+
+def _subdomain_aligned_with_query(query_l: str, composite: str) -> bool:
+    """True when both halves of a ``domain::subdomain`` composite score above zero against the query."""
+    head, separator, tail = str(composite or "").partition("::")
+    if not separator:
+        return False
+    domain_score, _ = _match_score(query_l, head.strip().lower())
+    subdomain_score, _ = _match_score(query_l, tail.strip().lower())
+    if domain_score <= 0:
+        return False
+    return subdomain_score > 0
+
+
+def _entities_for_endpoints(endpoint_paths: list[str], catalog: DocsScopeCatalog) -> list[str]:
+    """List (lower-cased, unique, in catalog order) the entities whose blob mentions any given endpoint path."""
+    wanted = {path.lower() for path in endpoint_paths if path}
+    matches: list[str] = []
+    for record in catalog.entity_records:
+        blob = str(record.get("blob") or "").lower()
+        entity = str(record.get("name") or "").strip().lower()
+        if entity and any(path in blob for path in wanted):
+            _append_unique(matches, entity)
+    return matches
+
+
+def _collect_domain_candidates(query_l: str, catalog: DocsScopeCatalog, resolution: ScopeResolution) -> None:
+    """Append a candidate to ``resolution.candidate_domains`` for every catalog domain that matches the query."""
+    for domain in catalog.domain_values:
+        if not domain:
+            continue
+        score, match_type = _match_score(query_l, domain)
+        if score > 0:
+            candidate = ScopeCandidate(
+                value=domain,
+                score=score,
+                source_layer="D1_DOCUMENT_CATALOG",
+                match_type=match_type,
+            )
+            resolution.candidate_domains.append(candidate)
+
+
+def _collect_subdomain_candidates(query_l: str, catalog: DocsScopeCatalog, resolution: ScopeResolution) -> None:
+    """Append ``domain::subdomain`` candidates for catalog pairs that match the query.
+
+    A pair scores via ``_phrase_score`` when that is positive; otherwise it
+    needs both halves to match individually and takes the weaker of the two
+    scores. Each distinct pair is evaluated once.
+    """
+    visited: set[str] = set()
+    for dom, sub in catalog.subdomain_pairs:
+        if not (dom and sub):
+            continue
+        composite = f"{dom}::{sub}"
+        if composite in visited:
+            continue
+        visited.add(composite)
+
+        dom_score, _ = _match_score(query_l, dom)
+        sub_score, sub_match_type = _match_score(query_l, sub)
+        phrase = _phrase_score(query_l, dom, sub)
+        if phrase > 0:
+            score, match_type = phrase, "normalized"
+        elif dom_score > 0 and sub_score > 0:
+            score, match_type = min(dom_score, sub_score), sub_match_type
+        else:
+            # Avoid promoting a (domain, subdomain) pair when only the domain token matches.
+            continue
+
+        resolution.candidate_subdomains.append(
+            ScopeCandidate(
+                value=composite,
+                score=score,
+                source_layer="D1_DOCUMENT_CATALOG",
+                match_type=match_type,
+            )
+        )
+
+
+def _collect_entity_candidates(query_l: str, catalog: DocsScopeCatalog, resolution: ScopeResolution) -> None:
+    """Append a candidate to ``resolution.candidate_entities`` for every entity record (name of 2+ chars) matching the query."""
+    for record in catalog.entity_records:
+        entity = str(record.get("name") or "").strip().lower()
+        if len(entity) < 2:
+            continue
+        blob = str(record.get("blob") or "").lower()
+        score, match_type = _match_entity(query_l, entity, blob)
+        if score > 0:
+            candidate = ScopeCandidate(
+                value=entity,
+                score=score,
+                source_layer=str(record.get("source_layer") or ""),
+                match_type=match_type,
+            )
+            resolution.candidate_entities.append(candidate)
+
+
+def _collect_api_candidates(query_l: str, catalog: DocsScopeCatalog, resolution: ScopeResolution) -> None:
+    """Append a candidate to ``resolution.candidate_apis`` for every API record whose endpoint matches the query.
+
+    The endpoint is matched with its spaces removed, but the candidate keeps
+    the trimmed, lower-cased endpoint as stored.
+    """
+    for record in catalog.api_records:
+        endpoint = str(record.get("endpoint") or "").strip().lower()
+        if not endpoint:
+            continue
+        score, match_type = _match_score(query_l, endpoint.replace(" ", ""))
+        if score > 0:
+            candidate = ScopeCandidate(
+                value=endpoint,
+                score=score,
+                source_layer=str(record.get("source_layer") or ""),
+                match_type=match_type,
+            )
+            resolution.candidate_apis.append(candidate)
+
+
+def _phrase_score(query_l: str, dom: str, sub: str) -> float:
+    """Score a (domain, subdomain) pair against the query as a phrase.
+
+    Both names present as whole tokens gives the higher of ``_SCORE_NORMALIZED``
+    and 0.9; ``"<dom> <sub>"`` containing, or contained in, the query gives
+    ``_SCORE_NORMALIZED``; anything else gives 0.0.
+    """
+    both_as_tokens = _contains_token(query_l, dom) and _contains_token(query_l, sub)
+    if both_as_tokens:
+        return max(_SCORE_NORMALIZED, 0.9)
+    phrase = re.sub(r"\s+", " ", f"{dom} {sub}".strip())
+    if phrase in query_l:
+        return _SCORE_NORMALIZED
+    if query_l in phrase:
+        return _SCORE_NORMALIZED
+    return 0.0
+
+
+def _match_entity(query_l: str, name: str, blob: str) -> tuple[float, str]:
+    """Score an entity against the query, falling back to a soft blob-based hint.
+
+    A direct ``_match_score`` hit on the name is returned as is. Otherwise a
+    name of 4+ chars that appears in its own blob earns ``_SCORE_SOFT`` when the
+    query shares a token with the blob and also contains the name as a token or
+    the name's first four characters as a substring.
+    """
+    direct_score, direct_type = _match_score(query_l, name)
+    if direct_score > 0:
+        return direct_score, direct_type
+
+    no_match = (0.0, "normalized")
+    if len(name) < 4 or name not in blob:
+        return no_match
+    # cross-language hints: name appears in catalog blob; small boost if query token overlaps blob
+    query_tokens = set(query_l.split())
+    if not (query_tokens & set(blob.split())):
+        return no_match
+    if name in query_tokens or name[:4] in query_l:
+        return _SCORE_SOFT, "normalized"
+    return no_match
+
+
+def _match_score(query_l: str, value: str) -> tuple[float, str]:
+    """Score how well a catalog ``value`` matches the normalized query.
+
+    Returns ``(score, match_type)``:
+      * ``(_SCORE_EXACT, "exact")`` when the value equals the query (ignoring
+        spaces) or occurs in it as a space-delimited token or phrase, with
+        ``/`` optionally written as a space;
+      * ``(_SCORE_NORMALIZED, "normalized")`` when the value is a substring of
+        the query once spaces are ignored;
+      * ``(_SCORE_SOFT, "normalized")`` when the value has 4+ chars and some
+        query token starts with its first four characters;
+      * ``(0.0, "normalized")`` for an empty value or no match.
+    """
+    v = str(value or "").strip().lower()
+    if not v:
+        return 0.0, "normalized"
+    v_compact = v.replace(" ", "")
+    q_compact = query_l.replace(" ", "")
+    if v == query_l or v_compact == q_compact:
+        return _SCORE_EXACT, "exact"
+    if _contains_token(query_l, v) or _contains_token(query_l, v.replace("/", " ")):
+        return _SCORE_EXACT, "exact"
+    # One space-insensitive test covers every substring case: if ``v`` occurs in
+    # the query (with or without its spaces), ``v_compact`` occurs in ``q_compact``.
+    if v_compact in q_compact:
+        return _SCORE_NORMALIZED, "normalized"
+    # prefix / slug: deliberately loose 4-char prefix match, checked once per call
+    if len(v) >= 4:
+        prefix = v[:4]
+        if any(token.startswith(prefix) for token in query_l.split()):
+            return _SCORE_SOFT, "normalized"
+    return 0.0, "normalized"
+
+
+def _contains_token(hay: str, needle: str) -> bool:
+    """Tell whether ``needle`` occurs in ``hay`` bounded by spaces or the string ends; an empty needle never matches."""
+    padded_hay = " " + hay + " "
+    padded_needle = " " + needle + " "
+    return bool(needle) and padded_needle in padded_hay
+
+
+def _dedupe_candidates(resolution: ScopeResolution) -> None:
+    """Replace each candidate list on ``resolution`` with its deduplicated, sorted version."""
+    for attribute in (
+        "candidate_domains",
+        "candidate_subdomains",
+        "candidate_entities",
+        "candidate_apis",
+    ):
+        setattr(resolution, attribute, _dedupe_list(getattr(resolution, attribute)))
+
+
+def _dedupe_list(items: list[ScopeCandidate]) -> list[ScopeCandidate]:
+    """Keep the highest-scoring candidate per (value, source layer) and sort by score descending, then value.
+
+    When scores tie for the same key, the candidate seen first is kept.
+    """
+    winners: dict[str, ScopeCandidate] = {}
+    for candidate in items:
+        slot = f"{candidate.value}|{candidate.source_layer}"
+        if slot not in winners or candidate.score > winners[slot].score:
+            winners[slot] = candidate
+    ordered = list(winners.values())
+    ordered.sort(key=lambda cand: (-cand.score, cand.value))
+    return ordered
+
+
+def _is_global_enumeration(query_l: str, *, has_strong_any: bool) -> bool:
+    """Decide whether the query is a project-wide "list everything" question.
+
+    Never true when a strong candidate exists. Otherwise true when the query
+    either combines a project-wide marker with a listing/API word, or starts
+    with an enumeration opener and mentions methods/APIs/endpoints.
+    """
+    if has_strong_any:
+        return False
+
+    def mentions(markers) -> bool:
+        return any(marker in query_l for marker in markers)
+
+    listing_words = ("какие", "какой", "список", "перечисли", "метод", "api")
+    if mentions(_PROJECT_WIDE_MARKERS) and mentions(listing_words):
+        return True
+
+    openers = tuple(m.strip() for m in _ENUM_MARKERS_RU if len(m.strip()) > 2)
+    if not query_l.strip().startswith(openers):
+        return False
+    return mentions(("метод", "api", "ручк", "эндпоинт"))
diff --git a/src/app/core/agent/processes/v2/intent_router/router.py b/src/app/core/agent/processes/v2/intent_router/router.py
index c78596d..f987c99 100644
--- a/src/app/core/agent/processes/v2/intent_router/router.py
+++ b/src/app/core/agent/processes/v2/intent_router/router.py
@@ -2,8 +2,17 @@
from __future__ import annotations
+from collections.abc import Callable
+from dataclasses import replace
+
from app.core.agent.processes.v2.intent_router.modules.anchors import V2AnchorExtractor
from app.core.agent.processes.v2.intent_router.modules.normalizer import V2QueryNormalizer
+from app.core.agent.processes.v2.intent_router.modules.scope_catalog import DocsScopeCatalog, build_docs_scope_catalog
+from app.core.agent.processes.v2.intent_router.modules.scope_resolver import (
+ plausible_doc_endpoint_paths,
+ promote_target_terms,
+ resolve_docs_scope,
+)
from app.core.agent.processes.v2.intent_router.modules.target_terms import V2TargetTermsExtractor
from app.core.agent.processes.v2.intent_router.models import QueryFeatures
from app.core.agent.processes.v2.intent_router.routers.confidence import V2ConfidenceAdjuster
@@ -11,8 +20,18 @@ from app.core.agent.processes.v2.intent_router.routers.fallback import V2Fallbac
from app.core.agent.processes.v2.intent_router.routers.llm import V2LlmRouter
from app.core.agent.processes.v2.intent_router.routers.route_catalog import V2RouteCatalog
from app.core.agent.processes.v2.intent_router.routers.validator import V2RouteValidator
-from app.core.agent.processes.v2.models import V2RouteResult
+from app.core.agent.utils.process_v2.models import V2RouteResult, V2ScopeType
from app.core.agent.utils.llm import AgentLlmService
+from app.core.rag.persistence.query_repository import RagQueryRepository
+
+
+def _scope_candidate_dict(candidate) -> dict[str, object]:
+    """Convert a scope candidate into a plain dict with its value, score, source layer and match type."""
+    exported = ("value", "score", "source_layer", "match_type")
+    return {attribute: getattr(candidate, attribute) for attribute in exported}
class V2IntentRouter:
@@ -25,6 +44,7 @@ class V2IntentRouter:
enable_llm_disambiguation: bool = True,
route_catalog: V2RouteCatalog | None = None,
confidence_adjuster: V2ConfidenceAdjuster | None = None,
+ scope_rows_provider: Callable[[str], list[dict]] | None = None,
) -> None:
self._normalizer = normalizer or V2QueryNormalizer()
self._target_terms_extractor = target_terms_extractor or V2TargetTermsExtractor()
@@ -35,23 +55,48 @@ class V2IntentRouter:
self._confidence_adjuster = confidence_adjuster or V2ConfidenceAdjuster()
self._enable_llm_disambiguation = enable_llm_disambiguation
self._llm_router = V2LlmRouter(llm, catalog=self._catalog) if llm is not None else None
+ self._scope_rows_provider = scope_rows_provider
- def route(self, user_query: str) -> V2RouteResult:
+ def route(self, user_query: str, *, rag_session_id: str | None = None) -> V2RouteResult:
normalized_query = self._normalizer.normalize(user_query)
target_terms_analysis = self._target_terms_extractor.extract(normalized_query)
- anchor_analysis = self._anchor_extractor.extract(normalized_query, target_terms_analysis)
+ sanitized_eps = plausible_doc_endpoint_paths(list(target_terms_analysis.endpoint_paths))
+ if sanitized_eps != list(target_terms_analysis.endpoint_paths):
+ target_terms_analysis = replace(target_terms_analysis, endpoint_paths=sanitized_eps)
+ allowed_paths = set(sanitized_eps)
+ target_terms_analysis = replace(
+ target_terms_analysis,
+ target_terms=[
+ t
+ for t in target_terms_analysis.target_terms
+ if not str(t).startswith("/") or str(t).lower() in allowed_paths
+ ],
+ )
+ raw_target_terms = list(target_terms_analysis.target_terms)
+ scope_rows = self._load_scope_rows(rag_session_id)
+ scope_catalog: DocsScopeCatalog | None
+ if not scope_rows:
+ scope_catalog = None
+ else:
+ scope_catalog = build_docs_scope_catalog(scope_rows)
+ resolution = resolve_docs_scope(normalized_query, target_terms_analysis, scope_catalog)
+ promoted_terms = promote_target_terms(raw_target_terms, target_terms_analysis, resolution)
+ refined_terms = replace(target_terms_analysis, target_terms=promoted_terms)
+ anchor_analysis = self._anchor_extractor.extract(normalized_query, refined_terms)
+ self._apply_scope_to_anchors(anchor_analysis.anchors, resolution)
features = QueryFeatures(
normalized_query=normalized_query,
- target_terms=list(target_terms_analysis.target_terms),
- endpoint_paths=list(target_terms_analysis.endpoint_paths),
+ target_terms=list(refined_terms.target_terms),
+ endpoint_paths=list(refined_terms.endpoint_paths),
file_names=list(anchor_analysis.anchors.file_names),
- matched_aliases=list(target_terms_analysis.matched_aliases),
+ matched_aliases=list(refined_terms.matched_aliases),
target_doc_hints=list(anchor_analysis.anchors.target_doc_hints),
file_markers=list(anchor_analysis.file_markers),
architecture_markers=list(anchor_analysis.architecture_markers),
logic_markers=list(anchor_analysis.logic_markers),
domain_markers=list(anchor_analysis.domain_markers),
endpoint_markers=list(anchor_analysis.endpoint_markers),
+ scope_type=resolution.scope_type,
)
llm_attempted = self._enable_llm_disambiguation and self._llm_router is not None
llm_candidate = self._route_with_llm(
@@ -59,7 +104,6 @@ class V2IntentRouter:
anchors=anchor_analysis.anchors,
)
llm_result = self._validator.validate(llm_candidate)
- llm_result = self._apply_deterministic_corrections(llm_result, features)
if llm_result is not None:
confidence = self._confidence_adjuster.adjust(float(llm_result["confidence"]), features)
return V2RouteResult(
@@ -74,14 +118,53 @@ class V2IntentRouter:
routing_mode="llm_default",
llm_router_used=True,
reason_short=str(llm_result["reason_short"]),
+ scope_type=resolution.scope_type,
+ )
+ if llm_attempted:
+ return self._fallback_router.route_without_deterministic_signals(
+ user_query=user_query,
+ features=features,
+ anchors=anchor_analysis.anchors,
+ scope_type=resolution.scope_type,
)
return self._fallback_router.route(
user_query=user_query,
features=features,
anchors=anchor_analysis.anchors,
llm_attempted=llm_attempted,
+ scope_type=resolution.scope_type,
)
+    def _load_scope_rows(self, rag_session_id: str | None) -> list[dict]:
+        """Load the DOCS scope index rows for a RAG session.
+
+        Returns ``[]`` when no session id is given. Uses the injected
+        ``scope_rows_provider`` when one was configured (its exceptions
+        propagate); otherwise queries ``RagQueryRepository``. Loading from the
+        repository is best-effort: a failure is logged and yields ``[]``.
+        """
+        import logging  # local import keeps this change self-contained
+
+        sid = str(rag_session_id or "").strip()
+        if not sid:
+            return []
+        if self._scope_rows_provider is not None:
+            return self._scope_rows_provider(sid)
+        try:
+            return RagQueryRepository().list_docs_scope_index_rows(sid)
+        except Exception:
+            # Still best-effort, but no longer silent: without this a broken
+            # repository is indistinguishable from an empty catalog.
+            logging.getLogger(__name__).warning(
+                "Failed to load docs scope rows for rag session %s",
+                sid,
+                exc_info=True,
+            )
+            return []
+
+    def _apply_scope_to_anchors(self, anchors, resolution) -> None:
+        """Copy scope-resolution results onto ``anchors`` in place.
+
+        Candidate lists are always copied over. When a catalog was loaded, the
+        strong endpoint paths and entity names are additionally merged in front
+        of the existing anchor values (duplicates dropped, order kept), and the
+        process domain / subdomain are overwritten by any strong value.
+        """
+        anchors.candidate_domains = list(resolution.candidate_domains)
+        anchors.candidate_subdomains = list(resolution.candidate_subdomains)
+        anchors.candidate_entities = list(resolution.candidate_entities)
+        anchors.candidate_apis = list(resolution.candidate_apis)
+        if not resolution.catalog_loaded:
+            return
+        anchors.endpoint_paths = list(
+            dict.fromkeys([*resolution.strong_endpoint_paths, *anchors.endpoint_paths])
+        )
+        anchors.entity_names = list(
+            dict.fromkeys([*resolution.strong_entity_names, *anchors.entity_names])
+        )
+        # These two assignments already cover the SUBDOMAIN scope case, so the
+        # former extra branch that repeated them for that scope was redundant.
+        if resolution.strong_domain:
+            anchors.process_domain = resolution.strong_domain
+        if resolution.strong_subdomain:
+            anchors.process_subdomain = resolution.strong_subdomain
+
def _route_with_llm(self, *, features: QueryFeatures, anchors) -> dict | None:
if not self._enable_llm_disambiguation or self._llm_router is None:
return None
@@ -89,6 +172,7 @@ class V2IntentRouter:
return self._llm_router.classify(
normalized_query=features.normalized_query,
target_terms=features.target_terms,
+ scope_type=features.scope_type,
anchors={
"entity_names": anchors.entity_names,
"file_names": anchors.file_names,
@@ -97,22 +181,11 @@ class V2IntentRouter:
"matched_aliases": anchors.matched_aliases,
"process_domain": anchors.process_domain,
"process_subdomain": anchors.process_subdomain,
+ "candidate_domains": [_scope_candidate_dict(c) for c in anchors.candidate_domains],
+ "candidate_subdomains": [_scope_candidate_dict(c) for c in anchors.candidate_subdomains],
+ "candidate_entities": [_scope_candidate_dict(c) for c in anchors.candidate_entities],
+ "candidate_apis": [_scope_candidate_dict(c) for c in anchors.candidate_apis],
},
)
except Exception:
return None
-
- def _apply_deterministic_corrections(self, candidate: dict | None, features: QueryFeatures) -> dict | None:
- if candidate is None:
- return None
- if candidate.get("routing_domain") == "DOCS" and self._should_force_find_files(features):
- corrected = dict(candidate)
- corrected["subintent"] = "FIND_FILES"
- return corrected
- return candidate
-
- def _should_force_find_files(self, features: QueryFeatures) -> bool:
- if features.file_markers or features.file_names:
- return True
- query = features.normalized_query.lower()
- return "show doc" in query or "show file" in query or "doc for" in query
diff --git a/src/app/core/agent/processes/v2/intent_router/routers/deterministic.py b/src/app/core/agent/processes/v2/intent_router/routers/deterministic.py
index 0c5fc3c..a99f306 100644
--- a/src/app/core/agent/processes/v2/intent_router/routers/deterministic.py
+++ b/src/app/core/agent/processes/v2/intent_router/routers/deterministic.py
@@ -1,7 +1,7 @@
from __future__ import annotations
from app.core.agent.processes.v2.intent_router.models import QueryFeatures
-from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2RouteResult, V2Subintent
+from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2RouteResult, V2Subintent
from app.core.agent.processes.v2.intent_router.routers.docs_subintent_resolver import DocsSubintentResolver
diff --git a/src/app/core/agent/processes/v2/intent_router/routers/docs_subintent_resolver.py b/src/app/core/agent/processes/v2/intent_router/routers/docs_subintent_resolver.py
index e1265dc..498e3c4 100644
--- a/src/app/core/agent/processes/v2/intent_router/routers/docs_subintent_resolver.py
+++ b/src/app/core/agent/processes/v2/intent_router/routers/docs_subintent_resolver.py
@@ -1,13 +1,33 @@
from __future__ import annotations
from app.core.agent.processes.v2.intent_router.models import QueryFeatures
-from app.core.agent.processes.v2.models import V2Subintent
+from app.core.agent.utils.process_v2.models import V2Subintent
class DocsSubintentResolver:
+ # Phrases (Russian and English) that on their own mark a "list the exposed APIs" request.
+ _API_ENUM_MARKERS = (
+ "какие api",
+ "какие эндпоинты",
+ "какие endpoint",
+ "список api",
+ "список эндпоинтов",
+ "список endpoint",
+ "все api",
+ "все эндпоинты",
+ "перечисли api",
+ "перечисли эндпоинты",
+ "доступные api",
+ "available endpoints",
+ "exposed api",
+ )
+ # Looser signal: an API-related word together with a listing word (both substring-matched).
+ _API_WORD_MARKERS = ("api", "эндпоинт", "endpoint", "роут", "route", "метод")
+ _LIST_WORD_MARKERS = ("какие", "список", "перечисли", "все", "доступные", "list", "available", "exposed")
+
def resolve(self, features: QueryFeatures) -> str | None:
if features.file_markers or self._has_file_like_anchor(features):
return V2Subintent.FIND_FILES
+ if self._is_api_exposed_request(features):
+ return V2Subintent.API_EXPOSED
if any(
(
features.endpoint_paths,
@@ -26,3 +46,13 @@ class DocsSubintentResolver:
hint.endswith((".md", ".yaml", ".yml", ".json"))
for hint in features.target_doc_hints
) or any(token.endswith((".md", ".yaml", ".yml", ".json")) for token in features.file_names)
+
+    def _is_api_exposed_request(self, features: QueryFeatures) -> bool:
+        """Tell whether the query asks to list exposed APIs rather than explain a specific endpoint.
+
+        Always false when concrete endpoint paths were extracted. Otherwise
+        true on any enumeration phrase, or on an API word combined with a
+        listing word (all matched as lower-case substrings).
+        """
+        query = features.normalized_query.lower()
+        if features.endpoint_paths:
+            return False
+
+        def mentions(markers) -> bool:
+            return any(marker in query for marker in markers)
+
+        if mentions(self._API_ENUM_MARKERS):
+            return True
+        return mentions(self._API_WORD_MARKERS) and mentions(self._LIST_WORD_MARKERS)
diff --git a/src/app/core/agent/processes/v2/intent_router/routers/fallback.py b/src/app/core/agent/processes/v2/intent_router/routers/fallback.py
index a88df36..245716a 100644
--- a/src/app/core/agent/processes/v2/intent_router/routers/fallback.py
+++ b/src/app/core/agent/processes/v2/intent_router/routers/fallback.py
@@ -1,10 +1,33 @@
from __future__ import annotations
from app.core.agent.processes.v2.intent_router.models import QueryFeatures
-from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2RouteResult, V2Subintent
+from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2RouteResult, V2ScopeType, V2Subintent
class V2FallbackRouter:
+    def route_without_deterministic_signals(
+        self,
+        *,
+        user_query: str,
+        features: QueryFeatures,
+        anchors,
+        scope_type: str = V2ScopeType.UNKNOWN,
+    ) -> V2RouteResult:
+        """Build the GENERAL / GENERAL_QA / SUMMARY result for an unresolved LLM route.
+
+        No query markers are inspected: the result always has zero confidence,
+        routing mode ``"llm_fallback"`` and ``llm_router_used=True``, and
+        carries the query, target terms, anchors and scope type through.
+        """
+        route_fields = {
+            "routing_domain": V2Domain.GENERAL,
+            "intent": V2Intent.GENERAL_QA,
+            "subintent": V2Subintent.SUMMARY,
+        }
+        query_fields = {
+            "user_query": user_query,
+            "normalized_query": features.normalized_query,
+            "target_terms": features.target_terms,
+            "anchors": anchors,
+        }
+        return V2RouteResult(
+            **route_fields,
+            **query_fields,
+            confidence=0.0,
+            routing_mode="llm_fallback",
+            llm_router_used=True,
+            reason_short="llm route unresolved",
+            scope_type=scope_type,
+        )
+
def route(
self,
*,
@@ -12,6 +35,7 @@ class V2FallbackRouter:
features: QueryFeatures,
anchors,
llm_attempted: bool,
+ scope_type: str = V2ScopeType.UNKNOWN,
) -> V2RouteResult:
if features.file_markers:
return self._build_docs_result(
@@ -21,6 +45,32 @@ class V2FallbackRouter:
subintent=V2Subintent.FIND_FILES,
llm_attempted=llm_attempted,
reason="fallback file markers",
+ scope_type=scope_type,
+ )
+ if self._has_docs_update_signal(features):
+ return V2RouteResult(
+ routing_domain=V2Domain.DOCS,
+ intent=V2Intent.DOC_UPDATE,
+ subintent=V2Subintent.FROM_FEATURE,
+ user_query=user_query,
+ normalized_query=features.normalized_query,
+ target_terms=features.target_terms,
+ anchors=anchors,
+ confidence=0.0,
+ routing_mode=self._routing_mode(llm_attempted),
+ llm_router_used=llm_attempted,
+ reason_short="fallback docs update from feature",
+ scope_type=scope_type,
+ )
+ if self._has_api_exposed_signal(features):
+ return self._build_docs_result(
+ user_query=user_query,
+ features=features,
+ anchors=anchors,
+ subintent=V2Subintent.API_EXPOSED,
+ llm_attempted=llm_attempted,
+ reason="fallback docs api exposed",
+ scope_type=scope_type,
)
if self._has_docs_signal(features):
return self._build_docs_result(
@@ -30,6 +80,7 @@ class V2FallbackRouter:
subintent=V2Subintent.SUMMARY,
llm_attempted=llm_attempted,
reason="fallback docs summary",
+ scope_type=scope_type,
)
return V2RouteResult(
routing_domain=V2Domain.GENERAL,
@@ -43,6 +94,7 @@ class V2FallbackRouter:
routing_mode=self._routing_mode(llm_attempted),
llm_router_used=llm_attempted,
reason_short="fallback general summary",
+ scope_type=scope_type,
)
def _build_docs_result(
@@ -54,6 +106,7 @@ class V2FallbackRouter:
subintent: str,
llm_attempted: bool,
reason: str,
+ scope_type: str = V2ScopeType.UNKNOWN,
) -> V2RouteResult:
return V2RouteResult(
routing_domain=V2Domain.DOCS,
@@ -67,6 +120,7 @@ class V2FallbackRouter:
routing_mode=self._routing_mode(llm_attempted),
llm_router_used=llm_attempted,
reason_short=reason,
+ scope_type=scope_type,
)
def _has_docs_signal(self, features: QueryFeatures) -> bool:
@@ -82,5 +136,30 @@ class V2FallbackRouter:
)
)
+    def _has_api_exposed_signal(self, features: QueryFeatures) -> bool:
+        """Tell whether the query looks like "list the APIs".
+
+        Requires an API word and a listing word (lower-case substring match),
+        with no extracted endpoint paths and no file markers.
+        """
+        query = features.normalized_query.lower()
+        api_words = ("api", "эндпоинт", "endpoint", "роут", "route", "метод")
+        listing_words = ("какие", "список", "перечисли", "все", "available", "list")
+        mentions_api = any(word in query for word in api_words)
+        asks_for_listing = any(word in query for word in listing_words)
+        if not (mentions_api and asks_for_listing):
+            return False
+        if features.endpoint_paths or features.file_markers:
+            return False
+        return True
+
+    def _has_docs_update_signal(self, features: QueryFeatures) -> bool:
+        """Tell whether the query asks to update documentation from a feature description.
+
+        Requires one marker from each group below (lower-case substring match).
+        """
+        query = features.normalized_query.lower()
+        # NOTE(review): "документац" (documentation) sits among the update verbs,
+        # so a mere mention of documentation counts as an update marker - confirm intent.
+        update_markers = (
+            "обнов",
+            "измен",
+            "внести правк",
+            "docs update",
+            "update documentation",
+            "документац",
+        )
+        feature_markers = ("системной аналитик", "feature", ".md", "confluence", "from feature")
+        for group in (update_markers, feature_markers):
+            if not any(marker in query for marker in group):
+                return False
+        return True
+
def _routing_mode(self, llm_attempted: bool) -> str:
return "llm_fallback" if llm_attempted else "deterministic_fallback"
diff --git a/src/app/core/agent/processes/v2/intent_router/routers/llm.py b/src/app/core/agent/processes/v2/intent_router/routers/llm.py
index 291dd96..0a04072 100644
--- a/src/app/core/agent/processes/v2/intent_router/routers/llm.py
+++ b/src/app/core/agent/processes/v2/intent_router/routers/llm.py
@@ -17,10 +17,18 @@ class V2LlmRouter:
self._prompt_name = prompt_name
self._catalog = catalog or V2RouteCatalog()
- def classify(self, *, normalized_query: str, target_terms: list[str], anchors: dict) -> dict | None:
+ def classify(
+ self,
+ *,
+ normalized_query: str,
+ target_terms: list[str],
+ anchors: dict,
+ scope_type: str = "unknown",
+ ) -> dict | None:
payload = {
"normalized_query": normalized_query,
"target_terms": target_terms,
+ "scope_type": scope_type,
"anchors": anchors,
"allowed_routes": self._catalog.allowed_routes(),
}
diff --git a/src/app/core/agent/processes/v2/intent_router/routers/prompts.yml b/src/app/core/agent/processes/v2/intent_router/routers/prompts.yml
index 7959dc1..e0a4f6d 100644
--- a/src/app/core/agent/processes/v2/intent_router/routers/prompts.yml
+++ b/src/app/core/agent/processes/v2/intent_router/routers/prompts.yml
@@ -3,9 +3,12 @@ namespace: v2_intent_router
prompts:
route: |
Ты выбираешь маршрут для узкого процесса v2.
+ Поле `scope_type` и блок `anchors` с `candidate_*` — это предварительная привязка к каталогу документации текущей RAG-сессии (детерминированно извлечённые кандидаты). Не выдумывай домены, сущности и API, которых нет в этих полях; используй их для снятия неоднозначности.
Основной принцип:
- DOCS / DOC_EXPLAIN / FIND_FILES: запрос просит найти файл, документ или путь.
+ - DOCS / DOC_EXPLAIN / API_EXPOSED: запрос просит перечислить доступные API-методы/эндпоинты.
- DOCS / DOC_EXPLAIN / SUMMARY: запрос просит объяснить документацию, endpoint, архитектуру, процесс или сущность.
+ - DOCS / DOC_UPDATE / FROM_FEATURE: запрос просит обновить документацию по системной аналитике (feature markdown/confluence).
- GENERAL / GENERAL_QA / SUMMARY: общий обзорный вопрос без явного запроса к документации.
Используй только маршруты из поля `allowed_routes`.
@@ -17,8 +20,8 @@ prompts:
Ответь только JSON-объектом вида:
{
"routing_domain": "GENERAL" | "DOCS",
- "intent": "GENERAL_QA" | "DOC_EXPLAIN",
- "subintent": "SUMMARY" | "FIND_FILES",
+ "intent": "GENERAL_QA" | "DOC_EXPLAIN" | "DOC_UPDATE",
+ "subintent": "SUMMARY" | "FIND_FILES" | "API_EXPOSED" | "FROM_FEATURE",
"confidence": 0.0-1.0,
"reason_short": "короткая причина"
}
diff --git a/src/app/core/agent/processes/v2/intent_router/routers/route_catalog.py b/src/app/core/agent/processes/v2/intent_router/routers/route_catalog.py
index f4110fe..295928c 100644
--- a/src/app/core/agent/processes/v2/intent_router/routers/route_catalog.py
+++ b/src/app/core/agent/processes/v2/intent_router/routers/route_catalog.py
@@ -1,12 +1,14 @@
from __future__ import annotations
-from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2Subintent
+from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2Subintent
class V2RouteCatalog:
_ALLOWED_ROUTES = (
(V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.FIND_FILES),
+ (V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.API_EXPOSED),
(V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.SUMMARY),
+ (V2Domain.DOCS, V2Intent.DOC_UPDATE, V2Subintent.FROM_FEATURE),
(V2Domain.GENERAL, V2Intent.GENERAL_QA, V2Subintent.SUMMARY),
)
diff --git a/src/app/core/agent/processes/v2/process.py b/src/app/core/agent/processes/v2/process.py
deleted file mode 100644
index b862ce5..0000000
--- a/src/app/core/agent/processes/v2/process.py
+++ /dev/null
@@ -1,304 +0,0 @@
-"""Процесс v2: роутинг, план retrieval, вызов rag API, сборка evidence и workflow."""
-
-from __future__ import annotations
-
-from app.core.agent.processes.v2.anchor_signals import route_anchor_summary
-from app.core.agent.processes.v2.evidence.assembler import DocsEvidenceAssembler
-from app.core.agent.processes.v2.evidence.gate import DocsEvidenceGate
-from app.core.agent.processes.v2.intent_router import V2IntentRouter
-from app.core.agent.processes.v2.models import V2Intent, V2Subintent
-from app.core.agent.processes.v2.retrieval import DocsMetadataLookupIndex
-from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
-from app.core.agent.processes.v2.retrieval.target_doc_seeding import (
- RagRowIndex,
- merge_row_lists,
- normalize_doc_path,
- normalized_path_set,
- row_path,
- seed_candidates_from_target_hints,
-)
-from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter
-from app.core.agent.processes.v2.workflows.docs_explain_find_files.context import DocsExplainFindFilesContext
-from app.core.agent.processes.v2.workflows.docs_explain_find_files.graph import DocsExplainFindFilesGraph
-from app.core.agent.processes.v2.workflows.docs_explain_summary.context import DocsExplainSummaryContext
-from app.core.agent.processes.v2.workflows.docs_explain_summary.graph import DocsExplainSummaryGraph
-from app.core.agent.processes.v2.workflows.general_summary.context import GeneralSummaryContext
-from app.core.agent.processes.v2.workflows.general_summary.graph import GeneralSummaryGraph
-from app.core.agent.processes.base import AgentProcess, ProcessResult
-from app.core.agent.utils.llm import AgentLlmService
-
-
-class V2Process(AgentProcess):
- version = "v2"
-
- def __init__(
- self,
- llm: AgentLlmService,
- policy_resolver: V2RetrievalPolicyResolver,
- rag_adapter: V2RagRetrievalAdapter,
- evidence_assembler: DocsEvidenceAssembler,
- evidence_gate: DocsEvidenceGate | None = None,
- router: V2IntentRouter | None = None,
- docs_summary_prompt_name: str = "v2_docs_explain.summary_answer",
- general_summary_prompt_name: str = "v2_general.summary_answer",
- workflow_llm_enabled: bool = True,
- ) -> None:
- self._router = router or V2IntentRouter()
- self._policy_resolver = policy_resolver
- self._rag_adapter = rag_adapter
- self._evidence_assembler = evidence_assembler
- self._evidence_gate = evidence_gate or DocsEvidenceGate()
- self._docs_summary_prompt_name = docs_summary_prompt_name
- self._general_summary_prompt_name = general_summary_prompt_name
- self._workflow_llm_enabled = workflow_llm_enabled
- self._summary_graph = DocsExplainSummaryGraph(llm)
- self._find_files_graph = DocsExplainFindFilesGraph()
- self._general_summary_graph = GeneralSummaryGraph(llm)
-
- async def run(self, context) -> ProcessResult:
- route = self._router.route(context.request.message)
- rag_session_id = context.session.active_rag_session_id
- context.trace.module("process.v2").log(
- "intent_routed",
- {
- "routing_domain": route.routing_domain,
- "intent": route.intent,
- "subintent": route.subintent,
- "normalized_query": route.normalized_query,
- "target_terms": route.target_terms,
- "anchors": route_anchor_summary(route),
- "confidence": route.confidence,
- "routing_mode": route.routing_mode,
- "llm_router_used": route.llm_router_used,
- "reason_short": route.reason_short,
- "rag_session_id": rag_session_id,
- },
- )
- self._log_step(
- context,
- "router_resolved",
- {
- "domain": route.routing_domain,
- "intent": route.intent,
- "subintent": route.subintent,
- "confidence": route.confidence,
- },
- )
- self._log_step(
- context,
- "anchors_extracted",
- {
- "signal_types": route_anchor_summary(route)["signal_types"],
- "endpoint_paths": route.anchors.endpoint_paths,
- "target_doc_hints": route.anchors.target_doc_hints,
- "matched_aliases": route.anchors.matched_aliases,
- "target_terms": route.target_terms,
- },
- )
- self._log_step(
- context,
- "alias_resolution",
- {
- "resolved_aliases": route.anchors.matched_aliases,
- "target_doc_hints": route.anchors.target_doc_hints,
- },
- )
- if not rag_session_id:
- if route.intent == V2Intent.GENERAL_QA:
- answer = "Не могу собрать grounded summary без активной RAG-сессии с проиндексированной документацией."
- self._log_step(context, "evidence_gate_checked", {"passed": False, "reason": "missing_rag_session"})
- self._log_step(context, "answer_generated", {"answer_mode": "insufficient_evidence"})
- return ProcessResult(answer=answer)
- return ProcessResult(answer="Для процесса v2 нужна активная RAG-сессия проекта с проиндексированной документацией.")
- plan = self._policy_resolver.resolve(route)
- context.trace.module("process.v2.retrieval_policy").log(
- "retrieval_plan_resolved",
- {"profile": plan.profile, "layers": plan.layers, "limit": plan.limit, "filters": plan.filters},
- )
- self._log_step(
- context,
- "retrieval_profile_selected",
- {"profile": plan.profile, "layers": plan.layers, "filters": plan.filters},
- )
- retrieved_rows = await self._rag_adapter.fetch_rows(rag_session_id, route.normalized_query, plan)
- metadata_rows = self._metadata_lookup_candidates(retrieved_rows, route)
- rows = self._merge_candidate_rows(retrieved_rows, metadata_rows)
- rows = seed_candidates_from_target_hints(rows, route.anchors.target_doc_hints, RagRowIndex(rows))
- self._print_missing_target_hints(route, rows)
- context.trace.module("process.v2.rag_retrieval").log(
- "rag_rows_fetched",
- {
- "profile": plan.profile,
- "row_count": len(rows),
- "rows": [self._trace_row(row) for row in rows],
- },
- )
- self._log_step(
- context,
- "candidate_generation",
- {
- "query": route.user_query,
- "profile": plan.profile,
- "details": {
- "target_doc_hints": list(route.anchors.target_doc_hints),
- "candidates_before_ranking": [row_path(row) for row in rows if row_path(row)],
- },
- "resolved_aliases": route.anchors.matched_aliases,
- "target_doc_hints": route.anchors.target_doc_hints,
- "candidate_docs_before_ranking": [self._trace_row(row) for row in rows[:8]],
- "sources": {
- "seeded": [self._trace_row(row) for row in retrieved_rows[:5] if row_path(row) in {normalize_doc_path(h) for h in route.anchors.target_doc_hints}],
- "metadata_lookup": [self._trace_row(row) for row in metadata_rows[:5]],
- "semantic": [self._trace_row(row) for row in retrieved_rows[:5]],
- },
- },
- )
- self._log_step(
- context,
- "retrieval_executed",
- {
- "query": route.user_query,
- "profile": plan.profile,
- "row_count": len(rows),
- "target_doc_hints": route.anchors.target_doc_hints,
- "top_results": [self._trace_row(row) for row in rows[:5]],
- },
- )
- if route.subintent == V2Subintent.FIND_FILES:
- files = self._evidence_assembler.assemble_files(rows, route)
- gate = self._evidence_gate.check_files(route, files)
- context.trace.module("process.v2.evidence").log(
- "evidence_assembled",
- {"mode": "find_files", "file_count": len(files), "files": [file.path for file in files]},
- )
- self._log_step(
- context,
- "evidence_assembled",
- {"mode": "find_files", "primary_file": files[0].path if files else None, "file_count": len(files)},
- )
- self._log_ranking(context, files)
- self._log_step(
- context,
- "evidence_gate_checked",
- {"passed": gate.passed, "reason": gate.reason, "answer_mode": gate.answer_mode},
- )
- flow_context = DocsExplainFindFilesContext(
- runtime=context,
- route=route,
- rag_session_id=rag_session_id,
- files=files,
- gate_decision=gate,
- )
- flow_context = await self._find_files_graph.run(flow_context)
- self._log_step(context, "answer_generated", {"answer_mode": gate.answer_mode, "answer_length": len(flow_context.answer)})
- return ProcessResult(answer=flow_context.answer)
- documents = self._evidence_assembler.assemble_summaries(rows, route)
- gate = self._evidence_gate.check_summaries(route, documents)
- context.trace.module("process.v2.evidence").log(
- "evidence_assembled",
- {"mode": "summary", "document_count": len(documents), "documents": [item.path for item in documents]},
- )
- self._log_step(
- context,
- "evidence_assembled",
- {"mode": "summary", "primary_doc": documents[0].path if documents else None, "document_count": len(documents)},
- )
- self._log_ranking(context, documents)
- self._log_step(
- context,
- "evidence_gate_checked",
- {"passed": gate.passed, "reason": gate.reason, "answer_mode": gate.answer_mode},
- )
- if route.intent == V2Intent.GENERAL_QA:
- flow_context = GeneralSummaryContext(
- runtime=context,
- route=route,
- prompt_name=self._general_summary_prompt_name,
- workflow_llm_enabled=self._workflow_llm_enabled,
- documents=documents,
- gate_decision=gate,
- )
- flow_context = await self._general_summary_graph.run(flow_context)
- self._log_step(context, "answer_generated", {"answer_mode": gate.answer_mode, "answer_length": len(flow_context.answer)})
- return ProcessResult(answer=flow_context.answer)
- flow_context = DocsExplainSummaryContext(
- runtime=context,
- route=route,
- rag_session_id=rag_session_id,
- prompt_name=self._docs_summary_prompt_name,
- workflow_llm_enabled=self._workflow_llm_enabled,
- documents=documents,
- gate_decision=gate,
- )
- flow_context = await self._summary_graph.run(flow_context)
- self._log_step(context, "answer_generated", {"answer_mode": gate.answer_mode, "answer_length": len(flow_context.answer)})
- return ProcessResult(answer=flow_context.answer)
-
- def _trace_row(self, row: dict) -> dict[str, object]:
- metadata = row.get("metadata") or {}
- content = str(row.get("content") or "").strip()
- return {
- "layer": str(row.get("layer") or ""),
- "path": str(row.get("path") or ""),
- "title": str(row.get("title") or ""),
- "document_id": str(metadata.get("document_id") or metadata.get("doc_id") or ""),
- "entity_name": str(metadata.get("entity_name") or ""),
- "summary_text": str(metadata.get("summary_text") or "")[:400],
- "section_path": str(metadata.get("section_path") or ""),
- "content_preview": content[:400],
- }
-
- def _log_step(self, context, step: str, payload: dict[str, object]) -> None:
- context.trace.module("process.v2.pipeline").log(step, payload)
-
- def _print_missing_target_hints(self, route, rows: list[dict]) -> None:
- if not route.anchors.target_doc_hints:
- return
- candidate_paths = normalized_path_set(rows)
- for hint in route.anchors.target_doc_hints:
- if not str(hint or "").strip():
- continue
- normalized = normalize_doc_path(hint)
- if not normalized.startswith("docs/") or "." not in normalized.rsplit("/", 1)[-1]:
- continue
- if normalized not in candidate_paths:
- print("ERROR: target doc missing from candidates:", normalized)
-
- def _metadata_lookup_candidates(self, rows: list[dict], route) -> list[dict]:
- return DocsMetadataLookupIndex(rows).lookup(route)
-
- def _merge_candidate_rows(self, *groups: list[dict]) -> list[dict]:
- return merge_row_lists(*groups)
-
- def _log_ranking(self, context, items: list) -> None:
- top_docs: list[dict[str, object]] = []
- for item in items[:4]:
- top_docs.append(
- {
- "doc": getattr(item, "path", ""),
- "score": getattr(item, "score", 0),
- "match_reason": getattr(item, "match_reason", ""),
- }
- )
- context.trace.module("process.v2.pipeline").log(
- "ranking_explained",
- {
- "doc": getattr(item, "path", ""),
- "score_breakdown": getattr(item, "score_breakdown", {}),
- "score": getattr(item, "score", 0),
- "match_reason": getattr(item, "match_reason", ""),
- },
- )
- context.trace.module("process.v2.pipeline").log(
- "ranking_explained",
- {
- "top_docs_after_ranking": top_docs,
- "ranking_score_breakdown": [
- {
- "doc": getattr(item, "path", ""),
- "score_breakdown": getattr(item, "score_breakdown", {}),
- }
- for item in items[:4]
- ],
- },
- )
diff --git a/src/app/core/agent/processes/v2/retrieval/__init__.py b/src/app/core/agent/processes/v2/retrieval/__init__.py
deleted file mode 100644
index 11efcb3..0000000
--- a/src/app/core/agent/processes/v2/retrieval/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from app.core.agent.processes.v2.retrieval.metadata_lookup import DocsMetadataLookupIndex
-from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
-from app.core.agent.processes.v2.retrieval.target_doc_seeding import (
- RagRowIndex,
- normalize_doc_path,
- seed_candidates_from_target_hints,
-)
-from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter
-
-__all__ = [
- "V2RetrievalPolicyResolver",
- "V2RagRetrievalAdapter",
- "DocsMetadataLookupIndex",
- "normalize_doc_path",
- "RagRowIndex",
- "seed_candidates_from_target_hints",
-]
diff --git a/src/app/core/agent/processes/v2/retrieval/policy_resolver.py b/src/app/core/agent/processes/v2/retrieval/policy_resolver.py
deleted file mode 100644
index 3184a73..0000000
--- a/src/app/core/agent/processes/v2/retrieval/policy_resolver.py
+++ /dev/null
@@ -1,270 +0,0 @@
-"""Intent-aware retrieval policy resolver for process v2."""
-
-from __future__ import annotations
-
-from app.core.agent.processes.v2.anchor_signals import anchor_signal_types
-from app.core.agent.processes.v2.models import V2AnchorType, V2Intent, V2RouteResult, V2Subintent
-from app.core.rag.contracts.enums import RagLayer
-from app.core.rag.retrieval.session_retriever import RetrievalPlan
-
-
-class _AnchorTermCollector:
- def prefer_like_patterns(self, route: V2RouteResult) -> list[str]:
- terms = self._hint_basenames(route)
- terms.extend(route.anchors.endpoint_paths)
- terms.extend(route.target_terms)
- terms.extend(route.anchors.file_names)
- terms.extend(route.anchors.entity_names)
- terms.extend(route.anchors.matched_aliases)
- terms.extend(self._process_terms(route))
- return [f"%{term.lower()}%" for term in _unique_terms(terms)]
-
- def find_files_patterns(self, route: V2RouteResult) -> list[str]:
- if route.anchors.target_doc_hints:
- return [f"%{name.lower()}%" for name in self._hint_basenames(route)]
- return self.prefer_like_patterns(route)
-
- def api_method_patterns(self, route: V2RouteResult) -> list[str]:
- terms = self._hint_basenames(route)
- terms.extend(route.anchors.target_doc_hints)
- terms.extend(route.anchors.endpoint_paths)
- terms.extend(route.target_terms)
- patterns: list[str] = []
- for term in _unique_terms(terms):
- lowered = term.lower()
- stripped = lowered.strip("/")
- if stripped:
- patterns.append(f"%{stripped}%")
- if lowered:
- patterns.append(f"%{lowered}%")
- return _unique_terms(patterns)
-
- def _hint_basenames(self, route: V2RouteResult) -> list[str]:
- return [hint.rsplit("/", 1)[-1] for hint in route.anchors.target_doc_hints if str(hint).strip()]
-
- def _process_terms(self, route: V2RouteResult) -> list[str]:
- terms: list[str] = []
- if route.anchors.process_domain:
- terms.append(route.anchors.process_domain)
- if route.anchors.process_subdomain:
- terms.append(route.anchors.process_subdomain)
- return terms
-
-
-class _RouteFilterBuilder:
- _API_DOC_PREFIXES = [
- "docs/api/",
- "docs/endpoints/",
- "docs/methods/",
- "api/",
- "endpoints/",
- "methods/",
- ]
-
- def __init__(self) -> None:
- self._terms = _AnchorTermCollector()
-
- def general_filters(self, route: V2RouteResult) -> dict[str, object]:
- return {
- "prefer_path_prefixes": ["docs/architecture/", "docs/"],
- "prefer_like_patterns": ["%readme.md%", "%overview%"],
- "target_doc_hints": list(route.anchors.target_doc_hints),
- }
-
- def summary_filters(self, route: V2RouteResult) -> dict[str, object]:
- if _is_api_method_explain(route):
- return self.api_method_filters(route)
- filters = self._base_filters(route)
- filters["prefer_path_prefixes"] = self._summary_prefixes(route)
- filters["prefer_like_patterns"] = self._terms.prefer_like_patterns(route)
- if V2AnchorType.API_ENDPOINT in anchor_signal_types(route):
- filters["path_prefixes"] = ["docs/api/", "docs/"]
- return filters
-
- def api_method_filters(self, route: V2RouteResult) -> dict[str, object]:
- filters = self._base_filters(route)
- filters["path_prefixes"] = list(self._API_DOC_PREFIXES)
- filters["prefer_path_prefixes"] = list(self._API_DOC_PREFIXES)
- filters["prefer_like_patterns"] = self._terms.api_method_patterns(route)
- return filters
-
- def find_files_filters(self, route: V2RouteResult) -> dict[str, object]:
- filters = self._base_filters(route)
- prefixes = self._find_files_prefixes(route)
- if prefixes:
- filters["path_prefixes"] = prefixes
- filters["prefer_path_prefixes"] = self._find_files_prefer_prefixes(route, prefixes)
- filters["prefer_like_patterns"] = self._terms.find_files_patterns(route)
- return filters
-
- def _base_filters(self, route: V2RouteResult) -> dict[str, object]:
- filters: dict[str, object] = {
- "target_doc_hints": list(route.anchors.target_doc_hints),
- }
- if route.anchors.process_domain:
- filters["metadata.domain"] = route.anchors.process_domain
- if route.anchors.process_subdomain:
- filters["metadata.subdomain"] = route.anchors.process_subdomain
- return filters
-
- def _find_files_prefixes(self, route: V2RouteResult) -> list[str]:
- hint_prefixes = _prefixes_from_paths(route.anchors.target_doc_hints)
- if hint_prefixes:
- return hint_prefixes
- file_prefixes = [name for name in route.anchors.file_names if str(name).strip().startswith("docs/")]
- derived = _prefixes_from_paths(file_prefixes)
- if derived:
- return derived
- signals = anchor_signal_types(route)
- if V2AnchorType.API_ENDPOINT in signals:
- return ["docs/api/", "docs/"]
- if V2AnchorType.ARCHITECTURE in signals:
- return ["docs/architecture/", "docs/"]
- if V2AnchorType.LOGIC_FLOW in signals:
- return ["docs/logic/", "docs/"]
- if V2AnchorType.DOMAIN_ENTITY in signals:
- return ["docs/domains/", "docs/"]
- return ["docs/"]
-
- def _find_files_prefer_prefixes(self, route: V2RouteResult, prefixes: list[str]) -> list[str]:
- preferred = list(prefixes)
- if route.anchors.process_domain or route.anchors.process_subdomain:
- preferred.extend(["docs/domains/", "docs/logic/"])
- return _unique_terms(preferred or ["docs/"])
-
- def _summary_prefixes(self, route: V2RouteResult) -> list[str]:
- signals = anchor_signal_types(route)
- prefixes: list[str] = []
- if V2AnchorType.API_ENDPOINT in signals:
- prefixes.extend(["docs/api/", "docs/"])
- if V2AnchorType.ARCHITECTURE in signals:
- prefixes.extend(["docs/architecture/", "docs/"])
- if V2AnchorType.LOGIC_FLOW in signals:
- prefixes.extend(["docs/logic/", "docs/architecture/", "docs/"])
- if V2AnchorType.DOMAIN_ENTITY in signals:
- prefixes.extend(["docs/domains/", "docs/", "docs/api/"])
- return _unique_terms(prefixes or ["docs/"])
-
-
-class V2RetrievalPolicyResolver:
- _GENERAL_LAYERS = [RagLayer.DOCS_DOCUMENT_CATALOG, RagLayer.DOCS_DOC_CHUNKS]
- _FIND_FILES_LAYERS = [RagLayer.DOCS_DOCUMENT_CATALOG, RagLayer.DOCS_ENTITY_CATALOG]
- _SUMMARY_LAYERS = {
- "docs_api_method_explain": [
- RagLayer.DOCS_DOCUMENT_CATALOG,
- RagLayer.DOCS_FACT_INDEX,
- RagLayer.DOCS_DOC_CHUNKS,
- ],
- "docs_summary_api_endpoint": [
- RagLayer.DOCS_DOCUMENT_CATALOG,
- RagLayer.DOCS_FACT_INDEX,
- RagLayer.DOCS_DOC_CHUNKS,
- ],
- "docs_summary_logic_flow": [
- RagLayer.DOCS_WORKFLOW_INDEX,
- RagLayer.DOCS_DOCUMENT_CATALOG,
- RagLayer.DOCS_DOC_CHUNKS,
- ],
- "docs_summary_domain_entity": [
- RagLayer.DOCS_ENTITY_CATALOG,
- RagLayer.DOCS_DOCUMENT_CATALOG,
- RagLayer.DOCS_DOC_CHUNKS,
- ],
- "docs_summary_architecture": [
- RagLayer.DOCS_DOCUMENT_CATALOG,
- RagLayer.DOCS_RELATION_GRAPH,
- RagLayer.DOCS_DOC_CHUNKS,
- ],
- "docs_summary_generic": [
- RagLayer.DOCS_DOCUMENT_CATALOG,
- RagLayer.DOCS_DOC_CHUNKS,
- ],
- }
-
- def __init__(self) -> None:
- self._filters = _RouteFilterBuilder()
-
- def resolve(self, route: V2RouteResult) -> RetrievalPlan:
- if route.intent == V2Intent.GENERAL_QA:
- return RetrievalPlan(
- profile="general_qa_grounded_summary",
- layers=list(self._GENERAL_LAYERS),
- limit=8,
- filters=self._filters.general_filters(route),
- )
- if route.subintent == V2Subintent.FIND_FILES:
- return RetrievalPlan(
- profile="file_lookup",
- layers=list(self._FIND_FILES_LAYERS),
- limit=12,
- filters=self._filters.find_files_filters(route),
- )
- profile = self._summary_profile(route)
- return RetrievalPlan(
- profile=profile,
- layers=list(self._SUMMARY_LAYERS[profile]),
- limit=10 if profile == "docs_api_method_explain" else 8,
- filters=self._filters.summary_filters(route),
- )
-
- def _summary_profile(self, route: V2RouteResult) -> str:
- if _is_api_method_explain(route):
- return "docs_api_method_explain"
- meaningful = anchor_signal_types(route) - {V2AnchorType.FIND_FILES}
- if len(meaningful) != 1:
- return "docs_summary_generic"
- mapping = {
- V2AnchorType.API_ENDPOINT: "docs_summary_api_endpoint",
- V2AnchorType.ARCHITECTURE: "docs_summary_architecture",
- V2AnchorType.LOGIC_FLOW: "docs_summary_logic_flow",
- V2AnchorType.DOMAIN_ENTITY: "docs_summary_domain_entity",
- }
- return mapping.get(next(iter(meaningful)), "docs_summary_generic")
-
-
-def _prefixes_from_paths(paths: list[str]) -> list[str]:
- prefixes = []
- for path in paths:
- value = str(path).strip().strip("/")
- if "/" not in value:
- continue
- prefix = value.rsplit("/", 1)[0] + "/"
- if prefix:
- prefixes.append(prefix)
- return _unique_terms(prefixes)
-
-
-def _unique_terms(items: list[str]) -> list[str]:
- seen: set[str] = set()
- unique: list[str] = []
- for raw in items:
- value = str(raw or "").strip()
- if not value or value in seen:
- continue
- seen.add(value)
- unique.append(value)
- return unique
-
-
-def _is_api_method_explain(route: V2RouteResult) -> bool:
- if route.subintent != V2Subintent.SUMMARY:
- return False
- if route.anchors.endpoint_paths:
- return True
- if _has_api_like_hints(route.anchors.target_doc_hints):
- return True
- return V2AnchorType.API_ENDPOINT in anchor_signal_types(route)
-
-
-def _has_api_like_hints(hints: list[str]) -> bool:
- for hint in hints:
- value = str(hint or "").strip().lower()
- if not value:
- continue
- if value.startswith("/"):
- return True
- if value.startswith(("docs/api/", "docs/endpoints/", "docs/methods/")):
- return True
- if "endpoint" in value or "method" in value:
- return True
- return False
diff --git a/src/app/core/agent/processes/v2/v2_process.py b/src/app/core/agent/processes/v2/v2_process.py
new file mode 100644
index 0000000..ddf6ef3
--- /dev/null
+++ b/src/app/core/agent/processes/v2/v2_process.py
@@ -0,0 +1,194 @@
+"""Процесс v2: роутинг запроса и dispatch в workflow."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from app.core.agent.processes.base import AgentProcess, ProcessResult
+from app.core.agent.processes.v2.intent_router import V2IntentRouter
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context import (
+ DocExplainApiExposedContext,
+)
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.graph import DocExplainApiExposedGraph
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context import DocExplainFindFilesContext
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.graph import DocExplainFindFilesGraph
+from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context import DocExplainSummaryContext
+from app.core.agent.processes.v2.workflows.doc_explain_summary.graph import DocExplainSummaryGraph
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.graph import DocUpdateFromFeatureGraph
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.context import (
+ DocUpdateFromFeatureContext,
+)
+from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.context import GeneralQaSummaryContext
+from app.core.agent.processes.v2.workflows.general_qa_summary.graph import GeneralQaSummaryGraph
+from app.core.agent.utils.llm import AgentLlmService
+from app.core.agent.utils.process_v2.anchor_signals import route_anchor_summary
+from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
+from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
+from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2Subintent
+from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
+from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
+
+
+class V2Process(AgentProcess):
+ version = "v2"
+
+ def __init__(
+ self,
+ llm: AgentLlmService,
+ policy_resolver: RetrievalPlanResolver,
+ rag_adapter: V2RagRetrievalAdapter,
+ evidence_assembler: DocsEvidenceAssembler,
+ evidence_gate: DocsEvidenceGate | None = None,
+ router: V2IntentRouter | None = None,
+ docs_summary_prompt_name: str = "v2_docs_explain.summary_answer",
+ general_summary_prompt_name: str = "v2_general.summary_answer",
+ workflow_llm_enabled: bool = True,
+ doc_rules_enabled: bool = True,
+ ) -> None:
+ self._router = router or V2IntentRouter()
+ gate = evidence_gate or DocsEvidenceGate()
+ self._docs_summary_prompt_name = docs_summary_prompt_name
+ self._general_summary_prompt_name = general_summary_prompt_name
+ self._workflow_llm_enabled = workflow_llm_enabled
+ self._doc_rules_enabled = doc_rules_enabled
+ self._workflows: dict[tuple[str, str, str], Any] = {
+ (V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.SUMMARY): DocExplainSummaryGraph(
+ llm,
+ policy_resolver=policy_resolver,
+ rag_adapter=rag_adapter,
+ evidence_assembler=evidence_assembler,
+ evidence_gate=gate,
+ ),
+ (V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.FIND_FILES): DocExplainFindFilesGraph(
+ policy_resolver=policy_resolver,
+ rag_adapter=rag_adapter,
+ evidence_assembler=evidence_assembler,
+ evidence_gate=gate,
+ ),
+ (V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.API_EXPOSED): DocExplainApiExposedGraph(
+ policy_resolver=policy_resolver,
+ rag_adapter=rag_adapter,
+ ),
+ (V2Domain.DOCS, V2Intent.DOC_UPDATE, V2Subintent.FROM_FEATURE): DocUpdateFromFeatureGraph(
+ llm=llm,
+ doc_rules_enabled=doc_rules_enabled,
+ ),
+ (V2Domain.GENERAL, V2Intent.GENERAL_QA, V2Subintent.SUMMARY): GeneralQaSummaryGraph(
+ llm,
+ policy_resolver=policy_resolver,
+ rag_adapter=rag_adapter,
+ evidence_assembler=evidence_assembler,
+ evidence_gate=gate,
+ ),
+ }
+
+ async def run(self, context) -> ProcessResult:
+ rag_session_id = context.session.active_rag_session_id or ""
+ route = self._router.route(context.request.message, rag_session_id=rag_session_id or None)
+ context.trace.module("process.v2").log(
+ "intent_routed",
+ {
+ "routing_domain": route.routing_domain,
+ "intent": route.intent,
+ "subintent": route.subintent,
+ "normalized_query": route.normalized_query,
+ "target_terms": route.target_terms,
+ "anchors": route_anchor_summary(route),
+ "confidence": route.confidence,
+ "routing_mode": route.routing_mode,
+ "llm_router_used": route.llm_router_used,
+ "reason_short": route.reason_short,
+ "rag_session_id": rag_session_id,
+ },
+ )
+ self._log_step(
+ context,
+ "router_resolved",
+ {
+ "domain": route.routing_domain,
+ "intent": route.intent,
+ "subintent": route.subintent,
+ "confidence": route.confidence,
+ },
+ )
+ self._log_step(
+ context,
+ "anchors_extracted",
+ {
+ "signal_types": route_anchor_summary(route)["signal_types"],
+ "endpoint_paths": route.anchors.endpoint_paths,
+ "target_doc_hints": route.anchors.target_doc_hints,
+ "matched_aliases": route.anchors.matched_aliases,
+ "target_terms": route.target_terms,
+ },
+ )
+ self._log_step(
+ context,
+ "alias_resolution",
+ {
+ "resolved_aliases": route.anchors.matched_aliases,
+ "target_doc_hints": route.anchors.target_doc_hints,
+ },
+ )
+ flow_context = await self._run_workflow(context, route, rag_session_id)
+ if flow_context.answer_generated_payload is not None:
+ self._log_step(context, "answer_generated", dict(flow_context.answer_generated_payload))
+ changeset = list(getattr(flow_context, "changeset", []) or [])
+ apply_changeset = bool(getattr(flow_context, "apply_changeset", False))
+ return ProcessResult(
+ answer=flow_context.answer,
+ changeset=changeset,
+ apply_changeset=apply_changeset,
+ )
+
+ def _log_step(self, context, step: str, payload: dict[str, object]) -> None:
+ context.trace.module("process.v2.pipeline").log(step, payload)
+
+ async def _run_workflow(self, runtime_context, route, rag_session_id: str):
+ workflow = self._workflows.get((route.routing_domain, route.intent, route.subintent))
+ if workflow is None:
+ raise ValueError(f"Unsupported v2 workflow route: {(route.routing_domain, route.intent, route.subintent)!r}")
+ if route.intent == V2Intent.GENERAL_QA:
+ return await workflow.run(
+ GeneralQaSummaryContext(
+ runtime=runtime_context,
+ route=route,
+ rag_session_id=rag_session_id,
+ prompt_name=self._general_summary_prompt_name,
+ workflow_llm_enabled=self._workflow_llm_enabled,
+ )
+ )
+ if route.subintent == V2Subintent.FIND_FILES:
+ return await workflow.run(
+ DocExplainFindFilesContext(
+ runtime=runtime_context,
+ route=route,
+ rag_session_id=rag_session_id,
+ )
+ )
+ if route.subintent == V2Subintent.API_EXPOSED:
+ return await workflow.run(
+ DocExplainApiExposedContext(
+ runtime=runtime_context,
+ route=route,
+ rag_session_id=rag_session_id,
+ )
+ )
+ if route.intent == V2Intent.DOC_UPDATE and route.subintent == V2Subintent.FROM_FEATURE:
+ return await workflow.run(
+ DocUpdateFromFeatureContext(
+ runtime=runtime_context,
+ route=route,
+ rag_session_id=rag_session_id,
+ doc_rules_enabled=self._doc_rules_enabled,
+ )
+ )
+ return await workflow.run(
+ DocExplainSummaryContext(
+ runtime=runtime_context,
+ route=route,
+ rag_session_id=rag_session_id,
+ prompt_name=self._docs_summary_prompt_name,
+ workflow_llm_enabled=self._workflow_llm_enabled,
+ )
+ )
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/README.md b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/README.md
new file mode 100644
index 0000000..3dcf97b
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/README.md
@@ -0,0 +1,17 @@
+# DOC_EXPLAIN / API_EXPOSED Workflow
+
+## Контракт сабинтента
+
+| Поле | Значение |
+|---|---|
+| `domain` | `DOCS` |
+| `intent` | `DOC_EXPLAIN` |
+| `subintent` | `API_EXPOSED` |
+| `workflow_id` | `v2.docs_explain.api_exposed` |
+| `source` | `workflow.v2.api_exposed` |
+
+## Выходной формат
+
+Ответ формируется детерминированно как список endpoint-путей (`/path`) по одному на строку.
+Scope учитывается через retrieval-policy фильтры `metadata.domain`/`metadata.subdomain` и path-префиксы API-документации.
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/__init__.py
new file mode 100644
index 0000000..48f7400
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/__init__.py
@@ -0,0 +1,4 @@
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.graph import DocExplainApiExposedGraph
+
+__all__ = ["DocExplainApiExposedGraph"]
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/graph.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/graph.py
new file mode 100644
index 0000000..2d62f50
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/graph.py
@@ -0,0 +1,48 @@
+from __future__ import annotations
+
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.build_api_exposed_evidence_step import (
+ BuildApiExposedEvidenceStep,
+)
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.fetch_rag_rows_step import FetchRagRowsStep
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.finalize_api_exposed_answer_step import (
+ FinalizeApiExposedAnswerStep,
+)
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.require_rag_session_step import (
+ RequireRagSessionStep,
+)
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.resolve_retrieval_plan_step import (
+ ResolveRetrievalPlanStep,
+)
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.retrieval.api_endpoint_collector import (
+ ApiEndpointCollector,
+)
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.buffered_graph import (
+ DocExplainApiExposedWorkflowGraph,
+)
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context import (
+ DocExplainApiExposedContext,
+)
+from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
+from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
+
+
+class DocExplainApiExposedGraph(DocExplainApiExposedWorkflowGraph[DocExplainApiExposedContext]):
+    """Workflow graph wiring the DOC_EXPLAIN / API_EXPOSED step pipeline."""
+
+    def __init__(self, policy_resolver: RetrievalPlanResolver, rag_adapter: V2RagRetrievalAdapter) -> None:
+        """Build the five workflow steps and register them with the base graph.
+
+        ``policy_resolver`` is handed to the plan-resolution step and
+        ``rag_adapter`` to the row-fetching step; the endpoint collector is
+        created here.
+        """
+        no_session_text = "Для процесса v2 нужна активная RAG-сессия проекта с проиндексированной документацией."
+        pipeline = [
+            RequireRagSessionStep(missing_message=no_session_text),
+            ResolveRetrievalPlanStep(policy_resolver),
+            FetchRagRowsStep(rag_adapter),
+            BuildApiExposedEvidenceStep(ApiEndpointCollector()),
+            FinalizeApiExposedAnswerStep(),
+        ]
+        super().__init__(workflow_id="v2.docs_explain.api_exposed", source="workflow.v2.api_exposed", steps=pipeline)
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/__init__.py
new file mode 100644
index 0000000..bc0742b
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/__init__.py
@@ -0,0 +1,2 @@
+"""Steps for DOC_EXPLAIN/API_EXPOSED workflow."""
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/build_api_exposed_evidence_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/build_api_exposed_evidence_step.py
new file mode 100644
index 0000000..c385a79
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/build_api_exposed_evidence_step.py
@@ -0,0 +1,39 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.retrieval.api_endpoint_collector import (
+ ApiEndpointCollector,
+)
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context_protocols import ApiWorkflowContext
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.pipeline_logging import log_pipeline_step
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=ApiWorkflowContext)
+
+
+class BuildApiExposedEvidenceStep(WorkflowStep[TContext], Generic[TContext]):
+    """Workflow step that turns retrieved rows into the list of API endpoints."""
+
+    step_id = "build_api_exposed_evidence"
+    title = "Сборка списка API"
+
+    def __init__(self, collector: ApiEndpointCollector) -> None:
+        """Keep the collector used to extract endpoints from rows."""
+        self._collector = collector
+
+    async def run(self, context: TContext) -> TContext:
+        """Fill ``context.endpoints`` and log the result; no-op once an answer is set."""
+        if not context.answer:
+            context.endpoints = self._collector.collect(context.retrieved_rows)
+            found = context.endpoints
+            summary = {"mode": "api_exposed", "endpoint_count": len(found)}
+            # The evidence trace carries the endpoints themselves; the pipeline log only the count.
+            context.runtime.trace.module("process.v2.evidence").log("evidence_assembled", {**summary, "endpoints": found})
+            log_pipeline_step(context.runtime, "evidence_assembled", summary)
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        """Report the number of collected endpoints for the step trace."""
+        return {"endpoint_count": len(context.endpoints)}
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/fetch_rag_rows_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/fetch_rag_rows_step.py
new file mode 100644
index 0000000..2eefcdc
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/fetch_rag_rows_step.py
@@ -0,0 +1,31 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context_protocols import RetrievalWorkflowContext
+from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
+
+
+class FetchRagRowsStep(WorkflowStep[TContext], Generic[TContext]):
+    """Workflow step that loads RAG rows for the resolved retrieval plan."""
+
+    step_id = "fetch_rag_rows"
+    title = "Получение строк из RAG"
+
+    def __init__(self, rag_adapter: V2RagRetrievalAdapter) -> None:
+        self._rag_adapter = rag_adapter  # adapter whose fetch_rows() is awaited in run()
+
+    async def run(self, context: TContext) -> TContext:
+        """Store fetched rows in ``context.retrieved_rows``; skipped if answered or plan-less."""
+        plan = context.retrieval_plan
+        if plan is not None and not context.answer:
+            query = context.route.normalized_query
+            context.retrieved_rows = await self._rag_adapter.fetch_rows(context.rag_session_id, query, plan)
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        return {"retrieved_row_count": len(context.retrieved_rows)}  # row count only, not the rows
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/finalize_api_exposed_answer_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/finalize_api_exposed_answer_step.py
new file mode 100644
index 0000000..3f8cde6
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/finalize_api_exposed_answer_step.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context import DocExplainApiExposedContext
+from app.core.agent.utils.workflow import WorkflowStep
+
+
+class FinalizeApiExposedAnswerStep(WorkflowStep[DocExplainApiExposedContext]):
+    """Workflow step that renders the collected endpoints into the final answer text."""
+
+    step_id = "finalize_api_exposed_answer"
+    title = "Формирование ответа со списком API"
+
+    async def run(self, context: DocExplainApiExposedContext) -> DocExplainApiExposedContext:
+        """Set ``answer`` and ``answer_generated_payload`` unless an answer already exists.
+
+        Endpoints are joined one per line ("deterministic"); none yields a fixed message ("insufficient_evidence").
+        """
+        if context.answer:
+            return context
+        if context.endpoints:
+            mode, text = "deterministic", "\n".join(context.endpoints)
+        else:
+            mode, text = "insufficient_evidence", "Не нашёл задокументированных API-эндпоинтов в выбранном scope."
+        context.answer = text
+        context.answer_generated_payload = {"answer_mode": mode, "answer_length": len(context.answer)}
+        return context
+
+    def trace_output(self, context: DocExplainApiExposedContext) -> dict[str, object]:
+        return {"answer_length": len(context.answer)}
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/require_rag_session_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/require_rag_session_step.py
new file mode 100644
index 0000000..55bdc65
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/require_rag_session_step.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context_protocols import RetrievalWorkflowContext
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
+
+
+class RequireRagSessionStep(WorkflowStep[TContext], Generic[TContext]):
+    """Workflow step that answers early when the context has no RAG session id."""
+
+    step_id = "require_rag_session"
+    title = "Проверка RAG-сессии"
+
+    def __init__(self, *, missing_message: str) -> None:
+        self._missing_message = missing_message  # answer text used when the session id is empty
+
+    async def run(self, context: TContext) -> TContext:
+        """Write the missing-session answer and its payload if ``rag_session_id`` is falsy."""
+        if not context.rag_session_id:
+            context.answer = self._missing_message
+            # The other steps of this workflow check ``context.answer`` and skip their work.
+            context.answer_generated_payload = {"answer_mode": "insufficient_evidence", "answer_length": len(context.answer)}
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        return {"has_rag_session": bool(context.rag_session_id)}
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/resolve_retrieval_plan_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/resolve_retrieval_plan_step.py
new file mode 100644
index 0000000..6d782d6
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/resolve_retrieval_plan_step.py
@@ -0,0 +1,38 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context_protocols import RetrievalWorkflowContext
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.pipeline_logging import log_pipeline_step
+from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
+
+
+class ResolveRetrievalPlanStep(WorkflowStep[TContext], Generic[TContext]):
+    """Workflow step that resolves the route into a retrieval plan and logs it."""
+
+    step_id = "resolve_retrieval_plan"
+    title = "Выбор retrieval-плана"
+
+    def __init__(self, resolver: RetrievalPlanResolver) -> None:
+        """Keep the resolver whose ``resolve(route)`` produces the plan."""
+        self._resolver = resolver
+
+    async def run(self, context: TContext) -> TContext:
+        """Store the plan in ``context.retrieval_plan``; no-op once an answer is set."""
+        if not context.answer:
+            resolved = context.retrieval_plan = self._resolver.resolve(context.route)
+            policy_trace = context.runtime.trace.module("process.v2.retrieval_policy")
+            detail = {"profile": resolved.profile, "layers": resolved.layers, "limit": resolved.limit, "filters": resolved.filters}
+            policy_trace.log("retrieval_plan_resolved", detail)
+            # The pipeline log gets the same fields except ``limit``.
+            brief = {"profile": resolved.profile, "layers": resolved.layers, "filters": resolved.filters}
+            log_pipeline_step(context.runtime, "retrieval_profile_selected", brief)
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        """Report the plan's profile, or an empty string when no plan is set."""
+        return {"profile": getattr(context.retrieval_plan, "profile", "")}
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/retrieval/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/retrieval/__init__.py
new file mode 100644
index 0000000..9df89fb
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/retrieval/__init__.py
@@ -0,0 +1,2 @@
+"""Retrieval helpers for DOC_EXPLAIN/API_EXPOSED workflow."""
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/retrieval/api_endpoint_collector.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/retrieval/api_endpoint_collector.py
new file mode 100644
index 0000000..4f55a6c
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/retrieval/api_endpoint_collector.py
@@ -0,0 +1,77 @@
+from __future__ import annotations
+
+import re
+
+
+class ApiEndpointCollector:
+    """Extract ``METHOD /path`` endpoint strings from retrieved documentation rows."""
+
+    _METHODS = ("GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS")
+    _ENDPOINT_VALUE_RE = re.compile(
+        r"\b((?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)(?:\s*\|\s*(?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS))*)\s+(/[-a-zA-Z0-9_./{}]+)"
+    )
+    _METHOD_PATH_RE = re.compile(r"\b(GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)\s+(/[-a-zA-Z0-9_./{}]+)")
+    _PATH_RE = re.compile(r"(/[-a-zA-Z0-9_./{}]+)")
+    _DOC_EXTS = (".md", ".yaml", ".yml", ".json")
+
+    def collect(self, rows: list[dict]) -> list[str]:
+        """Return the sorted, de-duplicated endpoints found in ``rows``.
+
+        Rows are read as dicts with optional ``title`` and ``metadata`` (``endpoint``, ``name``, ``summary_text``).
+        """
+        endpoints: list[str] = []
+        for row in rows:
+            self._append_from_endpoint_metadata(endpoints, row)
+            self._append_from_title_fallback(endpoints, row)
+            for raw in self._row_candidates(row):
+                self._append_from_text(endpoints, raw)
+        return sorted(set(endpoints))
+
+    def _append_from_title_fallback(self, out: list[str], row: dict) -> None:
+        """Add method-less paths from the row title, defaulting them to GET."""
+        title = str(row.get("title") or "").strip()
+        # A path the title already pairs with a method must not also be reported as GET.
+        explicit = {path for _, path in self._METHOD_PATH_RE.findall(title)}
+        for match in self._PATH_RE.findall(title):
+            if match not in explicit:
+                self._append_default(out, match)
+
+    def _append_from_endpoint_metadata(self, out: list[str], row: dict) -> None:
+        """Add every ``METHOD[|METHOD...] /path`` pair found in ``metadata.endpoint``."""
+        endpoint_value = str(dict(row.get("metadata") or {}).get("endpoint") or "").strip()
+        for methods, path in self._ENDPOINT_VALUE_RE.findall(endpoint_value):
+            self._append_methods_with_path(out, methods, path)
+
+    def _row_candidates(self, row: dict) -> list[str]:
+        """Return the non-blank ``name``, ``summary_text`` and ``title`` texts of a row."""
+        metadata = dict(row.get("metadata") or {})
+        values = (metadata.get("name"), metadata.get("summary_text"), row.get("title"))
+        return [str(value or "") for value in values if str(value or "").strip()]
+
+    def _append_from_text(self, out: list[str], text: str) -> None:
+        """Add every ``METHOD /path`` pair that appears in free text."""
+        for method, path in self._METHOD_PATH_RE.findall(text):
+            self._append_with_method(out, method, path)
+
+    def _append_methods_with_path(self, out: list[str], methods_raw: str, path_raw: str) -> None:
+        """Add ``path_raw`` once per known method in the ``|``-separated ``methods_raw``."""
+        parts = (part.strip().upper() for part in str(methods_raw or "").split("|"))
+        methods = [part for part in parts if part in self._METHODS]
+        if not methods:
+            self._append_default(out, path_raw)  # no recognised method: fall back to GET
+        for method in methods:
+            self._append_with_method(out, method, path_raw)
+
+    def _append_default(self, out: list[str], raw: str) -> None:
+        """Add ``raw`` as a GET endpoint."""
+        self._append_with_method(out, "GET", raw)
+
+    def _append_with_method(self, out: list[str], method: str, raw: str) -> None:
+        """Trim and lowercase ``raw``, then append ``METHOD path`` to ``out`` unless rejected or present."""
+        value = str(raw or "").strip().strip("`'\"()[].,:;!?").lower()
+        # Skip values that are not paths or that end in one of the documentation-file extensions.
+        if not value.startswith("/") or value.endswith(self._DOC_EXTS):
+            return
+        endpoint = f"{method.upper()} {value}"
+        if endpoint not in out:
+            out.append(endpoint)
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/retrieval/retrieval_policy.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/retrieval/retrieval_policy.py
new file mode 100644
index 0000000..af1972e
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/retrieval/retrieval_policy.py
@@ -0,0 +1,64 @@
+from __future__ import annotations
+
+from app.core.agent.utils.process_v2.models import V2Intent, V2RouteResult, V2Subintent
+from app.core.rag.contracts.enums import RagLayer
+from app.core.rag.retrieval.session_retriever import RetrievalPlan
+
+
+class DocExplainApiExposedRetrievalPolicy:
+    """Retrieval policy that builds the ``api_exposed`` plan for DOC_EXPLAIN / API_EXPOSED routes."""
+
+    _LAYERS = [RagLayer.DOCS_DOCUMENT_CATALOG]
+    _API_PREFIXES = ["docs/api/", "docs/endpoints/", "docs/methods/", "api/", "endpoints/", "methods/"]
+    # Generic API words: always searched as LIKE patterns, never reported as query signals.
+    _GENERIC_TERMS = ("api", "endpoint", "method", "эндпоинт", "метод")
+
+    def supports(self, route: V2RouteResult) -> bool:
+        """Return True only for the DOC_EXPLAIN intent with the API_EXPOSED subintent."""
+        return route.intent == V2Intent.DOC_EXPLAIN and route.subintent == V2Subintent.API_EXPOSED
+
+    def resolve(self, route: V2RouteResult) -> RetrievalPlan:
+        """Build the plan: ``api_exposed`` profile, DOCS_DOCUMENT_CATALOG layer, limit 400, route-derived filters."""
+        return RetrievalPlan(profile="api_exposed", layers=list(self._LAYERS), limit=400, filters=self._filters(route))
+
+    def _filters(self, route: V2RouteResult) -> dict[str, object]:
+        """Assemble plan filters; query signals and domain/subdomain are added only when non-empty."""
+        anchors, query_signals = route.anchors, self._query_signals(route)
+        filters: dict[str, object] = {
+            "metadata.type": "api_method",
+            "prefer_path_prefixes": list(self._API_PREFIXES),
+            "target_doc_hints": list(anchors.target_doc_hints),
+            "prefer_like_patterns": self._like_patterns(route),
+        }
+        if query_signals:
+            filters["query_signals"] = query_signals
+        if anchors.process_domain:
+            filters["metadata.domain"] = anchors.process_domain
+        if anchors.process_subdomain:
+            filters["metadata.subdomain"] = anchors.process_subdomain
+        return filters
+
+    def _like_patterns(self, route: V2RouteResult) -> list[str]:
+        """Return unique lowercase ``%term%`` patterns from generic API words and route anchors."""
+        raw: list[str] = [*self._GENERIC_TERMS, *route.target_terms, *route.anchors.endpoint_paths]
+        raw.extend(route.anchors.target_doc_hints)
+        raw.extend(candidate.value for candidate in route.anchors.candidate_apis)
+        # Lowercase before de-duplicating so "API" and "api" yield a single pattern.
+        return [f"%{item}%" for item in _unique([str(value or "").lower() for value in raw])]
+
+    def _query_signals(self, route: V2RouteResult) -> list[str]:
+        """Return unique target terms and endpoint paths, minus the generic API words."""
+        raw = [*route.target_terms, *route.anchors.endpoint_paths]
+        return [item for item in _unique(raw) if item.lower() not in self._GENERIC_TERMS]
+
+
+def _unique(items: list[str]) -> list[str]:
+    """Return the stripped, non-empty string forms of ``items`` without duplicates.
+
+    Falsy items count as empty; the first occurrence of each value keeps its
+    position, and comparison is case-sensitive.
+    """
+    cleaned = (str(entry or "").strip() for entry in items)
+    # dict.fromkeys keeps the first occurrence of every key in insertion order.
+    ordered = dict.fromkeys(text for text in cleaned if text)
+    return list(ordered)
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/__init__.py
new file mode 100644
index 0000000..359a31b
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/__init__.py
@@ -0,0 +1,2 @@
+"""Runtime helpers for the DOC_EXPLAIN/API_EXPOSED workflow."""
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/buffered_graph.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/buffered_graph.py
new file mode 100644
index 0000000..6e17aa8
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/buffered_graph.py
@@ -0,0 +1,42 @@
+"""Buffered graph for DOC_EXPLAIN/API_EXPOSED workflow."""
+
+from __future__ import annotations
+
+from typing import TypeVar
+
+from app.core.agent.utils.workflow.context import WorkflowContext
+from app.core.agent.utils.workflow.graph import WorkflowGraph
+
+TContext = TypeVar("TContext", bound=WorkflowContext)
+
+
+class DocExplainApiExposedWorkflowGraph(WorkflowGraph[TContext]):
+    """Workflow graph whose ``run`` traces every step and logs a buffered per-step summary."""
+
+    async def run(self, context: TContext) -> TContext:
+        """Run the steps in order, threading the context each step returns into the next.
+
+        For every step a status is published before it runs and a
+        ``workflow_step_traced`` record is logged after it; the collected
+        records are logged once more as ``workflow_trace_flushed`` at the end.
+        """
+        workflow_id, source = self._workflow_id, self._source
+        trace = context.runtime.trace.module(source)  # taken from the initial context, reused for all steps
+        trace.log("workflow_started", {"workflow_id": workflow_id})
+        traced_steps: list[dict[str, object]] = []
+        for step in self._steps:
+            step_input = step.trace_input(context)  # captured before the step runs
+            request_id = context.runtime.request.request_id
+            status_text = f"Шаг workflow: {step.title}."
+            status_meta = {"workflow_id": workflow_id, "step_id": step.step_id}
+            await context.runtime.publisher.publish_status(request_id, source, status_text, status_meta)
+            context = await step.run(context)
+            step_output = step.trace_output(context)
+            step_ref = {"id": step.step_id, "title": step.title}
+            record = {"workflow_id": workflow_id, "step": step_ref, "input": step_input, "output": step_output}
+            trace.log("workflow_step_traced", record)
+            traced_steps.append({"step_id": step.step_id, "title": step.title, "input": step_input, "output": step_output})
+        trace.log("workflow_trace_flushed", {"workflow_id": workflow_id, "steps": traced_steps})
+        trace.log("workflow_completed", {"workflow_id": workflow_id})
+        return context
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/context.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/context.py
new file mode 100644
index 0000000..d34ad88
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/context.py
@@ -0,0 +1,20 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+from app.core.agent.runtime.execution_context import RuntimeExecutionContext
+from app.core.agent.utils.process_v2.models import V2RouteResult
+from app.core.rag.retrieval.session_retriever import RetrievalPlan
+
+
+@dataclass(slots=True)
+class DocExplainApiExposedContext:  # mutable state passed between the DOC_EXPLAIN / API_EXPOSED workflow steps
+    runtime: RuntimeExecutionContext  # steps and the graph read trace / request / publisher from it
+    route: V2RouteResult  # input to plan resolution; its normalized_query is the retrieval query
+    rag_session_id: str  # a falsy value makes RequireRagSessionStep answer early
+    retrieval_plan: RetrievalPlan | None = None  # set by ResolveRetrievalPlanStep
+    retrieved_rows: list[dict] = field(default_factory=list)  # set by FetchRagRowsStep
+    endpoints: list[str] = field(default_factory=list)  # set by BuildApiExposedEvidenceStep
+    answer: str = ""  # once non-empty, the remaining steps skip their work
+    answer_generated_payload: dict[str, object] | None = None  # {"answer_mode", "answer_length"} written with the answer
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/context_protocols.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/context_protocols.py
new file mode 100644
index 0000000..163660b
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/context_protocols.py
@@ -0,0 +1,24 @@
+"""Context protocols for the DOC_EXPLAIN/API_EXPOSED workflow."""
+
+from __future__ import annotations
+
+from typing import Protocol
+
+from app.core.agent.runtime.execution_context import RuntimeExecutionContext
+from app.core.agent.utils.process_v2.models import V2RouteResult
+from app.core.rag.retrieval.session_retriever import RetrievalPlan
+
+
+class RetrievalWorkflowContext(Protocol):  # structural type the retrieval steps bind their TContext to
+    runtime: RuntimeExecutionContext  # steps log through runtime.trace
+    route: V2RouteResult  # passed to the plan resolver; normalized_query is used for fetching
+    rag_session_id: str  # checked for truthiness by RequireRagSessionStep
+    retrieval_plan: RetrievalPlan | None  # FetchRagRowsStep skips fetching while this is None
+    retrieved_rows: list[dict]  # rows returned by the RAG adapter
+    answer: str  # a non-empty value makes later steps return immediately
+    answer_generated_payload: dict[str, object] | None  # filled together with the answer
+
+
+class ApiWorkflowContext(RetrievalWorkflowContext, Protocol):  # retrieval context that also carries endpoints
+    endpoints: list[str]  # written by BuildApiExposedEvidenceStep
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/pipeline_logging.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/pipeline_logging.py
new file mode 100644
index 0000000..dc511be
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/pipeline_logging.py
@@ -0,0 +1,8 @@
+"""Pipeline logging helpers for DOC_EXPLAIN/API_EXPOSED."""
+
+from __future__ import annotations
+
+
+def log_pipeline_step(runtime, step: str, payload: dict[str, object]) -> None:  # log one pipeline event
+    runtime.trace.module("process.v2.pipeline").log(step, payload)  # always under the "process.v2.pipeline" trace module
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/README.md b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/README.md
new file mode 100644
index 0000000..c562d21
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/README.md
@@ -0,0 +1,159 @@
+# DOC_EXPLAIN / FIND_FILES Workflow
+
+## Контракт сабинтента
+
+| Поле | Значение |
+|---|---|
+| `domain` | `DOCS` |
+| `intent` | `DOC_EXPLAIN` |
+| `subintent` | `FIND_FILES` |
+| `workflow_id` | `v2.docs_explain.find_files` |
+| `source` | `workflow.v2.find_files` |
+
+## Диаграмма флоу
+
+```mermaid
+flowchart TD
+ A["RequireRagSessionStep"] --> B["ResolveRetrievalPlanStep"]
+ B --> C["FetchRagRowsStep"]
+ C --> D["PrepareCandidateRowsStep"]
+ D --> E["BuildFilesEvidenceStep"]
+ E --> F["ApplyFilesEvidenceGateStep"]
+ F --> G["FinalizeFindFilesAnswerStep"]
+```
+
+## Шаги процесса
+
+### 1) `RequireRagSessionStep`
+
+Шаг проверяет, есть ли активная RAG-сессия. Если `rag_session_id` пустой, workflow останавливает дальнейший retrieval и пишет пользовательское сообщение в `answer`. Для `find_files` gate-решение на этом шаге обычно не ставится, но механизм поддержан.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.rag_session_id` | `V2Process` -> `DocExplainFindFilesContext` | Идентификатор активной RAG-сессии |
+| `self._missing_message` | Конфигурация в `graph.py` | Текст ответа, если сессии нет |
+| `self._missing_gate` | Конфигурация шага | Опциональный gate для раннего выхода |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.answer` | Заполняется `missing_message`, если `rag_session_id` пустой |
+| `context.gate_decision` | Заполняется `missing_gate`, если он передан и сессии нет |
+| `context.answer_generated_payload` | Формируется как `{"answer_mode", "answer_length"}` при раннем ответе |
+
+### 2) `ResolveRetrievalPlanStep`
+
+Шаг превращает route в retrieval-план через `RetrievalPlanResolver`. Профиль для этого сабинтента — `file_lookup`, с подходящими слоями и фильтрами. Параллельно пишет trace-событие `retrieval_plan_resolved`.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.route` | Результат `intent_router` | Route с `anchors`, `target_terms`, `scope_type` |
+| `self._resolver` | DI из `graph.py` | Реализация policy-резолвера |
+| `context.answer` | Предыдущие шаги | Если уже есть ответ, шаг пропускается |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.retrieval_plan` | `self._resolver.resolve(context.route)` |
+| `process.v2.retrieval_policy.retrieval_plan_resolved` | Лог с `profile`, `layers`, `limit`, `filters` |
+
+### 3) `FetchRagRowsStep`
+
+Шаг выполняет retrieval через `V2RagRetrievalAdapter`. Внутри адаптера объединяются seed-строки по `target_doc_hints` и основной retrieval по эмбеддингам/фильтрам плана. Если план не сформирован или уже есть готовый ответ, шаг ничего не делает.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.rag_session_id` | Контекст workflow | Сессия для поиска в `rag_chunks` |
+| `context.route.normalized_query` | Route | Нормализованный текст запроса |
+| `context.retrieval_plan` | Предыдущий шаг | План retrieval |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.retrieved_rows` | `await rag_adapter.fetch_rows(rag_session_id, normalized_query, retrieval_plan)` |
+
+### 4) `PrepareCandidateRowsStep`
+
+Шаг собирает итоговые candidate rows для ранжирования файлов. Он добавляет metadata-lookup кандидаты и подмешивает seed по `target_doc_hints`, затем сохраняет merged-список в `context.rows`. Дополнительно пишет детальный retrieval-trace.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.retrieved_rows` | `FetchRagRowsStep` | Строки после retrieval |
+| `context.route` | Route | Нужен для hints/aliases/terms |
+| `self._builder` | `CandidateRowsBuilder()` | Логика merge и metadata lookup |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.rows` | `prepared.rows` из `CandidateRowsBuilder.build(...)` |
+| `process.v2.rag_retrieval.rag_rows_fetched` | Лог деталей rows, источников и top результатов |
+
+### 5) `BuildFilesEvidenceStep`
+
+Шаг ранжирует candidate rows в список файлов через `DocsEvidenceAssembler.assemble_files`. На выходе формируется shortlist `RetrievedFile` с оценками и причинами совпадения. Этот shortlist становится опорой для gate и финального ответа.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.rows` | `PrepareCandidateRowsStep` | Подготовленные кандидаты |
+| `context.route` | Route | Сигналы маршрута для ranking |
+| `self._assembler` | DI из `graph.py` | Сборщик evidence |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.files` | `assemble_files(context.rows, context.route)` |
+| `process.v2.evidence.evidence_assembled` | Лог file-count и путей |
+
+### 6) `ApplyFilesEvidenceGateStep`
+
+Шаг проверяет качество shortlist через `DocsEvidenceGate.check_files`. Решение gate определяет, можно ли отвечать детерминированно или нужно более осторожное поведение. Для прозрачности пишет pipeline-лог с полями `passed/reason/answer_mode`.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.route` | Route | Сигналы запроса для gate |
+| `context.files` | `BuildFilesEvidenceStep` | Ранжированные файлы |
+| `self._gate` | DI из `graph.py` | Правила оценки evidence |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.gate_decision` | `self._gate.check_files(context.route, context.files)` |
+| `process.v2.pipeline.evidence_gate_checked` | Лог результата gate |
+
+### 7) `FinalizeFindFilesAnswerStep`
+
+Шаг собирает финальный текстовый ответ без LLM. Если файлов нет, возвращается `insufficient_evidence`; если файл один — отдаётся один путь; если несколько — до 4 путей. Если gate вернул `low_confidence_shortlist`, также возвращается ограниченный список путей.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.files` | `BuildFilesEvidenceStep` | Список найденных файлов |
+| `context.gate_decision` | `ApplyFilesEvidenceGateStep` | Режим и причина ответа |
+| `context.answer` | Предыдущие шаги | Если уже заполнен, шаг пропускается |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.answer` | Детерминированно: пусто/1 путь/до 4 путей |
+| `context.answer_generated_payload` | `{"answer_mode", "answer_length"}` по ветке формирования |
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/__init__.py
new file mode 100644
index 0000000..02a46a6
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/__init__.py
@@ -0,0 +1,3 @@
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.graph import DocExplainFindFilesGraph
+
+__all__ = ["DocExplainFindFilesGraph"]
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/graph.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/graph.py
new file mode 100644
index 0000000..4424910
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/graph.py
@@ -0,0 +1,49 @@
+from __future__ import annotations
+
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.buffered_graph import DocExplainFindFilesWorkflowGraph
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.retrieval.candidate_rows import CandidateRowsBuilder
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context import DocExplainFindFilesContext
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.apply_files_evidence_gate_step import (
+ ApplyFilesEvidenceGateStep,
+)
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.build_files_evidence_step import BuildFilesEvidenceStep
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.fetch_rag_rows_step import FetchRagRowsStep
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.finalize_find_files_answer_step import (
+ FinalizeFindFilesAnswerStep,
+)
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.prepare_candidate_rows_step import (
+ PrepareCandidateRowsStep,
+)
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.require_rag_session_step import RequireRagSessionStep
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.resolve_retrieval_plan_step import (
+ ResolveRetrievalPlanStep,
+)
+from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
+from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
+from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
+from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
+
+
+class DocExplainFindFilesGraph(DocExplainFindFilesWorkflowGraph[DocExplainFindFilesContext]):
+    """Workflow graph wiring the DOC_EXPLAIN / FIND_FILES step pipeline."""
+
+    def __init__(
+        self,
+        policy_resolver: RetrievalPlanResolver,
+        rag_adapter: V2RagRetrievalAdapter,
+        evidence_assembler: DocsEvidenceAssembler,
+        evidence_gate: DocsEvidenceGate,
+    ) -> None:
+        """Build the seven steps from the injected collaborators and register them with the base graph."""
+        no_session_text = "Для процесса v2 нужна активная RAG-сессия проекта с проиндексированной документацией."
+        # Same order as the flow diagram in this workflow's README.
+        pipeline = [
+            RequireRagSessionStep(missing_message=no_session_text),
+            ResolveRetrievalPlanStep(policy_resolver),
+            FetchRagRowsStep(rag_adapter),
+            PrepareCandidateRowsStep(CandidateRowsBuilder()),
+            BuildFilesEvidenceStep(evidence_assembler),
+            ApplyFilesEvidenceGateStep(evidence_gate),
+            FinalizeFindFilesAnswerStep(),
+        ]
+        super().__init__(workflow_id="v2.docs_explain.find_files", source="workflow.v2.find_files", steps=pipeline)
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/apply_files_evidence_gate_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/apply_files_evidence_gate_step.py
new file mode 100644
index 0000000..bbd1b71
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/apply_files_evidence_gate_step.py
@@ -0,0 +1,36 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import FindFilesWorkflowContext
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.pipeline_logging import log_pipeline_step
+from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=FindFilesWorkflowContext)
+
+
+class ApplyFilesEvidenceGateStep(WorkflowStep[TContext], Generic[TContext]):
+    """Run the evidence gate over the assembled files and store its decision on the context."""
+
+    step_id = "apply_files_evidence_gate"
+    title = "Проверка file evidence"
+
+    def __init__(self, gate: DocsEvidenceGate) -> None:
+        self._gate = gate
+
+    async def run(self, context: TContext) -> TContext:
+        """Check ``context.files`` with the gate; do nothing if an answer is already set."""
+        if not context.answer:
+            decision = self._gate.check_files(context.route, context.files)
+            context.gate_decision = decision
+            summary = {"passed": decision.passed, "reason": decision.reason, "answer_mode": decision.answer_mode}
+            log_pipeline_step(context.runtime, "evidence_gate_checked", summary)
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        """Report whether a gate decision exists and passed."""
+        decision = context.gate_decision
+        if not decision:
+            return {"passed": False}
+        return {"passed": bool(decision.passed)}
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/build_files_evidence_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/build_files_evidence_step.py
new file mode 100644
index 0000000..55f8056
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/build_files_evidence_step.py
@@ -0,0 +1,45 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import FindFilesWorkflowContext
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.pipeline_logging import log_pipeline_step, log_ranking
+from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=FindFilesWorkflowContext)
+
+
+class BuildFilesEvidenceStep(WorkflowStep[TContext], Generic[TContext]):
+    """Assemble file evidence from candidate rows and log the resulting ranking."""
+
+    step_id = "build_files_evidence"
+    title = "Сборка file evidence"
+
+    def __init__(self, assembler: DocsEvidenceAssembler) -> None:
+        self._assembler = assembler
+
+    async def run(self, context: TContext) -> TContext:
+        """Fill ``context.files`` from ``context.rows``; skipped when an answer is already set."""
+        if context.answer:
+            return context
+        context.files = self._assembler.assemble_files(context.rows, context.route)
+        self._log_evidence(context)
+        log_ranking(context.runtime, context.files)
+        return context
+
+    @staticmethod
+    def _log_evidence(context: TContext) -> None:
+        """Emit ``evidence_assembled`` to the evidence trace module and to the pipeline log."""
+        found = context.files
+        paths = [entry.path for entry in found]
+        context.runtime.trace.module("process.v2.evidence").log(
+            "evidence_assembled",
+            {"mode": "find_files", "file_count": len(found), "files": paths},
+        )
+        top_path = found[0].path if found else None
+        overview = {"mode": "find_files", "primary_file": top_path, "file_count": len(found)}
+        log_pipeline_step(context.runtime, "evidence_assembled", overview)
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        return {"file_count": len(context.files)}
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/fetch_rag_rows_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/fetch_rag_rows_step.py
new file mode 100644
index 0000000..bf24b3a
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/fetch_rag_rows_step.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import RetrievalWorkflowContext
+from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
+
+
+class FetchRagRowsStep(WorkflowStep[TContext], Generic[TContext]):
+    """Fetch rows from the RAG adapter according to the resolved retrieval plan."""
+
+    step_id = "fetch_rag_rows"
+    title = "Получение строк из RAG"
+
+    def __init__(self, rag_adapter: V2RagRetrievalAdapter) -> None:
+        self._rag_adapter = rag_adapter
+
+    async def run(self, context: TContext) -> TContext:
+        """Store fetched rows in ``context.retrieved_rows`` unless answered or no plan is set."""
+        if not context.answer and context.retrieval_plan is not None:
+            context.retrieved_rows = await self._rag_adapter.fetch_rows(
+                context.rag_session_id, context.route.normalized_query, context.retrieval_plan
+            )
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        return {"retrieved_row_count": len(context.retrieved_rows)}
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/finalize_find_files_answer_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/finalize_find_files_answer_step.py
new file mode 100644
index 0000000..4f770a2
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/finalize_find_files_answer_step.py
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context import DocExplainFindFilesContext
+from app.core.agent.utils.workflow import WorkflowStep
+
+
+class FinalizeFindFilesAnswerStep(WorkflowStep[DocExplainFindFilesContext]):
+    """Turn the assembled files into the final answer: up to four paths, one per line."""
+
+    step_id = "finalize_find_files_answer"
+    title = "Сборка списка файлов"
+
+    async def run(self, context: DocExplainFindFilesContext) -> DocExplainFindFilesContext:
+        """Set ``context.answer`` and its payload unless an answer already exists.
+
+        Empty ``files`` gives a fixed message (mode ``insufficient_evidence``); otherwise at most
+        four paths are listed, with the gate's mode for ``low_confidence_shortlist`` else ``deterministic``.
+        """
+        if context.answer:
+            return context
+        if not context.files:
+            text = "Не нашёл файлов документации, которые уверенно соответствуют запросу."
+            mode = "insufficient_evidence"
+        else:
+            # Joining a single path returns that path unchanged, so one expression covers all sizes.
+            text = "\n".join(entry.path for entry in context.files[:4])
+            shortlist = context.gate_decision is not None and context.gate_decision.reason == "low_confidence_shortlist"
+            mode = context.gate_decision.answer_mode if shortlist else "deterministic"
+        context.answer = text
+        context.answer_generated_payload = {"answer_mode": mode, "answer_length": len(text)}
+        return context
+
+    def trace_output(self, context: DocExplainFindFilesContext) -> dict[str, object]:
+        return {"answer_length": len(context.answer)}
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/prepare_candidate_rows_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/prepare_candidate_rows_step.py
new file mode 100644
index 0000000..46cecc3
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/prepare_candidate_rows_step.py
@@ -0,0 +1,36 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.retrieval.candidate_rows import CandidateRowsBuilder
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import RetrievalWorkflowContext
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.pipeline_logging import log_retrieval_trace
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
+
+
+class PrepareCandidateRowsStep(WorkflowStep[TContext], Generic[TContext]):
+    """Build candidate rows from the retrieved rows and write the retrieval trace."""
+
+    step_id = "prepare_candidate_rows"
+    title = "Подготовка candidate rows"
+
+    def __init__(self, builder: CandidateRowsBuilder) -> None:
+        self._builder = builder
+
+    async def run(self, context: TContext) -> TContext:
+        """Fill ``context.rows`` from the builder result; skipped when answered or no plan is set."""
+        if context.answer or context.retrieval_plan is None:
+            return context
+        fetched = context.retrieved_rows
+        plan = context.retrieval_plan
+        outcome = self._builder.build(fetched, context.route)
+        context.rows = outcome.rows
+        # Positional order: runtime, route, plan, fetched rows, metadata rows, candidate rows.
+        log_retrieval_trace(context.runtime, context.route, plan, fetched, outcome.metadata_rows, outcome.rows)
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        """Report the number of candidate rows."""
+        return {"row_count": len(context.rows)}
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/require_rag_session_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/require_rag_session_step.py
new file mode 100644
index 0000000..17a03b6
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/require_rag_session_step.py
@@ -0,0 +1,43 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import RetrievalWorkflowContext
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.pipeline_logging import log_pipeline_step
+from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
+
+
+class RequireRagSessionStep(WorkflowStep[TContext], Generic[TContext]):
+    """Answer early with a fixed message when the context has no RAG session id."""
+
+    step_id = "require_rag_session"
+    title = "Проверка RAG-сессии"
+
+    def __init__(self, *, missing_message: str, missing_gate: EvidenceGateDecision | None = None) -> None:
+        """Keep the early-exit message and the optional gate decision to attach with it."""
+        self._missing_message = missing_message
+        self._missing_gate = missing_gate
+
+    async def run(self, context: TContext) -> TContext:
+        """Pass through when a session id is present; otherwise set the early answer.
+
+        With a configured ``missing_gate`` the gate decision and answer payload are also
+        stored on the context and an ``evidence_gate_checked`` pipeline event is logged.
+        """
+        if context.rag_session_id:
+            return context
+        context.answer = self._missing_message
+        gate = self._missing_gate
+        if gate is None:
+            return context
+        context.gate_decision = gate
+        context.answer_generated_payload = {"answer_mode": gate.answer_mode, "answer_length": len(context.answer)}
+        details = {"passed": gate.passed, "reason": gate.reason, "answer_mode": gate.answer_mode}
+        log_pipeline_step(context.runtime, "evidence_gate_checked", details)
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        return {"has_rag_session": bool(context.rag_session_id)}
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/resolve_retrieval_plan_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/resolve_retrieval_plan_step.py
new file mode 100644
index 0000000..d87d5e5
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/resolve_retrieval_plan_step.py
@@ -0,0 +1,37 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import RetrievalWorkflowContext
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.pipeline_logging import log_pipeline_step
+from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
+
+
+class ResolveRetrievalPlanStep(WorkflowStep[TContext], Generic[TContext]):
+    """Resolve a retrieval plan for the route and record it in the trace and pipeline logs."""
+
+    step_id = "resolve_retrieval_plan"
+    title = "Выбор retrieval-плана"
+
+    def __init__(self, resolver: RetrievalPlanResolver) -> None:
+        self._resolver = resolver
+
+    async def run(self, context: TContext) -> TContext:
+        """Set ``context.retrieval_plan`` from the resolver; skipped when an answer is already set."""
+        if context.answer:
+            return context
+        context.retrieval_plan = resolved = self._resolver.resolve(context.route)
+        context.runtime.trace.module("process.v2.retrieval_policy").log(
+            "retrieval_plan_resolved",
+            {"profile": resolved.profile, "layers": resolved.layers, "limit": resolved.limit, "filters": resolved.filters},
+        )
+        # The pipeline event carries the same plan fields except ``limit``.
+        selection = {"profile": resolved.profile, "layers": resolved.layers, "filters": resolved.filters}
+        log_pipeline_step(context.runtime, "retrieval_profile_selected", selection)
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        return {"profile": getattr(context.retrieval_plan, "profile", "")}
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/retrieval/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/retrieval/__init__.py
new file mode 100644
index 0000000..373695e
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/retrieval/__init__.py
@@ -0,0 +1,2 @@
+"""Retrieval-related step helpers for the doc-explain find-files workflow."""
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/retrieval/candidate_rows.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/retrieval/candidate_rows.py
new file mode 100644
index 0000000..e92f7c0
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/retrieval/candidate_rows.py
@@ -0,0 +1,43 @@
+"""Сборка candidate rows для doc-explain find-files (метаданные + сиды по hints)."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from app.core.agent.utils.process_v2.models import V2RouteResult
+from app.core.agent.utils.process_v2.rag_retrieval import DocsMetadataLookupIndex
+from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import (
+ RagRowIndex,
+ merge_row_lists,
+ normalize_doc_path,
+ normalized_path_set,
+ seed_candidates_from_target_hints,
+)
+
+
+@dataclass(slots=True)
+class CandidateRowsResult:  # value returned by CandidateRowsBuilder.build
+ metadata_rows: list[dict]  # rows returned by the metadata lookup for the route
+ rows: list[dict]  # retrieved + metadata rows after target-hint seeding
+
+
+class CandidateRowsBuilder:
+    """Build the candidate row list: retrieved rows + metadata lookup + target-hint seeding."""
+
+    def build(self, retrieved_rows: list[dict], route: V2RouteResult) -> CandidateRowsResult:
+        """Return the metadata-lookup rows and the merged, hint-seeded candidate rows."""
+        metadata_rows = DocsMetadataLookupIndex(retrieved_rows).lookup(route)
+        merged = merge_row_lists(retrieved_rows, metadata_rows)
+        rows = seed_candidates_from_target_hints(merged, route.anchors.target_doc_hints, RagRowIndex(merged))
+        self._print_missing_target_hints(route, rows)
+        return CandidateRowsResult(metadata_rows=metadata_rows, rows=rows)
+
+    def _print_missing_target_hints(self, route: V2RouteResult, rows: list[dict]) -> None:
+        """Log an error for every hinted ``docs/<...>.<ext>`` file path absent from ``rows``."""
+        import logging  # local import: diagnostics go through logging instead of stdout
+        hints = route.anchors.target_doc_hints or ()
+        candidate_paths = normalized_path_set(rows) if hints else set()
+        for hint in hints:
+            normalized = normalize_doc_path(hint) if str(hint or "").strip() else ""
+            if normalized.startswith("docs/") and "." in normalized.rsplit("/", 1)[-1] and normalized not in candidate_paths:
+                logging.getLogger(__name__).error("target doc missing from candidates: %s", normalized)
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/retrieval/retrieval_policy.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/retrieval/retrieval_policy.py
new file mode 100644
index 0000000..f74df57
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/retrieval/retrieval_policy.py
@@ -0,0 +1,99 @@
+from __future__ import annotations
+
+from app.core.agent.utils.process_v2.anchor_signals import anchor_signal_types
+from app.core.agent.utils.process_v2.models import V2AnchorType, V2RouteResult, V2Subintent
+from app.core.rag.contracts.enums import RagLayer
+from app.core.rag.retrieval.session_retriever import RetrievalPlan
+
+
+class DocExplainFindFilesRetrievalPolicy:
+    """Retrieval policy for the ``FIND_FILES`` subintent: catalog layers plus path/LIKE filters."""
+
+    _LAYERS = [RagLayer.DOCS_DOCUMENT_CATALOG, RagLayer.DOCS_ENTITY_CATALOG]
+    # Anchor signal -> path prefix, checked in this order; the first match wins.
+    _SIGNAL_PREFIXES = (
+        (V2AnchorType.API_ENDPOINT, "docs/api/"),
+        (V2AnchorType.ARCHITECTURE, "docs/architecture/"),
+        (V2AnchorType.LOGIC_FLOW, "docs/logic/"),
+        (V2AnchorType.DOMAIN_ENTITY, "docs/domains/"),
+    )
+
+    def supports(self, route: V2RouteResult) -> bool:
+        """Return True when the route's subintent is ``FIND_FILES``."""
+        return route.subintent == V2Subintent.FIND_FILES
+
+    def resolve(self, route: V2RouteResult) -> RetrievalPlan:
+        """Build the ``file_lookup`` plan (limit 12) with filters derived from the route."""
+        filters = self._build_filters(route)
+        return RetrievalPlan(profile="file_lookup", layers=list(self._LAYERS), limit=12, filters=filters)
+
+    def _build_filters(self, route: V2RouteResult) -> dict[str, object]:
+        """Collect hint, domain/subdomain, path-prefix and LIKE-pattern filters."""
+        anchors = route.anchors
+        filters: dict[str, object] = {"target_doc_hints": list(anchors.target_doc_hints)}
+        if anchors.process_domain:
+            filters["metadata.domain"] = anchors.process_domain
+        if anchors.process_subdomain:
+            filters["metadata.subdomain"] = anchors.process_subdomain
+        prefixes = self._path_prefixes(route)
+        if prefixes:
+            filters["path_prefixes"] = prefixes
+        filters["prefer_path_prefixes"] = self._prefer_prefixes(route, prefixes)
+        filters["prefer_like_patterns"] = self._like_patterns(route)
+        return filters
+
+    def _path_prefixes(self, route: V2RouteResult) -> list[str]:
+        """Pick prefixes from hints, then ``docs/`` file names, then anchor signals, else ``docs/``."""
+        hinted = _prefixes_from_paths(route.anchors.target_doc_hints)
+        if hinted:
+            return hinted
+        doc_files = [name for name in route.anchors.file_names if str(name).strip().startswith("docs/")]
+        derived = _prefixes_from_paths(doc_files)
+        if derived:
+            return derived
+        signals = anchor_signal_types(route)
+        for signal, prefix in self._SIGNAL_PREFIXES:
+            if signal in signals:
+                return [prefix, "docs/"]
+        return ["docs/"]
+
+    def _prefer_prefixes(self, route: V2RouteResult, prefixes: list[str]) -> list[str]:
+        """Return ``prefixes`` (or ``docs/``), extended for domain-scoped routes, without duplicates."""
+        extra = ["docs/domains/", "docs/logic/"] if (route.anchors.process_domain or route.anchors.process_subdomain) else []
+        return _unique_terms([*prefixes, *extra] or ["docs/"])
+
+    def _like_patterns(self, route: V2RouteResult) -> list[str]:
+        """Return unique lowercase ``%term%`` patterns from hint basenames, else from route terms."""
+        anchors = route.anchors
+        if anchors.target_doc_hints:
+            # Strip whitespace and slashes first so "docs/api/" gives "%api%", not the match-all "%%".
+            terms = [str(hint or "").strip().strip("/").rsplit("/", 1)[-1] for hint in anchors.target_doc_hints]
+        else:
+            terms = [*route.target_terms, *anchors.endpoint_paths, *anchors.file_names, *anchors.entity_names]
+            terms += [*anchors.matched_aliases, anchors.process_domain, anchors.process_subdomain]
+        return [f"%{term}%" for term in _unique_terms([str(term or "").lower() for term in terms])]
+
+
+def _prefixes_from_paths(paths: list[str]) -> list[str]:
+    """Return the unique parent-directory prefixes (with trailing ``/``) of the given paths.
+
+    Each path is stripped of surrounding whitespace and slashes; paths left without a ``/``
+    have no parent directory and are skipped.
+    """
+    cleaned = (str(path).strip().strip("/") for path in paths)
+    # rpartition keeps everything before the last "/", i.e. the parent directory.
+    parents = [value.rpartition("/")[0] + "/" for value in cleaned if "/" in value]
+    return _unique_terms(parents)
+
+
+def _unique_terms(items: list[str]) -> list[str]:
+    """Return the stripped, non-empty values of ``items`` without duplicates, in first-seen order.
+
+    Falsy entries (``None``, ``""``) count as empty; all other entries are converted with ``str``.
+    """
+    stripped = (str(entry or "").strip() for entry in items)
+    # Dict keys are unique and keep insertion order, so ``fromkeys`` performs an
+    # order-preserving de-duplication.
+    ordered = dict.fromkeys(text for text in stripped if text)
+    return list(ordered)
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/__init__.py
new file mode 100644
index 0000000..17da4e5
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/__init__.py
@@ -0,0 +1,2 @@
+"""Runtime helpers for the doc-explain find-files workflow."""
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/buffered_graph.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/buffered_graph.py
new file mode 100644
index 0000000..3fbd3a3
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/buffered_graph.py
@@ -0,0 +1,46 @@
+"""Граф workflow doc-explain find-files: буфер шагов и один сброс в trace (на базе utils.workflow)."""
+
+from __future__ import annotations
+
+from typing import TypeVar
+
+from app.core.agent.utils.workflow.context import WorkflowContext
+from app.core.agent.utils.workflow.graph import WorkflowGraph
+
+TContext = TypeVar("TContext", bound=WorkflowContext)
+
+
+class DocExplainFindFilesWorkflowGraph(WorkflowGraph[TContext]):
+    """Workflow graph that traces each step once and flushes all step records at the end.
+
+    No separate ``step_started``/``step_completed`` events are written: every step produces a
+    single ``workflow_step_traced`` event, and the collected records go to ``workflow_trace_flushed``.
+    """
+
+    async def run(self, context: TContext) -> TContext:
+        """Run the steps in order, publishing a status message before each one.
+
+        Each step receives the context returned by the previous step; the final context is returned.
+        """
+        workflow_id = self._workflow_id
+        trace = context.runtime.trace.module(self._source)
+        trace.log("workflow_started", {"workflow_id": workflow_id})
+        records: list[dict[str, object]] = []
+        for step in self._steps:
+            # Trace input is captured before the step runs, trace output after it.
+            step_input = step.trace_input(context)
+            status_text = f"Шаг workflow: {step.title}."
+            status_meta = {"workflow_id": workflow_id, "step_id": step.step_id}
+            request_id = context.runtime.request.request_id
+            await context.runtime.publisher.publish_status(request_id, self._source, status_text, status_meta)
+            context = await step.run(context)
+            step_output = step.trace_output(context)
+            step_ref = {"id": step.step_id, "title": step.title}
+            traced = {"workflow_id": workflow_id, "step": step_ref, "input": step_input, "output": step_output}
+            trace.log("workflow_step_traced", traced)
+            record = {"step_id": step.step_id, "title": step.title, "input": step_input, "output": step_output}
+            records.append(record)
+        # One event with every step record, then the completion marker.
+        trace.log("workflow_trace_flushed", {"workflow_id": workflow_id, "steps": records})
+        trace.log("workflow_completed", {"workflow_id": workflow_id})
+        return context
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/context.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/context.py
new file mode 100644
index 0000000..3d9dacd
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/context.py
@@ -0,0 +1,22 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+from app.core.agent.runtime.execution_context import RuntimeExecutionContext
+from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
+from app.core.agent.utils.process_v2.models import RetrievedFile, V2RouteResult
+from app.core.rag.retrieval.session_retriever import RetrievalPlan
+
+
+@dataclass(slots=True)
+class DocExplainFindFilesContext:  # mutable state passed through the find-files workflow steps
+ runtime: RuntimeExecutionContext  # gives steps access to trace, publisher and request
+ route: V2RouteResult  # routing result handed to the resolver, builder, assembler and gate
+ rag_session_id: str  # a falsy value makes RequireRagSessionStep answer early
+ retrieval_plan: RetrievalPlan | None = None  # set by ResolveRetrievalPlanStep
+ retrieved_rows: list[dict] = field(default_factory=list)  # set by FetchRagRowsStep
+ rows: list[dict] = field(default_factory=list)  # candidate rows set by PrepareCandidateRowsStep
+ files: list[RetrievedFile] = field(default_factory=list)  # set by BuildFilesEvidenceStep
+ gate_decision: EvidenceGateDecision | None = None  # set by ApplyFilesEvidenceGateStep
+ answer: str = ""  # once non-empty, the remaining steps return without doing work
+ answer_generated_payload: dict[str, object] | None = None  # answer_mode / answer_length of the answer
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/context_protocols.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/context_protocols.py
new file mode 100644
index 0000000..c9ee1ab
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/context_protocols.py
@@ -0,0 +1,26 @@
+"""Протоколы контекста для workflow doc-explain find-files."""
+
+from __future__ import annotations
+
+from typing import Protocol
+
+from app.core.agent.runtime.execution_context import RuntimeExecutionContext
+from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
+from app.core.agent.utils.process_v2.models import RetrievedFile, V2RouteResult
+from app.core.rag.retrieval.session_retriever import RetrievalPlan
+
+
+class RetrievalWorkflowContext(Protocol):  # attributes the retrieval steps read and write on a context
+ runtime: RuntimeExecutionContext  # source of the trace modules used for logging
+ route: V2RouteResult  # routing result passed to the plan resolver and candidate builder
+ rag_session_id: str  # session id handed to the RAG adapter when fetching rows
+ retrieval_plan: RetrievalPlan | None  # None until a plan has been resolved
+ retrieved_rows: list[dict]  # rows fetched from the RAG adapter
+ rows: list[dict]  # candidate rows prepared from retrieved_rows
+ gate_decision: EvidenceGateDecision | None  # evidence gate result, if any
+ answer: str  # a non-empty answer makes the steps skip their work
+ answer_generated_payload: dict[str, object] | None  # answer_mode / answer_length, when set
+
+
+class FindFilesWorkflowContext(RetrievalWorkflowContext, Protocol):  # retrieval context plus assembled files
+ files: list[RetrievedFile]  # file evidence assembled from the candidate rows
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/pipeline_logging.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/pipeline_logging.py
new file mode 100644
index 0000000..7440006
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/pipeline_logging.py
@@ -0,0 +1,106 @@
+"""Логирование retrieval/pipeline/ranking для doc-explain find-files."""
+
+from __future__ import annotations
+
+from app.core.agent.utils.process_v2.models import V2RouteResult
+from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import normalize_doc_path, row_path
+
+
+def log_pipeline_step(runtime, step: str, payload: dict[str, object]) -> None:  # log one pipeline event
+ runtime.trace.module("process.v2.pipeline").log(step, payload)  # ``step`` is used as the event name
+
+
+def log_retrieval_trace(runtime, route: V2RouteResult, plan, retrieved_rows: list[dict], metadata_rows: list[dict], rows: list[dict]) -> None:
+    """Write the retrieval diagnostics for one request.
+
+    Emits ``rag_rows_fetched`` to the ``process.v2.rag_retrieval`` trace module, then the pipeline
+    events ``candidate_generation`` and ``retrieval_executed`` with rows reduced by ``trace_row``.
+    """
+    hints = route.anchors.target_doc_hints
+    runtime.trace.module("process.v2.rag_retrieval").log(
+        "rag_rows_fetched",
+        {"profile": plan.profile, "row_count": len(rows), "rows": [trace_row(row) for row in rows]},
+    )
+    hinted_paths = {normalize_doc_path(hint) for hint in hints if str(hint or "").strip()}
+    # NOTE(review): row_path(row) is matched against normalized hint paths - confirm it is normalized too.
+    semantic_head = retrieved_rows[:5]
+    sources = {
+        "seeded": [trace_row(row) for row in semantic_head if row_path(row) in hinted_paths],
+        "metadata_lookup": [trace_row(row) for row in metadata_rows[:5]],
+        "semantic": [trace_row(row) for row in semantic_head],
+    }
+    candidate_paths = [row_path(row) for row in rows if row_path(row)]
+    generation = {
+        "query": route.user_query,
+        "profile": plan.profile,
+        "details": {
+            "target_doc_hints": list(hints),
+            "candidates_before_ranking": candidate_paths,
+        },
+        "resolved_aliases": route.anchors.matched_aliases,
+        "target_doc_hints": hints,
+        "candidate_docs_before_ranking": [trace_row(row) for row in rows[:8]],
+        "sources": sources,
+    }
+    log_pipeline_step(runtime, "candidate_generation", generation)
+    executed = {
+        "query": route.user_query,
+        "profile": plan.profile,
+        "row_count": len(rows),
+        "target_doc_hints": hints,
+        "top_results": [trace_row(row) for row in rows[:5]],
+    }
+    log_pipeline_step(runtime, "retrieval_executed", executed)
+
+
+def log_ranking(runtime, items: list) -> None:
+    """Log ``ranking_explained`` pipeline events for the first four items.
+
+    One event is written per item, followed by a summary event that lists the same documents
+    and their score breakdowns. Missing attributes default to ``""``, ``0`` or ``{}``.
+
+    Args:
+        runtime: execution context whose trace receives the events.
+        items: ranked objects; ``path``, ``score``, ``match_reason`` and ``score_breakdown`` are read.
+    """
+    shortlist = items[:4]
+    top_docs: list[dict[str, object]] = []
+    breakdowns: list[dict[str, object]] = []
+    for ranked in shortlist:
+        doc = getattr(ranked, "path", "")
+        score = getattr(ranked, "score", 0)
+        reason = getattr(ranked, "match_reason", "")
+        breakdown = getattr(ranked, "score_breakdown", {})
+        top_docs.append({"doc": doc, "score": score, "match_reason": reason})
+        breakdowns.append({"doc": doc, "score_breakdown": breakdown})
+        # Per-item event: the full score breakdown for this document.
+        detail = {
+            "doc": doc,
+            "score_breakdown": breakdown,
+            "score": score,
+            "match_reason": reason,
+        }
+        log_pipeline_step(runtime, "ranking_explained", detail)
+    # Summary event: compact entries plus every breakdown, in the same order as ``items``.
+    summary = {
+        "top_docs_after_ranking": top_docs,
+        "ranking_score_breakdown": breakdowns,
+    }
+    log_pipeline_step(runtime, "ranking_explained", summary)
+
+
+def trace_row(row: dict) -> dict[str, object]:
+    """Reduce a RAG row to a flat dict of strings for tracing; long texts are cut to 400 chars."""
+    meta = row.get("metadata") or {}
+    return {
+        "layer": str(row.get("layer") or ""),
+        "path": str(row.get("path") or ""),
+        "title": str(row.get("title") or ""),
+        "document_id": str(meta.get("document_id") or meta.get("doc_id") or row.get("document_id") or ""),
+        "entity_name": str(meta.get("entity_name") or ""),
+        "summary_text": str(meta.get("summary_text") or "")[:400],
+        "section_path": str(meta.get("section_path") or ""),
+        "metadata_domain": str(meta.get("domain") or ""),
+        "metadata_subdomain": str(meta.get("subdomain") or ""),
+        "content_preview": str(row.get("content") or "").strip()[:400],
+    }
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/README.md b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/README.md
new file mode 100644
index 0000000..8ed14ee
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/README.md
@@ -0,0 +1,162 @@
+# DOC_EXPLAIN / SUMMARY Workflow
+
+## Контракт сабинтента
+
+| Поле | Значение |
+|---|---|
+| `domain` | `DOCS` |
+| `intent` | `DOC_EXPLAIN` |
+| `subintent` | `SUMMARY` |
+| `workflow_id` | `v2.docs_explain.summary` |
+| `source` | `workflow.v2.summary` |
+
+## Диаграмма флоу
+
+```mermaid
+flowchart TD
+ A["RequireRagSessionStep"] --> B["ResolveRetrievalPlanStep"]
+ B --> C["FetchRagRowsStep"]
+ C --> D["PrepareCandidateRowsStep"]
+ D --> E["BuildSummaryEvidenceStep"]
+ E --> F["ApplySummaryEvidenceGateStep"]
+ F --> G["GenerateSummaryAnswerStep"]
+```
+
+## Шаги процесса
+
+### 1) `RequireRagSessionStep`
+
+Шаг валидирует наличие активной RAG-сессии до retrieval. Если `rag_session_id` отсутствует, workflow завершает обработку и записывает сообщение об отсутствии опоры. Это предотвращает запуск последующих шагов без индекса проекта.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.rag_session_id` | `V2Process` -> `DocExplainSummaryContext` | Идентификатор RAG-сессии |
+| `self._missing_message` | Конфигурация в `graph.py` | Ответ при отсутствии сессии |
+| `self._missing_gate` | Конфиг шага | Опциональный gate для раннего выхода |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.answer` | Заполняется `missing_message` при пустом `rag_session_id` |
+| `context.gate_decision` | Заполняется, если передан `missing_gate` |
+| `context.answer_generated_payload` | Формируется как `{"answer_mode", "answer_length"}` при раннем ответе |
+
+### 2) `ResolveRetrievalPlanStep`
+
+Шаг вызывает policy-резолвер и строит `RetrievalPlan` для doc-summary сценария. Профиль и фильтры зависят от сигналов route: endpoint/architecture/logic/domain. Результат сохраняется в контекст и логируется в trace.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.route` | `intent_router` | Route с `target_terms` и `anchors` |
+| `self._resolver` | DI из `graph.py` | Реализация `RetrievalPlanResolver` |
+| `context.answer` | Предыдущие шаги | При наличии ответа шаг пропускается |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.retrieval_plan` | `self._resolver.resolve(context.route)` |
+| `process.v2.retrieval_policy.retrieval_plan_resolved` | Лог профиля, слоёв, лимита и фильтров |
+
+### 3) `FetchRagRowsStep`
+
+Шаг выполняет retrieval по сформированному плану. Внутри адаптера объединяются seed-результаты по `target_doc_hints` и основной retrieval. Это даёт более устойчивую выдачу как по явным hints, так и по семантическому совпадению.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.rag_session_id` | Контекст workflow | Сессия для поиска |
+| `context.route.normalized_query` | Route | Нормализованный запрос |
+| `context.retrieval_plan` | `ResolveRetrievalPlanStep` | План retrieval |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.retrieved_rows` | `await rag_adapter.fetch_rows(...)` |
+
+### 4) `PrepareCandidateRowsStep`
+
+Шаг обогащает найденные строки через metadata lookup и подмешивает кандидатов из `target_doc_hints`. На выходе формируется `context.rows`, который уже используется для построения summary-evidence. Шаг также пишет подробные retrieval-логи для анализа ранжирования.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.retrieved_rows` | `FetchRagRowsStep` | Строки retrieval |
+| `context.route` | Route | Сигналы hints/aliases/terms |
+| `self._builder` | `CandidateRowsBuilder()` | Логика merge и metadata поиска |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.rows` | `prepared.rows` из `CandidateRowsBuilder.build(...)` |
+| `process.v2.rag_retrieval.rag_rows_fetched` | Лог выборки и источников кандидатов |
+
+### 5) `BuildSummaryEvidenceStep`
+
+Шаг преобразует candidate rows в список `RetrievedSummary` через `DocsEvidenceAssembler.assemble_summaries`. Здесь формируется shortlist документов с оценками и причинами совпадения, который затем проверяется gate. Дополнительно логируются ranking-разборы.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.rows` | `PrepareCandidateRowsStep` | Подготовленные кандидаты |
+| `context.route` | Route | Сигналы для ranking |
+| `self._assembler` | DI из `graph.py` | Сборщик summary evidence |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.documents` | `assemble_summaries(context.rows, context.route)` |
+| `process.v2.evidence.evidence_assembled` | Лог `document_count` и списка путей |
+
+### 6) `ApplySummaryEvidenceGateStep`
+
+Шаг оценивает достаточность и надёжность собранного summary-evidence. Решение gate влияет на дальнейшее формирование ответа: либо прямой отказ/ограничение, либо генерация summary. Результат логируется в pipeline.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.route` | Route | Контекст запроса |
+| `context.documents` | `BuildSummaryEvidenceStep` | Кандидаты summary |
+| `self._gate` | DI из `graph.py` | Правила валидации evidence |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.gate_decision` | `self._gate.check_summaries(context.route, context.documents)` |
+| `process.v2.pipeline.evidence_gate_checked` | Лог `passed/reason/answer_mode` |
+
+### 7) `GenerateSummaryAnswerStep`
+
+Шаг формирует финальный ответ: gate-message, детерминированный ответ или LLM-генерация. При LLM-ветке собирается `prompt_input` из пользовательского запроса, route-сигналов и найденных summary-блоков. Итог всегда записывается в `context.answer` и сопровождается `answer_generated_payload`.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.documents` | `BuildSummaryEvidenceStep` | Опорные summary-документы |
+| `context.gate_decision` | `ApplySummaryEvidenceGateStep` | Режим допуска к генерации |
+| `context.workflow_llm_enabled` | `V2Process` | Флаг LLM/детерминизм |
+| `context.prompt_name` | `V2Process` | Имя prompt-шаблона |
+| `self._llm` | DI из `graph.py` | LLM-сервис |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.prompt_input` | Склеивается из route + shortlist документов |
+| `context.answer` | Gate-message / deterministic summary / результат `llm.generate(...)` |
+| `context.answer_generated_payload` | `{"answer_mode", "answer_length"}` по выбранной ветке |
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/__init__.py
new file mode 100644
index 0000000..5dd059b
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/__init__.py
@@ -0,0 +1,3 @@
+from app.core.agent.processes.v2.workflows.doc_explain_summary.graph import DocExplainSummaryGraph
+
+__all__ = ["DocExplainSummaryGraph"]
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/graph.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/graph.py
new file mode 100644
index 0000000..dd43d50
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/graph.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.buffered_graph import DocExplainSummaryWorkflowGraph
+from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.retrieval.candidate_rows import CandidateRowsBuilder
+from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context import DocExplainSummaryContext
+from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.apply_summary_evidence_gate_step import (
+ ApplySummaryEvidenceGateStep,
+)
+from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.build_summary_evidence_step import BuildSummaryEvidenceStep
+from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.fetch_rag_rows_step import FetchRagRowsStep
+from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.generate_summary_answer_step import (
+ GenerateSummaryAnswerStep,
+)
+from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.prepare_candidate_rows_step import (
+ PrepareCandidateRowsStep,
+)
+from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.require_rag_session_step import RequireRagSessionStep
+from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.resolve_retrieval_plan_step import (
+ ResolveRetrievalPlanStep,
+)
+from app.core.agent.utils.llm import AgentLlmService
+from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
+from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
+from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
+from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
+
+
+class DocExplainSummaryGraph(DocExplainSummaryWorkflowGraph[DocExplainSummaryContext]):
+    """Workflow graph for the v2 doc-explain summary process.
+
+    Wires the seven workflow steps in execution order and registers them
+    under the ``v2.docs_explain.summary`` workflow id.
+    """
+
+    def __init__(
+        self,
+        llm: AgentLlmService,
+        policy_resolver: RetrievalPlanResolver,
+        rag_adapter: V2RagRetrievalAdapter,
+        evidence_assembler: DocsEvidenceAssembler,
+        evidence_gate: DocsEvidenceGate,
+    ) -> None:
+        """Build the step pipeline from the injected collaborators."""
+        # The session guard runs first; its message becomes the answer when no RAG session id is set.
+        session_guard = RequireRagSessionStep(
+            missing_message="Для процесса v2 нужна активная RAG-сессия проекта с проиндексированной документацией."
+        )
+        pipeline = [
+            session_guard,
+            ResolveRetrievalPlanStep(policy_resolver),
+            FetchRagRowsStep(rag_adapter),
+            PrepareCandidateRowsStep(CandidateRowsBuilder()),
+            BuildSummaryEvidenceStep(evidence_assembler),
+            ApplySummaryEvidenceGateStep(evidence_gate),
+            GenerateSummaryAnswerStep(llm),
+        ]
+        super().__init__(
+            workflow_id="v2.docs_explain.summary",
+            source="workflow.v2.summary",
+            steps=pipeline,
+        )
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/apply_summary_evidence_gate_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/apply_summary_evidence_gate_step.py
new file mode 100644
index 0000000..8b057d4
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/apply_summary_evidence_gate_step.py
@@ -0,0 +1,36 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context_protocols import SummaryWorkflowContext
+from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.pipeline_logging import log_pipeline_step
+from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=SummaryWorkflowContext)
+
+
+class ApplySummaryEvidenceGateStep(WorkflowStep[TContext], Generic[TContext]):
+    """Workflow step that runs the evidence gate over the assembled summary documents."""
+
+    step_id = "apply_summary_evidence_gate"
+    title = "Проверка summary evidence"
+
+    def __init__(self, gate: DocsEvidenceGate) -> None:
+        """Keep the gate used to validate summary evidence."""
+        self._gate = gate
+
+    async def run(self, context: TContext) -> TContext:
+        """Store the gate decision on the context and log it to the pipeline trace.
+
+        The context is returned untouched when an answer has already been set.
+        """
+        if not context.answer:
+            decision = self._gate.check_summaries(context.route, context.documents)
+            context.gate_decision = decision
+            gate_payload = {
+                "passed": decision.passed,
+                "reason": decision.reason,
+                "answer_mode": decision.answer_mode,
+            }
+            log_pipeline_step(context.runtime, "evidence_gate_checked", gate_payload)
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        """Report whether a gate decision exists and passed."""
+        decision = context.gate_decision
+        return {"passed": bool(decision and decision.passed)}
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/build_summary_evidence_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/build_summary_evidence_step.py
new file mode 100644
index 0000000..7b8030c
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/build_summary_evidence_step.py
@@ -0,0 +1,45 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context_protocols import SummaryWorkflowContext
+from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.pipeline_logging import log_pipeline_step, log_ranking
+from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=SummaryWorkflowContext)
+
+
+class BuildSummaryEvidenceStep(WorkflowStep[TContext], Generic[TContext]):
+    """Workflow step that assembles summary documents from the candidate rows."""
+
+    step_id = "build_summary_evidence"
+    title = "Сборка summary evidence"
+
+    def __init__(self, assembler: DocsEvidenceAssembler) -> None:
+        """Keep the assembler that turns rows into summary documents."""
+        self._assembler = assembler
+
+    async def run(self, context: TContext) -> TContext:
+        """Assemble ``context.documents`` and log the result to the evidence, pipeline and ranking traces.
+
+        The context is returned untouched when an answer has already been set.
+        """
+        if not context.answer:
+            documents = self._assembler.assemble_summaries(context.rows, context.route)
+            context.documents = documents
+            evidence_trace = context.runtime.trace.module("process.v2.evidence")
+            evidence_trace.log(
+                "evidence_assembled",
+                {
+                    "mode": "summary",
+                    "document_count": len(documents),
+                    "documents": [doc.path for doc in documents],
+                },
+            )
+            # The pipeline trace only records the top document, not the full list.
+            primary_path = documents[0].path if documents else None
+            log_pipeline_step(
+                context.runtime,
+                "evidence_assembled",
+                {
+                    "mode": "summary",
+                    "primary_doc": primary_path,
+                    "document_count": len(documents),
+                },
+            )
+            log_ranking(context.runtime, documents)
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        """Report how many summary documents are on the context."""
+        document_count = len(context.documents)
+        return {"document_count": document_count}
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/fetch_rag_rows_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/fetch_rag_rows_step.py
new file mode 100644
index 0000000..d4009e3
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/fetch_rag_rows_step.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context_protocols import RetrievalWorkflowContext
+from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
+
+
+class FetchRagRowsStep(WorkflowStep[TContext], Generic[TContext]):
+    """Workflow step that fetches rows from the RAG adapter for the resolved retrieval plan."""
+
+    step_id = "fetch_rag_rows"
+    title = "Получение строк из RAG"
+
+    def __init__(self, rag_adapter: V2RagRetrievalAdapter) -> None:
+        """Keep the adapter used to query the RAG session."""
+        self._rag_adapter = rag_adapter
+
+    async def run(self, context: TContext) -> TContext:
+        """Fill ``context.retrieved_rows`` using the normalized query and the retrieval plan.
+
+        Nothing is fetched when an answer is already set or no plan was resolved.
+        """
+        plan = context.retrieval_plan
+        should_fetch = not context.answer and plan is not None
+        if should_fetch:
+            query = context.route.normalized_query
+            context.retrieved_rows = await self._rag_adapter.fetch_rows(context.rag_session_id, query, plan)
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        """Report how many rows were retrieved."""
+        retrieved_row_count = len(context.retrieved_rows)
+        return {"retrieved_row_count": retrieved_row_count}
diff --git a/src/app/core/agent/processes/v2/workflows/docs_explain_summary/steps/generate_summary_answer_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/generate_summary_answer_step.py
similarity index 63%
rename from src/app/core/agent/processes/v2/workflows/docs_explain_summary/steps/generate_summary_answer_step.py
rename to src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/generate_summary_answer_step.py
index 030fa50..dfeb7a8 100644
--- a/src/app/core/agent/processes/v2/workflows/docs_explain_summary/steps/generate_summary_answer_step.py
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/generate_summary_answer_step.py
@@ -3,28 +3,36 @@ from __future__ import annotations
import asyncio
import json
-from app.core.agent.processes.v2.anchor_signals import route_anchor_summary
+from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context import DocExplainSummaryContext
from app.core.agent.utils.llm import AgentLlmService
-from app.core.agent.processes.v2.workflows.docs_explain_summary.context import DocsExplainSummaryContext
+from app.core.agent.utils.process_v2.anchor_signals import route_anchor_summary
from app.core.agent.utils.workflow import WorkflowStep
-class GenerateSummaryAnswerStep(WorkflowStep[DocsExplainSummaryContext]):
+class GenerateSummaryAnswerStep(WorkflowStep[DocExplainSummaryContext]):
step_id = "generate_summary_answer"
title = "Сборка ответа по summary"
def __init__(self, llm: AgentLlmService) -> None:
self._llm = llm
- async def run(self, context: DocsExplainSummaryContext) -> DocsExplainSummaryContext:
+ async def run(self, context: DocExplainSummaryContext) -> DocExplainSummaryContext:
+ if context.answer:
+ return context
if context.gate_decision is not None and not context.gate_decision.passed:
context.answer = context.gate_decision.message
+ context.answer_generated_payload = {
+ "answer_mode": context.gate_decision.answer_mode,
+ "answer_length": len(context.answer),
+ }
return context
if not context.workflow_llm_enabled:
context.answer = self._build_deterministic_answer(context)
+ context.answer_generated_payload = {"answer_mode": "grounded_summary", "answer_length": len(context.answer)}
return context
if not context.documents:
context.answer = "Не нашёл подходящих SUMMARY-блоков в документации по этому запросу."
+ context.answer_generated_payload = {"answer_mode": "insufficient_evidence", "answer_length": len(context.answer)}
return context
context.prompt_input = self._build_prompt_input(context)
request_id = context.runtime.request.request_id
@@ -35,9 +43,10 @@ class GenerateSummaryAnswerStep(WorkflowStep[DocsExplainSummaryContext]):
log_context=f"agent:{request_id}",
trace=context.runtime.trace.module("workflow.v2.summary.llm"),
)
+ context.answer_generated_payload = {"answer_mode": "grounded_summary", "answer_length": len(context.answer)}
return context
- def _build_prompt_input(self, context: DocsExplainSummaryContext) -> str:
+ def _build_prompt_input(self, context: DocExplainSummaryContext) -> str:
blocks = [
f"Запрос пользователя:\n{context.route.user_query}",
"Сигналы запроса:\n" + json.dumps(route_anchor_summary(context.route), ensure_ascii=False, indent=2),
@@ -52,17 +61,13 @@ class GenerateSummaryAnswerStep(WorkflowStep[DocsExplainSummaryContext]):
)
return "\n\n".join(blocks)
- def _build_deterministic_answer(self, context: DocsExplainSummaryContext) -> str:
+ def _build_deterministic_answer(self, context: DocExplainSummaryContext) -> str:
if not context.documents:
return "Не нашёл подходящих SUMMARY-блоков в документации по этому запросу."
- lines = []
- primary = context.documents[0]
- lines.append(primary.summary)
- lines.append("")
- lines.append("Файлы-источники:")
+ lines = [context.documents[0].summary, "", "Файлы-источники:"]
for item in context.documents:
lines.append(f"- {item.path}")
return "\n".join(lines)
- def trace_output(self, context: DocsExplainSummaryContext) -> dict[str, object]:
+ def trace_output(self, context: DocExplainSummaryContext) -> dict[str, object]:
return {"answer_length": len(context.answer)}
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/prepare_candidate_rows_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/prepare_candidate_rows_step.py
new file mode 100644
index 0000000..87a3d31
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/prepare_candidate_rows_step.py
@@ -0,0 +1,36 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.retrieval.candidate_rows import CandidateRowsBuilder
+from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context_protocols import RetrievalWorkflowContext
+from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.pipeline_logging import log_retrieval_trace
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
+
+
+class PrepareCandidateRowsStep(WorkflowStep[TContext], Generic[TContext]):
+    """Workflow step that turns the retrieved rows into the candidate row set."""
+
+    step_id = "prepare_candidate_rows"
+    title = "Подготовка candidate rows"
+
+    def __init__(self, builder: CandidateRowsBuilder) -> None:
+        """Keep the builder that produces candidate rows."""
+        self._builder = builder
+
+    async def run(self, context: TContext) -> TContext:
+        """Fill ``context.rows`` from the builder result and log the retrieval trace.
+
+        Nothing happens when an answer is already set or no plan was resolved.
+        """
+        plan = context.retrieval_plan
+        if not context.answer and plan is not None:
+            result = self._builder.build(context.retrieved_rows, context.route)
+            context.rows = result.rows
+            log_retrieval_trace(
+                context.runtime,
+                context.route,
+                plan,
+                context.retrieved_rows,
+                result.metadata_rows,
+                result.rows,
+            )
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        """Report how many candidate rows are on the context."""
+        row_count = len(context.rows)
+        return {"row_count": row_count}
diff --git a/src/app/core/agent/processes/v2/prompts.yml b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/prompts/prompts.yml
similarity index 100%
rename from src/app/core/agent/processes/v2/prompts.yml
rename to src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/prompts/prompts.yml
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/require_rag_session_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/require_rag_session_step.py
new file mode 100644
index 0000000..78e6694
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/require_rag_session_step.py
@@ -0,0 +1,43 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context_protocols import RetrievalWorkflowContext
+from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.pipeline_logging import log_pipeline_step
+from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
+
+
+class RequireRagSessionStep(WorkflowStep[TContext], Generic[TContext]):
+    """Workflow step that answers with a fixed message when the context has no RAG session id."""
+
+    step_id = "require_rag_session"
+    title = "Проверка RAG-сессии"
+
+    def __init__(self, *, missing_message: str, missing_gate: EvidenceGateDecision | None = None) -> None:
+        """Keep the fallback answer and the optional gate decision reported with it."""
+        self._missing_message = missing_message
+        self._missing_gate = missing_gate
+
+    async def run(self, context: TContext) -> TContext:
+        """Set the missing-session answer, and the gate decision when one was configured.
+
+        The context is returned untouched when a RAG session id is present.
+        """
+        if context.rag_session_id:
+            return context
+
+        context.answer = self._missing_message
+        gate = self._missing_gate
+        if gate is None:
+            # Without a configured gate only the answer text is set.
+            return context
+
+        context.gate_decision = gate
+        context.answer_generated_payload = {
+            "answer_mode": gate.answer_mode,
+            "answer_length": len(context.answer),
+        }
+        gate_payload = {
+            "passed": gate.passed,
+            "reason": gate.reason,
+            "answer_mode": gate.answer_mode,
+        }
+        log_pipeline_step(context.runtime, "evidence_gate_checked", gate_payload)
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        """Report whether the context carries a RAG session id."""
+        has_session = bool(context.rag_session_id)
+        return {"has_rag_session": has_session}
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/resolve_retrieval_plan_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/resolve_retrieval_plan_step.py
new file mode 100644
index 0000000..a4c5a58
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/resolve_retrieval_plan_step.py
@@ -0,0 +1,37 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context_protocols import RetrievalWorkflowContext
+from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.pipeline_logging import log_pipeline_step
+from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
+
+
+class ResolveRetrievalPlanStep(WorkflowStep[TContext], Generic[TContext]):
+    """Workflow step that resolves the retrieval plan for the current route."""
+
+    step_id = "resolve_retrieval_plan"
+    title = "Выбор retrieval-плана"
+
+    def __init__(self, resolver: RetrievalPlanResolver) -> None:
+        """Keep the resolver that maps a route to a retrieval plan."""
+        self._resolver = resolver
+
+    async def run(self, context: TContext) -> TContext:
+        """Store the resolved plan on the context and log it to the policy and pipeline traces.
+
+        The context is returned untouched when an answer has already been set.
+        """
+        if not context.answer:
+            resolved = self._resolver.resolve(context.route)
+            context.retrieval_plan = resolved
+            policy_trace = context.runtime.trace.module("process.v2.retrieval_policy")
+            policy_trace.log(
+                "retrieval_plan_resolved",
+                {
+                    "profile": resolved.profile,
+                    "layers": resolved.layers,
+                    "limit": resolved.limit,
+                    "filters": resolved.filters,
+                },
+            )
+            # The pipeline event carries the same fields except the row limit.
+            log_pipeline_step(
+                context.runtime,
+                "retrieval_profile_selected",
+                {
+                    "profile": resolved.profile,
+                    "layers": resolved.layers,
+                    "filters": resolved.filters,
+                },
+            )
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        """Report the plan profile, or an empty string when no plan is set."""
+        plan = context.retrieval_plan
+        return {"profile": getattr(plan, "profile", "")}
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/retrieval/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/retrieval/__init__.py
new file mode 100644
index 0000000..0fac291
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/retrieval/__init__.py
@@ -0,0 +1,2 @@
+"""Retrieval-related step helpers for the doc-explain summary workflow."""
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/retrieval/candidate_rows.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/retrieval/candidate_rows.py
new file mode 100644
index 0000000..5d6d81f
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/retrieval/candidate_rows.py
@@ -0,0 +1,43 @@
+"""Сборка candidate rows для doc-explain summary (метаданные + сиды по hints)."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from app.core.agent.utils.process_v2.models import V2RouteResult
+from app.core.agent.utils.process_v2.rag_retrieval import DocsMetadataLookupIndex
+from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import (
+ RagRowIndex,
+ merge_row_lists,
+ normalize_doc_path,
+ normalized_path_set,
+ seed_candidates_from_target_hints,
+)
+
+
+@dataclass(slots=True)
+class CandidateRowsResult:
+ """Value returned by ``CandidateRowsBuilder.build``."""
+ # Rows produced by the metadata lookup over the retrieved rows.
+ metadata_rows: list[dict]
+ # Retrieved rows merged with the metadata rows, then seeded from the route's target doc hints.
+ rows: list[dict]
+
+
+class CandidateRowsBuilder:
+    """Builds the candidate row set for a route from the raw retrieved rows."""
+
+    def build(self, retrieved_rows: list[dict], route: V2RouteResult) -> CandidateRowsResult:
+        """Merge retrieved rows with metadata-lookup rows and seed them from the target doc hints."""
+        lookup_index = DocsMetadataLookupIndex(retrieved_rows)
+        metadata_rows = lookup_index.lookup(route)
+        merged = merge_row_lists(retrieved_rows, metadata_rows)
+        candidates = seed_candidates_from_target_hints(merged, route.anchors.target_doc_hints, RagRowIndex(merged))
+        self._print_missing_target_hints(route, candidates)
+        return CandidateRowsResult(metadata_rows=metadata_rows, rows=candidates)
+
+    def _print_missing_target_hints(self, route: V2RouteResult, rows: list[dict]) -> None:
+        """Print an error line for each file-like ``docs/`` hint that is absent from the candidate rows."""
+        hints = route.anchors.target_doc_hints
+        if not hints:
+            return
+        known_paths = normalized_path_set(rows)
+        for hint in hints:
+            if not str(hint or "").strip():
+                continue
+            doc_path = normalize_doc_path(hint)
+            file_name = doc_path.rsplit("/", 1)[-1]
+            # Only hints under docs/ whose last segment contains a dot are checked.
+            is_doc_file = doc_path.startswith("docs/") and "." in file_name
+            if is_doc_file and doc_path not in known_paths:
+                # NOTE(review): this reports via stdout; consider the logging module if nothing depends on it.
+                print("ERROR: target doc missing from candidates:", doc_path)
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/retrieval/retrieval_policy.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/retrieval/retrieval_policy.py
new file mode 100644
index 0000000..331c087
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/retrieval/retrieval_policy.py
@@ -0,0 +1,168 @@
+from __future__ import annotations
+
+from app.core.agent.utils.process_v2.anchor_signals import anchor_signal_types
+from app.core.agent.utils.process_v2.models import V2AnchorType, V2Intent, V2RouteResult, V2Subintent
+from app.core.rag.contracts.enums import RagLayer
+from app.core.rag.retrieval.session_retriever import RetrievalPlan
+
+
+class DocExplainSummaryRetrievalPolicy:
+    """Retrieval policy for the DOC_EXPLAIN / SUMMARY route.
+
+    Picks a retrieval profile from the route's anchor signals and builds the
+    matching ``RetrievalPlan`` (layers, row limit and filters).
+    """
+
+    # Layers to query for each retrieval profile, keyed by profile name.
+    _SUMMARY_LAYERS = {
+        "docs_api_method_explain": [
+            RagLayer.DOCS_DOCUMENT_CATALOG,
+            RagLayer.DOCS_FACT_INDEX,
+            RagLayer.DOCS_DOC_CHUNKS,
+        ],
+        "docs_summary_api_endpoint": [
+            RagLayer.DOCS_DOCUMENT_CATALOG,
+            RagLayer.DOCS_FACT_INDEX,
+            RagLayer.DOCS_DOC_CHUNKS,
+        ],
+        "docs_summary_logic_flow": [
+            RagLayer.DOCS_WORKFLOW_INDEX,
+            RagLayer.DOCS_DOCUMENT_CATALOG,
+            RagLayer.DOCS_DOC_CHUNKS,
+        ],
+        "docs_summary_domain_entity": [
+            RagLayer.DOCS_ENTITY_CATALOG,
+            RagLayer.DOCS_DOCUMENT_CATALOG,
+            RagLayer.DOCS_DOC_CHUNKS,
+        ],
+        "docs_summary_architecture": [
+            RagLayer.DOCS_DOCUMENT_CATALOG,
+            RagLayer.DOCS_RELATION_GRAPH,
+            RagLayer.DOCS_DOC_CHUNKS,
+        ],
+        "docs_summary_generic": [
+            RagLayer.DOCS_DOCUMENT_CATALOG,
+            RagLayer.DOCS_DOC_CHUNKS,
+        ],
+    }
+    # Path prefixes used as both hard and preferred filters for the API-method profile.
+    _API_DOC_PREFIXES = ["docs/api/", "docs/endpoints/", "docs/methods/", "api/", "endpoints/", "methods/"]
+
+    def supports(self, route: V2RouteResult) -> bool:
+        """Return True for routes with intent DOC_EXPLAIN and subintent SUMMARY."""
+        return route.intent == V2Intent.DOC_EXPLAIN and route.subintent == V2Subintent.SUMMARY
+
+    def resolve(self, route: V2RouteResult) -> RetrievalPlan:
+        """Build the retrieval plan for ``route``.
+
+        The API-method profile gets a row limit of 10; every other profile gets 8.
+        """
+        profile = self._profile(route)
+        return RetrievalPlan(
+            profile=profile,
+            layers=list(self._SUMMARY_LAYERS[profile]),
+            limit=10 if profile == "docs_api_method_explain" else 8,
+            filters=self._filters(route),
+        )
+
+    def _filters(self, route: V2RouteResult) -> dict[str, object]:
+        """Return the filter dict for the plan: API-method filters or summary filters."""
+        if self._is_api_method_explain(route):
+            return self._api_method_filters(route)
+        filters = self._base_filters(route)
+        filters["prefer_path_prefixes"] = self._summary_prefixes(route)
+        filters["prefer_like_patterns"] = self._prefer_like_patterns(route)
+        # NOTE(review): for SUMMARY routes an API_ENDPOINT signal already selects the
+        # API-method branch above, so this only applies to other subintents - confirm intended.
+        if V2AnchorType.API_ENDPOINT in anchor_signal_types(route):
+            filters["path_prefixes"] = ["docs/api/", "docs/"]
+        return filters
+
+    def _base_filters(self, route: V2RouteResult) -> dict[str, object]:
+        """Return filters shared by all profiles: target doc hints plus domain/subdomain when set."""
+        filters: dict[str, object] = {"target_doc_hints": list(route.anchors.target_doc_hints)}
+        if route.anchors.process_domain:
+            filters["metadata.domain"] = route.anchors.process_domain
+        if route.anchors.process_subdomain:
+            filters["metadata.subdomain"] = route.anchors.process_subdomain
+        return filters
+
+    def _api_method_filters(self, route: V2RouteResult) -> dict[str, object]:
+        """Return the base filters restricted to, and preferring, the API doc prefixes."""
+        filters = self._base_filters(route)
+        # Separate copies so later mutation of one filter value cannot affect the class constant.
+        filters["path_prefixes"] = list(self._API_DOC_PREFIXES)
+        filters["prefer_path_prefixes"] = list(self._API_DOC_PREFIXES)
+        filters["prefer_like_patterns"] = self._api_method_patterns(route)
+        return filters
+
+    def _profile(self, route: V2RouteResult) -> str:
+        """Return the profile name; generic unless exactly one meaningful anchor signal is present."""
+        if self._is_api_method_explain(route):
+            return "docs_api_method_explain"
+        # FIND_FILES is ignored when counting signals.
+        meaningful = anchor_signal_types(route) - {V2AnchorType.FIND_FILES}
+        if len(meaningful) != 1:
+            return "docs_summary_generic"
+        mapping = {
+            V2AnchorType.API_ENDPOINT: "docs_summary_api_endpoint",
+            V2AnchorType.ARCHITECTURE: "docs_summary_architecture",
+            V2AnchorType.LOGIC_FLOW: "docs_summary_logic_flow",
+            V2AnchorType.DOMAIN_ENTITY: "docs_summary_domain_entity",
+        }
+        return mapping.get(next(iter(meaningful)), "docs_summary_generic")
+
+    def _summary_prefixes(self, route: V2RouteResult) -> list[str]:
+        """Return the preferred path prefixes for the route's signals, de-duplicated; ``docs/`` if none match."""
+        signals = anchor_signal_types(route)
+        prefixes: list[str] = []
+        if V2AnchorType.API_ENDPOINT in signals:
+            prefixes.extend(["docs/api/", "docs/"])
+        if V2AnchorType.ARCHITECTURE in signals:
+            prefixes.extend(["docs/architecture/", "docs/"])
+        if V2AnchorType.LOGIC_FLOW in signals:
+            prefixes.extend(["docs/logic/", "docs/architecture/", "docs/"])
+        if V2AnchorType.DOMAIN_ENTITY in signals:
+            prefixes.extend(["docs/domains/", "docs/", "docs/api/"])
+        return _unique_terms(prefixes or ["docs/"])
+
+    def _prefer_like_patterns(self, route: V2RouteResult) -> list[str]:
+        """Return lower-cased ``%term%`` patterns built from the route's hints, terms and anchors.
+
+        Patterns are unique and keep first-seen order.
+        """
+        # For a hint only its last path segment is used as a term.
+        terms = [hint.rsplit("/", 1)[-1] for hint in route.anchors.target_doc_hints if str(hint).strip()]
+        terms.extend(route.anchors.endpoint_paths)
+        terms.extend(route.target_terms)
+        terms.extend(route.anchors.file_names)
+        terms.extend(route.anchors.entity_names)
+        terms.extend(route.anchors.matched_aliases)
+        if route.anchors.process_domain:
+            terms.append(route.anchors.process_domain)
+        if route.anchors.process_subdomain:
+            terms.append(route.anchors.process_subdomain)
+        # De-duplicate again after lower-casing so terms differing only in case yield one
+        # pattern, matching how _api_method_patterns de-duplicates its final list.
+        return _unique_terms([f"%{term.lower()}%" for term in _unique_terms(terms)])
+
+    def _api_method_patterns(self, route: V2RouteResult) -> list[str]:
+        """Return lower-cased ``%term%`` patterns for the API-method profile, unique and in first-seen order."""
+        # Both the last path segment and the full hint are used as terms here.
+        terms = [hint.rsplit("/", 1)[-1] for hint in route.anchors.target_doc_hints if str(hint).strip()]
+        terms.extend(route.anchors.target_doc_hints)
+        terms.extend(route.anchors.endpoint_paths)
+        terms.extend(route.target_terms)
+        patterns: list[str] = []
+        for term in _unique_terms(terms):
+            lowered = term.lower()
+            # Emit the slash-stripped form first, then the raw form; duplicates are removed below.
+            stripped = lowered.strip("/")
+            if stripped:
+                patterns.append(f"%{stripped}%")
+            if lowered:
+                patterns.append(f"%{lowered}%")
+        return _unique_terms(patterns)
+
+    def _is_api_method_explain(self, route: V2RouteResult) -> bool:
+        """Return True for a SUMMARY route with endpoint paths, API-like hints or an API_ENDPOINT signal."""
+        if route.subintent != V2Subintent.SUMMARY:
+            return False
+        if route.anchors.endpoint_paths:
+            return True
+        if _has_api_like_hints(route.anchors.target_doc_hints):
+            return True
+        return V2AnchorType.API_ENDPOINT in anchor_signal_types(route)
+
+
+def _unique_terms(items: list[str]) -> list[str]:
+    """Return the stripped, non-empty items without duplicates, in first-seen order."""
+    cleaned = (str(raw or "").strip() for raw in items)
+    # dict.fromkeys keeps insertion order and drops repeated keys.
+    return list(dict.fromkeys(value for value in cleaned if value))
+
+
+def _has_api_like_hints(hints: list[str]) -> bool:
+    """Return True when any hint looks like an API path or API document.
+
+    A hint qualifies (case-insensitively, after stripping) when it starts with
+    ``/`` or an API docs prefix, or contains ``endpoint`` or ``method``.
+    """
+    api_prefixes = ("/", "docs/api/", "docs/endpoints/", "docs/methods/")
+    api_keywords = ("endpoint", "method")
+    normalized = (str(hint or "").strip().lower() for hint in hints)
+    return any(
+        text.startswith(api_prefixes) or any(keyword in text for keyword in api_keywords)
+        for text in normalized
+        if text
+    )
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/workflow_runtime/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/workflow_runtime/__init__.py
new file mode 100644
index 0000000..b8a6f58
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/workflow_runtime/__init__.py
@@ -0,0 +1,2 @@
+"""Runtime helpers for the doc-explain summary workflow."""
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/workflow_runtime/buffered_graph.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/workflow_runtime/buffered_graph.py
new file mode 100644
index 0000000..ba0c1ed
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/workflow_runtime/buffered_graph.py
@@ -0,0 +1,46 @@
+"""Граф workflow doc-explain summary: буфер шагов и один сброс в trace (на базе utils.workflow)."""
+
+from __future__ import annotations
+
+from typing import TypeVar
+
+from app.core.agent.utils.workflow.context import WorkflowContext
+from app.core.agent.utils.workflow.graph import WorkflowGraph
+
+TContext = TypeVar("TContext", bound=WorkflowContext)
+
+
+class DocExplainSummaryWorkflowGraph(WorkflowGraph[TContext]):
+    """Does not log step_started/step_completed separately; flushes the step buffer into ``workflow_trace_flushed``."""
+
+    async def run(self, context: TContext) -> TContext:
+        """Run every step in order, publishing a status and tracing input/output for each.
+
+        Each step is logged as ``workflow_step_traced`` and also buffered; the
+        buffer is logged once as ``workflow_trace_flushed`` after the last step.
+        """
+        workflow_trace = context.runtime.trace.module(self._source)
+        workflow_trace.log("workflow_started", {"workflow_id": self._workflow_id})
+        collected: list[dict[str, object]] = []
+        for step in self._steps:
+            # Input is captured before the step runs, output after it returns.
+            step_input = step.trace_input(context)
+            await context.runtime.publisher.publish_status(
+                context.runtime.request.request_id,
+                self._source,
+                f"Шаг workflow: {step.title}.",
+                {"workflow_id": self._workflow_id, "step_id": step.step_id},
+            )
+            context = await step.run(context)
+            step_output = step.trace_output(context)
+            step_event = {
+                "workflow_id": self._workflow_id,
+                "step": {"id": step.step_id, "title": step.title},
+                "input": step_input,
+                "output": step_output,
+            }
+            workflow_trace.log("workflow_step_traced", step_event)
+            collected.append(
+                {
+                    "step_id": step.step_id,
+                    "title": step.title,
+                    "input": step_input,
+                    "output": step_output,
+                }
+            )
+        flushed = {"workflow_id": self._workflow_id, "steps": collected}
+        workflow_trace.log("workflow_trace_flushed", flushed)
+        workflow_trace.log("workflow_completed", {"workflow_id": self._workflow_id})
+        return context
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/workflow_runtime/context.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/workflow_runtime/context.py
new file mode 100644
index 0000000..5460b84
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/workflow_runtime/context.py
@@ -0,0 +1,25 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+from app.core.agent.runtime.execution_context import RuntimeExecutionContext
+from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
+from app.core.agent.utils.process_v2.models import RetrievedSummary, V2RouteResult
+from app.core.rag.retrieval.session_retriever import RetrievalPlan
+
+
+@dataclass(slots=True)
+class DocExplainSummaryContext:
+ """Mutable state passed through the steps of the doc-explain summary workflow."""
+ runtime: RuntimeExecutionContext
+ route: V2RouteResult
+ # Falsy value makes RequireRagSessionStep answer with its missing-session message.
+ rag_session_id: str
+ prompt_name: str
+ # When False, GenerateSummaryAnswerStep builds a deterministic answer and skips the LLM branch.
+ workflow_llm_enabled: bool = True
+ # Set by ResolveRetrievalPlanStep.
+ retrieval_plan: RetrievalPlan | None = None
+ # Set by FetchRagRowsStep.
+ retrieved_rows: list[dict] = field(default_factory=list)
+ # Set by PrepareCandidateRowsStep.
+ rows: list[dict] = field(default_factory=list)
+ # Set by BuildSummaryEvidenceStep.
+ documents: list[RetrievedSummary] = field(default_factory=list)
+ # Set by ApplySummaryEvidenceGateStep, or by RequireRagSessionStep when it was given a gate decision.
+ gate_decision: EvidenceGateDecision | None = None
+ # Set by GenerateSummaryAnswerStep on its LLM branch.
+ prompt_input: str = ""
+ # Answer text; the steps after RequireRagSessionStep return early once it is non-empty.
+ answer: str = ""
+ # Dict with "answer_mode" and "answer_length"; stays None until a step that produces an answer sets it.
+ answer_generated_payload: dict[str, object] | None = None
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/workflow_runtime/context_protocols.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/workflow_runtime/context_protocols.py
new file mode 100644
index 0000000..f7bd89f
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/workflow_runtime/context_protocols.py
@@ -0,0 +1,26 @@
+"""Протоколы контекста для workflow doc-explain summary."""
+
+from __future__ import annotations
+
+from typing import Protocol
+
+from app.core.agent.runtime.execution_context import RuntimeExecutionContext
+from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
+from app.core.agent.utils.process_v2.models import RetrievedSummary, V2RouteResult
+from app.core.rag.retrieval.session_retriever import RetrievalPlan
+
+
+class RetrievalWorkflowContext(Protocol):
+ """Structural type of the context attributes used by the retrieval-stage workflow steps."""
+ runtime: RuntimeExecutionContext
+ route: V2RouteResult
+ rag_session_id: str
+ # None until a retrieval plan has been resolved.
+ retrieval_plan: RetrievalPlan | None
+ # Rows as fetched from the RAG adapter.
+ retrieved_rows: list[dict]
+ # Candidate rows prepared from the retrieved rows.
+ rows: list[dict]
+ gate_decision: EvidenceGateDecision | None
+ # Non-empty answer makes the steps bound to this protocol return without further work.
+ answer: str
+ answer_generated_payload: dict[str, object] | None
+
+
+class SummaryWorkflowContext(RetrievalWorkflowContext, Protocol):
+ """Retrieval context that additionally carries the assembled summary documents."""
+ # Summary documents assembled from the candidate rows.
+ documents: list[RetrievedSummary]
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/workflow_runtime/pipeline_logging.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/workflow_runtime/pipeline_logging.py
new file mode 100644
index 0000000..a1034c4
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/workflow_runtime/pipeline_logging.py
@@ -0,0 +1,106 @@
+"""Логирование retrieval/pipeline/ranking для doc-explain summary."""
+
+from __future__ import annotations
+
+from app.core.agent.utils.process_v2.models import V2RouteResult
+from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import normalize_doc_path, row_path
+
+
+def log_pipeline_step(runtime, step: str, payload: dict[str, object]) -> None:
+    """Record one pipeline step in the ``process.v2.pipeline`` trace module.
+
+    Args:
+        runtime: Object exposing ``trace.module(name)``; the returned module
+            must provide ``log(step, payload)``.
+        step: Name of the step being recorded.
+        payload: Data attached to the step.
+    """
+    pipeline_trace = runtime.trace.module("process.v2.pipeline")
+    pipeline_trace.log(step, payload)
+
+
+def log_retrieval_trace(runtime, route: V2RouteResult, plan, retrieved_rows: list[dict], metadata_rows: list[dict], rows: list[dict]) -> None:
+    """Write the trace events describing one retrieval pass.
+
+    Three events are emitted, in this order:
+
+    1. ``rag_rows_fetched`` on the ``process.v2.rag_retrieval`` trace module,
+       carrying every row in ``rows``.
+    2. ``candidate_generation`` via ``log_pipeline_step``: the query, hints,
+       the first 8 candidate rows, and the first 5 rows per source.
+    3. ``retrieval_executed`` via ``log_pipeline_step``: the first 5 rows.
+
+    Args:
+        runtime: Execution context exposing ``trace.module(name)``.
+        route: Route result; ``user_query`` and ``anchors`` are read from it.
+        plan: Retrieval plan; only ``profile`` is read.
+        retrieved_rows: Rows reported under the "seeded" and "semantic" sources.
+        metadata_rows: Rows reported under the "metadata_lookup" source.
+        rows: Rows reported as the candidates and top results.
+    """
+    rag_trace = runtime.trace.module("process.v2.rag_retrieval")
+    fetched_payload = {
+        "profile": plan.profile,
+        "row_count": len(rows),
+        "rows": [trace_row(row) for row in rows],
+    }
+    rag_trace.log("rag_rows_fetched", fetched_payload)
+
+    # Hints that are empty or whitespace-only are ignored.
+    hinted_paths = set()
+    for hint in route.anchors.target_doc_hints:
+        if str(hint or "").strip():
+            hinted_paths.add(normalize_doc_path(hint))
+
+    candidate_paths = []
+    for row in rows:
+        candidate_path = row_path(row)
+        if candidate_path:
+            candidate_paths.append(candidate_path)
+
+    semantic_head = retrieved_rows[:5]
+    candidate_payload = {
+        "query": route.user_query,
+        "profile": plan.profile,
+        "details": {
+            "target_doc_hints": list(route.anchors.target_doc_hints),
+            "candidates_before_ranking": candidate_paths,
+        },
+        "resolved_aliases": route.anchors.matched_aliases,
+        "target_doc_hints": route.anchors.target_doc_hints,
+        "candidate_docs_before_ranking": [trace_row(row) for row in rows[:8]],
+        "sources": {
+            # "seeded" is filtered after the cut to the first five rows.
+            "seeded": [trace_row(row) for row in semantic_head if row_path(row) in hinted_paths],
+            "metadata_lookup": [trace_row(row) for row in metadata_rows[:5]],
+            "semantic": [trace_row(row) for row in semantic_head],
+        },
+    }
+    log_pipeline_step(runtime, "candidate_generation", candidate_payload)
+
+    executed_payload = {
+        "query": route.user_query,
+        "profile": plan.profile,
+        "row_count": len(rows),
+        "target_doc_hints": route.anchors.target_doc_hints,
+        "top_results": [trace_row(row) for row in rows[:5]],
+    }
+    log_pipeline_step(runtime, "retrieval_executed", executed_payload)
+
+
+def log_ranking(runtime, items: list) -> None:
+    """Log ranking details for the first four ranked items.
+
+    One ``ranking_explained`` pipeline event is emitted per item, followed by a
+    final ``ranking_explained`` event summarising all of them. Attributes
+    missing on an item fall back to ``""``, ``0`` or ``{}``.
+
+    Args:
+        runtime: Execution context passed through to ``log_pipeline_step``.
+        items: Ranked items; ``path``, ``score``, ``match_reason`` and
+            ``score_breakdown`` are read with ``getattr``.
+    """
+    leaders = items[:4]
+    top_docs: list[dict[str, object]] = []
+    for entry in leaders:
+        doc = getattr(entry, "path", "")
+        score = getattr(entry, "score", 0)
+        match_reason = getattr(entry, "match_reason", "")
+        top_docs.append({"doc": doc, "score": score, "match_reason": match_reason})
+        per_item_payload = {
+            "doc": doc,
+            "score_breakdown": getattr(entry, "score_breakdown", {}),
+            "score": score,
+            "match_reason": match_reason,
+        }
+        log_pipeline_step(runtime, "ranking_explained", per_item_payload)
+
+    breakdowns = []
+    for entry in leaders:
+        breakdowns.append(
+            {
+                "doc": getattr(entry, "path", ""),
+                "score_breakdown": getattr(entry, "score_breakdown", {}),
+            }
+        )
+    summary_payload = {
+        "top_docs_after_ranking": top_docs,
+        "ranking_score_breakdown": breakdowns,
+    }
+    log_pipeline_step(runtime, "ranking_explained", summary_payload)
+
+
+def trace_row(row: dict) -> dict[str, object]:
+    """Flatten a retrieved row into a string-only dict suitable for trace logs.
+
+    Missing or falsy values become ``""``. ``summary_text`` and the stripped
+    ``content`` are cut to 400 characters.
+
+    Args:
+        row: Retrieved row. ``layer``, ``path``, ``title``, ``content`` and
+            ``document_id`` are read from the row, the remaining fields from
+            its ``metadata`` mapping.
+
+    Returns:
+        A dict with the keys ``layer``, ``path``, ``title``, ``document_id``,
+        ``entity_name``, ``summary_text``, ``section_path``,
+        ``metadata_domain``, ``metadata_subdomain`` and ``content_preview``.
+    """
+    raw_metadata = row.get("metadata")
+    # Trace logging must not break retrieval: a metadata value that is not a
+    # dict (for example a string) is treated as absent instead of raising.
+    metadata = raw_metadata if isinstance(raw_metadata, dict) else {}
+    content = str(row.get("content") or "").strip()
+    # Metadata ids take precedence over the row-level ``document_id``.
+    document_id = metadata.get("document_id") or metadata.get("doc_id") or row.get("document_id") or ""
+    return {
+        "layer": str(row.get("layer") or ""),
+        "path": str(row.get("path") or ""),
+        "title": str(row.get("title") or ""),
+        "document_id": str(document_id),
+        "entity_name": str(metadata.get("entity_name") or ""),
+        "summary_text": str(metadata.get("summary_text") or "")[:400],
+        "section_path": str(metadata.get("section_path") or ""),
+        "metadata_domain": str(metadata.get("domain") or ""),
+        "metadata_subdomain": str(metadata.get("subdomain") or ""),
+        "content_preview": content[:400],
+    }
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/__init__.py
new file mode 100644
index 0000000..0fb356a
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/__init__.py
@@ -0,0 +1,2 @@
+"""DOC_UPDATE/FROM_FEATURE workflow package."""
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/__init__.py
new file mode 100644
index 0000000..08d6d27
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/__init__.py
@@ -0,0 +1,13 @@
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.changeset_generator import (
+ DocRulesChangesetGenerator,
+)
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.loader import DocRulesLoader
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.models import DocRulesBundle
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.selector import DocRulesSelector
+
+__all__ = [
+ "DocRulesBundle",
+ "DocRulesChangesetGenerator",
+ "DocRulesLoader",
+ "DocRulesSelector",
+]
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/changeset_generator.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/changeset_generator.py
new file mode 100644
index 0000000..b392835
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/changeset_generator.py
@@ -0,0 +1,96 @@
+from __future__ import annotations
+
+import hashlib
+import json
+from pathlib import Path
+
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.models import DocRulesBundle
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.prompt_builder import DocChangePromptBuilder
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.selector import DocRulesSelector
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.models import PlannedChange
+from app.core.agent.utils.llm import AgentLlmService
+from app.infra.observability.module_trace import ModuleTrace
+from app.schemas.changeset import ChangeItem
+
+
+class DocRulesChangesetGenerator:
+    """Build one validated ``ChangeItem`` per planned change by prompting the LLM with doc rules.
+
+    Rule-selection, JSON, base-hash and validation failures are returned as a
+    message in the second slot of the ``generate`` result. Exceptions raised by
+    the LLM call itself are not caught here.
+    """
+
+    def __init__(
+        self,
+        llm: AgentLlmService,
+        selector: DocRulesSelector | None = None,
+        prompt_builder: DocChangePromptBuilder | None = None,
+    ) -> None:
+        """Store the LLM service; a default selector and prompt builder are created when omitted."""
+        self._llm = llm
+        self._selector = selector or DocRulesSelector()
+        self._prompt_builder = prompt_builder or DocChangePromptBuilder()
+
+    def generate(
+        self,
+        *,
+        bundle: DocRulesBundle,
+        item: PlannedChange,
+        domain: str,
+        subdomain: str,
+        project_root: str,
+        trace: ModuleTrace | None = None,
+    ) -> tuple[ChangeItem | None, str | None]:
+        """Generate the changeset item for one planned change.
+
+        Args:
+            bundle: Loaded doc rules from which the rules for ``item.doc_type`` are selected.
+            item: Planned change; its ``op`` and ``path`` override whatever the LLM returned.
+            domain: Domain forwarded to the prompt builder.
+            subdomain: Subdomain forwarded to the prompt builder.
+            project_root: Absolute project root used to hash the current file for update/delete.
+            trace: Optional trace module; receives ``changeset_prompt_built`` and is passed to the LLM.
+
+        Returns:
+            ``(change_item, None)`` on success, ``(None, message)`` on failure.
+        """
+        selected = self._selector.select(bundle, item.doc_type)
+        if selected is None:
+            return None, f"Для doc_type '{item.doc_type}' не найдено полных doc_rules (artifact/template)."
+        prompt_input = self._prompt_builder.build(item=item, rules=selected, domain=domain, subdomain=subdomain)
+        if trace is not None:
+            trace.log(
+                "changeset_prompt_built",
+                {
+                    "doc_type": item.doc_type,
+                    "path": item.path,
+                    "prompt_chars": len(prompt_input),
+                    "rules_chars": len(selected.render_for_prompt()),
+                },
+            )
+        raw = self._llm.generate(
+            "v2_docs_update.build_doc_changeset",
+            prompt_input,
+            log_context="workflow.v2.docs_update.from_feature.changeset",
+            trace=trace,
+        )
+        payload = self._parse_json(raw)
+        if payload is None:
+            return None, f"LLM вернул невалидный JSON changeset для {item.path}."
+        # The plan, not the model, decides the operation and the target path.
+        payload["op"] = item.op
+        payload["path"] = item.path
+        payload["reason"] = str(payload.get("reason") or item.reason)[:500]
+        if item.op in {"update", "delete"}:
+            base_hash = self._resolve_base_hash(project_root, item.path)
+            if not base_hash:
+                return None, f"{item.op.upper()} пропущен: не удалось вычислить base_hash для {item.path}."
+            payload["base_hash"] = base_hash
+        if item.op == "delete":
+            payload.pop("proposed_content", None)
+        try:
+            return ChangeItem.model_validate(payload), None
+        except Exception as exc:
+            return None, f"Невалидный changeset item для {item.path}: {exc}"
+
+    def _parse_json(self, raw: str) -> dict[str, object] | None:
+        """Return ``raw`` decoded as a JSON object, or ``None`` if it is empty, invalid or not an object."""
+        text = str(raw or "").strip()
+        if not text:
+            return None
+        try:
+            value = json.loads(text)
+        except json.JSONDecodeError:
+            return None
+        return value if isinstance(value, dict) else None
+
+    def _resolve_base_hash(self, project_root: str, rel_path: str) -> str:
+        """Return the SHA-256 hex digest of the UTF-8 text of ``rel_path`` under ``project_root``.
+
+        Returns ``""`` when the root is not absolute, when ``rel_path`` is
+        absolute or contains ``..`` segments, when the target is not a regular
+        file, or when it cannot be read as UTF-8.
+        """
+        root = Path(project_root or "").expanduser()
+        if not root.is_absolute():
+            return ""
+        relative = Path(rel_path or "")
+        # Joining an absolute path discards the root, and ".." segments walk out
+        # of it; either would hash a file outside the project.
+        if relative.is_absolute() or ".." in relative.parts:
+            return ""
+        target = root / relative
+        if not target.is_file():
+            return ""
+        try:
+            content = target.read_text(encoding="utf-8")
+        except (OSError, ValueError):
+            # ValueError covers UnicodeDecodeError for non-UTF-8 files.
+            return ""
+        return hashlib.sha256(content.encode("utf-8")).hexdigest()
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/loader.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/loader.py
new file mode 100644
index 0000000..20dc788
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/loader.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.models import DocRulesBundle
+
+
+class DocRulesLoader:
+    """Read the doc-rules directory tree from disk into a ``DocRulesBundle``.
+
+    Missing folders and unreadable files yield empty values instead of errors.
+    """
+
+    def __init__(self, root: Path | None = None) -> None:
+        """Use ``root`` when given, else the ``doc_rules`` folder under this file's fourth ancestor directory."""
+        if root:
+            self._root = root
+        else:
+            self._root = Path(__file__).resolve().parents[3] / "doc_rules"
+
+    @property
+    def root(self) -> Path:
+        """Directory the rules are loaded from."""
+        return self._root
+
+    def load(self) -> DocRulesBundle:
+        """Load the top-level rules file plus the global, artifact-type, template and section folders."""
+        rules_dir = self._root
+        return DocRulesBundle(
+            documentation_rules=self._read_file(rules_dir / "documentation-rules.md"),
+            global_rules=self._read_folder(rules_dir / "global", suffix=".md"),
+            artifact_rules=self._read_folder(rules_dir / "artifact-types", suffix=".md"),
+            templates=self._read_templates(rules_dir / "templates"),
+            sections=self._read_folder(rules_dir / "sections", suffix=".md"),
+        )
+
+    def _read_templates(self, folder: Path) -> dict[str, str]:
+        """Map each ``<name>.template.md`` file in ``folder`` to its text, keyed by ``<name>``."""
+        if not folder.is_dir():
+            return {}
+        marker_length = len(".template.md")
+        templates: dict[str, str] = {}
+        for template_path in sorted(folder.glob("*.template.md")):
+            templates[template_path.name[:-marker_length]] = self._safe_read(template_path)
+        return templates
+
+    def _read_folder(self, folder: Path, suffix: str) -> dict[str, str]:
+        """Map each file in ``folder`` matching ``*<suffix>`` to its text, keyed by file stem."""
+        if not folder.is_dir():
+            return {}
+        return {entry.stem: self._safe_read(entry) for entry in sorted(folder.glob(f"*{suffix}"))}
+
+    def _read_file(self, path: Path) -> str:
+        """Return the text of ``path``, or ``""`` when it is not an existing regular file."""
+        if path.is_file():
+            return self._safe_read(path)
+        return ""
+
+    def _safe_read(self, path: Path) -> str:
+        """Return the UTF-8 text of ``path``; any read failure yields ``""``."""
+        try:
+            text = path.read_text(encoding="utf-8")
+        except Exception:
+            return ""
+        return text
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/models.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/models.py
new file mode 100644
index 0000000..b673c79
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/models.py
@@ -0,0 +1,61 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from dataclasses import field
+
+
+@dataclass(slots=True)
+class DocRulesBundle:
+    """All doc-rule texts held in memory, grouped by kind and keyed by name."""
+
+    documentation_rules: str = ""
+    global_rules: dict[str, str] = field(default_factory=dict)
+    artifact_rules: dict[str, str] = field(default_factory=dict)
+    templates: dict[str, str] = field(default_factory=dict)
+    sections: dict[str, str] = field(default_factory=dict)
+
+    @property
+    def supported_doc_types(self) -> set[str]:
+        """Doc types that have both an artifact rule and a template."""
+        return self.artifact_rules.keys() & self.templates.keys()
+
+    def has_doc_type(self, doc_type: str) -> bool:
+        """Return whether ``doc_type`` has both an artifact rule and a template."""
+        return doc_type in self.supported_doc_types
+
+    def render_global(self) -> str:
+        """Render the documentation rules and the non-empty global rules as headed text.
+
+        Each text is stripped and preceded by a ``###`` heading naming its file.
+        Global rules follow in sorted name order; blocks are joined by blank lines.
+        """
+        chunks: list[str] = []
+        overview = self.documentation_rules.strip()
+        if overview:
+            chunks += ["### documentation-rules.md", overview]
+        for rule_name in sorted(self.global_rules):
+            rule_text = self.global_rules[rule_name].strip()
+            if rule_text:
+                chunks += [f"### global/{rule_name}.md", rule_text]
+        return "\n\n".join(chunks).strip()
+
+
+@dataclass(slots=True)
+class SelectedDocRules:
+    """Rule texts chosen for a single doc type, ready to be rendered into a prompt."""
+
+    doc_type: str
+    global_text: str
+    artifact_text: str
+    template_text: str
+    section_texts: dict[str, str] = field(default_factory=dict)
+
+    def render_for_prompt(self) -> str:
+        """Render the non-empty rule texts as ``##``-headed blocks separated by blank lines.
+
+        The order is global rules, artifact rules, template, then section rules
+        sorted by name. Texts that are empty after stripping are omitted.
+        """
+        headed_texts = [
+            ("## Global rules", self.global_text),
+            (f"## Artifact rules ({self.doc_type})", self.artifact_text),
+            (f"## Template ({self.doc_type})", self.template_text),
+        ]
+        for section_name in sorted(self.section_texts):
+            headed_texts.append((f"## Section rule: {section_name}", self.section_texts[section_name]))
+
+        parts: list[str] = []
+        for heading, text in headed_texts:
+            body = text.strip()
+            if body:
+                parts.extend((heading, body))
+        return "\n\n".join(parts).strip()
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/prompt_builder.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/prompt_builder.py
new file mode 100644
index 0000000..f8156ac
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/prompt_builder.py
@@ -0,0 +1,27 @@
+from __future__ import annotations
+
+import json
+
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.models import SelectedDocRules
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.models import PlannedChange
+
+
+class DocChangePromptBuilder:
+    """Serialise a planned change and its doc rules into the JSON prompt input."""
+
+    def build(self, item: PlannedChange, rules: SelectedDocRules, domain: str, subdomain: str) -> str:
+        """Return pretty-printed JSON with ``change_request`` and ``doc_rules_context``.
+
+        Args:
+            item: Planned change whose fields fill ``change_request``.
+            rules: Selected rules; their rendered text becomes ``doc_rules_context``.
+            domain: Stored under ``change_request.domain``.
+            subdomain: Stored under ``change_request.sub_domain``.
+
+        Returns:
+            JSON text indented by two spaces, with non-ASCII characters kept as-is.
+        """
+        change_request = {
+            "op": item.op,
+            "path": item.path,
+            "doc_type": item.doc_type,
+            "doc_id": item.doc_id,
+            "title": item.title,
+            "domain": domain,
+            "sub_domain": subdomain,
+            "reason": item.reason,
+            "source_refs": item.source_refs,
+            "related_docs": item.related_docs,
+            "requirement_body": item.requirement_body,
+        }
+        document = {
+            "change_request": change_request,
+            "doc_rules_context": rules.render_for_prompt(),
+        }
+        return json.dumps(document, ensure_ascii=False, indent=2)
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/selector.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/selector.py
new file mode 100644
index 0000000..44930ab
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/selector.py
@@ -0,0 +1,31 @@
+from __future__ import annotations
+
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.models import DocRulesBundle
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.models import SelectedDocRules
+
+
+class DocRulesSelector:
+    """Pick the artifact rule, template and section rules that apply to one doc type."""
+
+    # Section names used for doc types that have no entry in _SECTIONS_BY_TYPE.
+    _DEFAULT_SECTIONS: tuple[str, ...] = ("summary", "details", "requirements-format")
+    _SECTIONS_BY_TYPE: dict[str, tuple[str, ...]] = {
+        "api_method": ("summary", "details", "api-scenario", "api-contract", "requirements-format"),
+        "integration_doc": ("summary", "details", "api-contract", "requirements-format"),
+        "ui_page": ("summary", "details", "requirements-format"),
+        "logic_block": ("summary", "details", "requirements-format"),
+        "architecture_overview": ("summary", "details", "requirements-format"),
+        "domain_entity": ("summary", "details", "requirements-format"),
+    }
+
+    def select(self, bundle: DocRulesBundle, doc_type: str) -> SelectedDocRules | None:
+        """Return the rules for ``doc_type``, or ``None`` when its artifact rule or template is missing or blank.
+
+        Only sections that exist in the bundle with non-blank text are included;
+        their text is passed on unstripped.
+        """
+        artifact = bundle.artifact_rules.get(doc_type, "").strip()
+        template = bundle.templates.get(doc_type, "").strip()
+        if not (artifact and template):
+            return None
+
+        section_texts: dict[str, str] = {}
+        for section_name in self._SECTIONS_BY_TYPE.get(doc_type, self._DEFAULT_SECTIONS):
+            section_text = bundle.sections.get(section_name, "")
+            if section_text.strip():
+                section_texts[section_name] = section_text
+
+        return SelectedDocRules(
+            doc_type=doc_type,
+            global_text=bundle.render_global(),
+            artifact_text=artifact,
+            template_text=template,
+            section_texts=section_texts,
+        )
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/graph.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/graph.py
new file mode 100644
index 0000000..7707422
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/graph.py
@@ -0,0 +1,37 @@
+from __future__ import annotations
+
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.buffered_graph import (
+ DocUpdateFromFeatureWorkflowGraph,
+)
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.context import (
+ DocUpdateFromFeatureContext,
+)
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.steps.build_change_instructions_step import (
+ BuildChangeInstructionsStep,
+)
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.steps.build_change_plan_step import BuildChangePlanStep
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.steps.finalize_answer_step import FinalizeAnswerStep
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.steps.load_doc_rules_step import LoadDocRulesStep
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.steps.load_source_content_step import LoadSourceContentStep
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.steps.parse_feature_requirements_step import (
+ ParseFeatureRequirementsStep,
+)
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.steps.resolve_source_step import ResolveSourceStep
+from app.core.agent.utils.llm import AgentLlmService
+
+
+class DocUpdateFromFeatureGraph(DocUpdateFromFeatureWorkflowGraph[DocUpdateFromFeatureContext]):
+    """Workflow graph ``v2.docs_update.from_feature`` configured with its seven steps."""
+
+    def __init__(self, llm: AgentLlmService, doc_rules_enabled: bool = True) -> None:
+        """Build the step list and hand it to the base graph.
+
+        Args:
+            llm: LLM service given to the change-plan and change-instructions steps.
+            doc_rules_enabled: Forwarded to ``BuildChangeInstructionsStep``.
+        """
+        pipeline = [
+            ResolveSourceStep(),
+            LoadSourceContentStep(),
+            ParseFeatureRequirementsStep(),
+            LoadDocRulesStep(),
+            BuildChangePlanStep(llm=llm),
+            BuildChangeInstructionsStep(llm=llm, doc_rules_enabled=doc_rules_enabled),
+            FinalizeAnswerStep(),
+        ]
+        super().__init__(
+            workflow_id="v2.docs_update.from_feature",
+            source="workflow.v2.docs_update.from_feature",
+            steps=pipeline,
+        )
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/__init__.py
new file mode 100644
index 0000000..8b6f55a
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/__init__.py
@@ -0,0 +1,2 @@
+"""Steps for DOC_UPDATE/FROM_FEATURE workflow."""
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/build_change_instructions_step.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/build_change_instructions_step.py
new file mode 100644
index 0000000..c95510b
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/build_change_instructions_step.py
@@ -0,0 +1,122 @@
+from __future__ import annotations
+
+import hashlib
+from pathlib import Path
+
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.changeset_generator import (
+ DocRulesChangesetGenerator,
+)
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.context import DocUpdateFromFeatureContext
+from app.core.agent.utils.llm import AgentLlmService
+from app.core.agent.utils.workflow import WorkflowStep
+from app.schemas.changeset import ChangeItem
+from app.schemas.changeset import ChangeOp
+from app.schemas.changeset import PatchHunk
+
+
+class BuildChangeInstructionsStep(WorkflowStep[DocUpdateFromFeatureContext]):
+    """Turn every planned change into a ``ChangeItem`` and append it to ``context.changeset``.
+
+    Items are produced by ``DocRulesChangesetGenerator`` when doc rules are
+    enabled and a bundle is loaded; otherwise a templated draft is built locally.
+    Items that cannot be built add a message to ``context.issues`` and are skipped.
+    """
+
+    step_id = "build_changeset"
+    title = "Формирование changeset"
+
+    def __init__(self, llm: AgentLlmService, doc_rules_enabled: bool = True) -> None:
+        """Store the doc-rules switch and create the LLM-backed changeset generator."""
+        self._doc_rules_enabled = doc_rules_enabled
+        self._generator = DocRulesChangesetGenerator(llm=llm)
+
+    async def run(self, context: DocUpdateFromFeatureContext) -> DocUpdateFromFeatureContext:
+        """Build changeset items for ``context.planned_changes``.
+
+        Does nothing when an answer is already set or nothing is planned.
+        """
+        if context.answer or not context.planned_changes:
+            return context
+        llm_trace = context.runtime.trace.module("workflow.v2.docs_update.from_feature.llm")
+        # Only the first domain/subdomain is used; "unknown" stands in when none was parsed.
+        domain = (context.analytics_meta.domains or ["unknown"])[0]
+        subdomain = (context.analytics_meta.subdomains or ["unknown"])[0]
+        bundle = context.doc_rules_bundle
+        use_doc_rules = self._doc_rules_enabled and context.doc_rules_enabled and bundle is not None
+        for item in context.planned_changes:
+            if use_doc_rules:
+                change, issue = self._generator.generate(
+                    bundle=bundle,
+                    item=item,
+                    domain=domain,
+                    subdomain=subdomain,
+                    project_root=context.project_root,
+                    trace=llm_trace,
+                )
+                if issue:
+                    context.issues.append(issue)
+                    continue
+            else:
+                change = self._build_legacy_change(context, item, domain, subdomain)
+            if change is not None:
+                context.changeset.append(change)
+        return context
+
+    def trace_output(self, context: DocUpdateFromFeatureContext) -> dict[str, object]:
+        """Return the changeset and issue counts for tracing."""
+        return {"changeset_items": len(context.changeset), "issues": len(context.issues)}
+
+    def _build_legacy_change(self, context: DocUpdateFromFeatureContext, item, domain: str, subdomain: str) -> ChangeItem | None:
+        """Build a change item without the LLM.
+
+        Delete and update need the hash of the current file; when it cannot be
+        computed an issue is recorded and ``None`` is returned.
+        """
+        op = ChangeOp(item.op)
+        if op == ChangeOp.CREATE:
+            content = self._build_doc_content(item.doc_id, item.title, item.doc_type, domain, subdomain, item)
+            return ChangeItem(op=op, path=item.path, proposed_content=content, reason=item.reason)
+
+        base_hash = self._resolve_base_hash(context, item.path)
+        if op == ChangeOp.DELETE:
+            if not base_hash:
+                context.issues.append(f"DELETE пропущен: не удалось вычислить base_hash для {item.path}.")
+                return None
+            return ChangeItem(
+                op=op,
+                path=item.path,
+                base_hash=base_hash,
+                reason=item.reason,
+            )
+
+        if not base_hash:
+            context.issues.append(f"UPDATE пропущен: не удалось вычислить base_hash для {item.path}.")
+            return None
+        content = self._build_doc_content(item.doc_id, item.title, item.doc_type, domain, subdomain, item)
+        return ChangeItem(
+            op=op,
+            path=item.path,
+            base_hash=base_hash,
+            proposed_content=content,
+            reason=item.reason,
+            hunks=[PatchHunk(type="append_end", new_text="\n\n## Updated From System Analysis\n\nТребуется обновить содержимое по changeset.\n")],
+        )
+
+    def _build_doc_content(self, doc_id: str, title: str, doc_type: str, domain: str, subdomain: str, item) -> str:
+        """Render the draft document: a front-matter block followed by context and requirements sections."""
+        related = "\n".join(f" - {ref}" for ref in item.related_docs) if item.related_docs else " - TBD"
+        source_refs = "\n".join(f" - {ref}" for ref in item.source_refs) if item.source_refs else " - TBD"
+        functional_requirements = item.requirement_body.strip() or "Требования не были явно извлечены из unit."
+        return (
+            "---\n"
+            f"id: {doc_id}\n"
+            f"title: {title}\n"
+            f"doc_type: {doc_type}\n"
+            f"domain: {domain}\n"
+            f"sub_domain: {subdomain}\n"
+            "status: generated\n"
+            "related_docs:\n"
+            f"{related}\n"
+            "source_of_truth: system_analysis\n"
+            "system_analytics_refs:\n"
+            f"{source_refs}\n"
+            "---\n\n"
+            "## Context\n\n"
+            "Черновик сгенерирован workflow DOC_UPDATE/FROM_FEATURE на основе системной аналитики.\n\n"
+            "## Functional Requirements\n\n"
+            f"{functional_requirements}\n"
+        )
+
+    def _resolve_base_hash(self, context: DocUpdateFromFeatureContext, rel_path: str) -> str:
+        """Return the SHA-256 hex digest of the UTF-8 text of ``rel_path`` under ``context.project_root``.
+
+        Returns ``""`` when the root is not absolute, when ``rel_path`` is
+        absolute or contains ``..`` segments, when the target is not a regular
+        file, or when it cannot be read as UTF-8.
+        """
+        root = Path(context.project_root or "").expanduser()
+        if not root.is_absolute():
+            return ""
+        relative = Path(rel_path or "")
+        # Joining an absolute path discards the root, and ".." segments walk out
+        # of it; either would hash a file outside the project.
+        if relative.is_absolute() or ".." in relative.parts:
+            return ""
+        target = root / relative
+        if not target.is_file():
+            return ""
+        try:
+            content = target.read_text(encoding="utf-8")
+        except (OSError, ValueError):
+            # ValueError covers UnicodeDecodeError for non-UTF-8 files.
+            return ""
+        return hashlib.sha256(content.encode("utf-8")).hexdigest()
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/build_change_plan_step.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/build_change_plan_step.py
new file mode 100644
index 0000000..f44220e
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/build_change_plan_step.py
@@ -0,0 +1,162 @@
+from __future__ import annotations
+
+import json
+import re
+
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.steps.docs_state_loader import DocsState
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.context import DocUpdateFromFeatureContext
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.models import PlannedChange
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.system_rules import (
+ ALLOWED_DOC_TYPES,
+ DOC_TYPE_TO_FOLDER,
+ SYSTEM_RULES_TEXT,
+)
+from app.core.agent.utils.llm import AgentLlmService
+from app.core.agent.utils.workflow import WorkflowStep
+from app.core.rag.persistence.query_repository import RagQueryRepository
+
+
+class BuildChangePlanStep(WorkflowStep[DocUpdateFromFeatureContext]):
+    """Plan one documentation change per parsed requirement unit.
+
+    The step loads the current docs index from RAG, asks the LLM for the doc
+    type of units that do not declare one, and resolves the target path and
+    operation of every unit. Units with an unknown type are reported in
+    ``context.issues`` and skipped.
+    """
+
+    step_id = "build_change_plan"
+    title = "Построение плана изменений"
+
+    # Runs of characters not allowed in a slug. "ё"/"Ё" are listed explicitly
+    # because they lie outside the "а-я"/"А-Я" code point ranges.
+    _SLUG_SEPARATORS = re.compile(r"[^a-zA-Z0-9а-яА-ЯёЁ_-]+")
+    _SLUG_DASH_RUNS = re.compile(r"-+")
+
+    def __init__(self, llm: AgentLlmService, query_repository: RagQueryRepository | None = None) -> None:
+        """Store the LLM service; a default ``RagQueryRepository`` is created when omitted."""
+        self._llm = llm
+        self._query_repository = query_repository or RagQueryRepository()
+
+    async def run(self, context: DocUpdateFromFeatureContext) -> DocUpdateFromFeatureContext:
+        """Fill ``context.planned_changes`` from ``context.units``.
+
+        Does nothing when an answer is already set or there are no units.
+        """
+        if context.answer or not context.units:
+            return context
+        self._load_docs_state(context)
+        inferred_types = self._infer_missing_types(context)
+        state = DocsState.from_rows(context.docs_catalog_rows)
+        for index, unit in enumerate(context.units):
+            planned = self._build_unit_plan(context, unit, state, inferred_types.get(index, ""))
+            if planned is not None:
+                context.planned_changes.append(planned)
+        if not context.planned_changes:
+            context.issues.append("Не удалось построить план изменений по функциональным требованиям.")
+        return context
+
+    def trace_output(self, context: DocUpdateFromFeatureContext) -> dict[str, object]:
+        """Return the docs-row, planned-change and issue counts for tracing."""
+        return {
+            "docs_rows": len(context.docs_catalog_rows),
+            "planned_changes": len(context.planned_changes),
+            "issues": len(context.issues),
+        }
+
+    def _load_docs_state(self, context: DocUpdateFromFeatureContext) -> None:
+        """Load the docs index rows for the RAG session; failures are recorded as issues."""
+        if not context.rag_session_id:
+            context.issues.append("Отсутствует active RAG session: невозможно сопоставить изменения с текущей документацией.")
+            return
+        try:
+            context.docs_catalog_rows = self._query_repository.list_docs_scope_index_rows(context.rag_session_id)
+        except Exception as exc:
+            context.issues.append(f"Не удалось загрузить состояние документации из RAG: {exc}")
+
+    def _infer_missing_types(self, context: DocUpdateFromFeatureContext) -> dict[int, str]:
+        """Ask the LLM for the doc type of every unit without a ``type`` in its metadata.
+
+        Returns a mapping of unit index to inferred doc type; it is empty when
+        every unit already declares a type.
+        """
+        missing: list[tuple[int, str, str]] = []
+        for idx, unit in enumerate(context.units):
+            declared = str(unit.metadata.get("type") or "").strip()
+            if not declared:
+                # Only the first 400 characters of the body are sent as a snippet.
+                missing.append((idx, unit.heading, unit.body[:400]))
+        if not missing:
+            return {}
+        payload = {
+            "system_rules": SYSTEM_RULES_TEXT,
+            "allowed_doc_types": list(ALLOWED_DOC_TYPES),
+            "items": [{"index": idx, "heading": h, "snippet": snippet} for idx, h, snippet in missing],
+        }
+        raw = self._llm.generate(
+            "v2_docs_update.plan_change_units",
+            json.dumps(payload, ensure_ascii=False, indent=2),
+            log_context="workflow.v2.docs_update.from_feature.plan",
+            trace=context.runtime.trace.module("workflow.v2.docs_update.from_feature.llm"),
+        )
+        return self._parse_type_inference(raw)
+
+    def _parse_type_inference(self, raw: str) -> dict[int, str]:
+        """Parse the LLM reply into ``{unit index: doc type}``.
+
+        Invalid JSON yields ``{}``. Rows without an integer index or with a doc
+        type outside ``ALLOWED_DOC_TYPES`` are dropped.
+        """
+        try:
+            data = json.loads(str(raw or "").strip())
+        except json.JSONDecodeError:
+            return {}
+        rows = data.get("items") if isinstance(data, dict) else []
+        if not isinstance(rows, list):
+            return {}
+        result: dict[int, str] = {}
+        for row in rows:
+            if not isinstance(row, dict):
+                continue
+            index = row.get("index")
+            doc_type = str(row.get("doc_type") or "").strip()
+            # bool is a subclass of int: JSON true/false must not pass as unit 1/0.
+            if isinstance(index, bool) or not isinstance(index, int):
+                continue
+            if doc_type not in ALLOWED_DOC_TYPES:
+                continue
+            result[index] = doc_type
+        return result
+
+    def _build_unit_plan(
+        self,
+        context: DocUpdateFromFeatureContext,
+        unit,
+        state: DocsState,
+        inferred_doc_type: str,
+    ) -> PlannedChange | None:
+        """Build the planned change for one unit, or record an issue and return ``None``.
+
+        A type declared in the unit metadata wins over the inferred one.
+        """
+        doc_type = str(unit.metadata.get("type") or inferred_doc_type).strip()
+        if doc_type not in ALLOWED_DOC_TYPES:
+            context.issues.append(f"Unit '{unit.heading}': неизвестный или отсутствующий type '{doc_type}'.")
+            return None
+        unit_id = str(unit.metadata.get("id") or self._make_doc_id(doc_type, unit.heading)).strip()
+        op_hint = str(unit.metadata.get("op") or "create_or_update").strip().lower()
+        target_hint = str(unit.metadata.get("target_path_hint") or "").strip()
+        path = self._resolve_path(doc_type, unit_id, unit.heading, target_hint, state)
+        op = self._resolve_op(op_hint, unit_id, path, state)
+        source_refs = self._as_list(unit.metadata.get("source_refs")) or ["section: 5. Функциональные требования"]
+        related_docs = self._as_list(unit.metadata.get("related_docs"))
+        reason = f"Из unit '{unit.heading}' системной аналитики ({context.analytics_meta.analysis_id or 'analysis'})."
+        return PlannedChange(
+            op=op,
+            path=path,
+            doc_type=doc_type,
+            reason=reason,
+            title=unit.heading,
+            doc_id=unit_id,
+            requirement_body=str(unit.body or "").strip(),
+            source_refs=source_refs,
+            related_docs=related_docs,
+        )
+
+    def _resolve_path(self, doc_type: str, unit_id: str, heading: str, hint: str, state: DocsState) -> str:
+        """Pick the target path: known doc id first, then the hint, then ``<folder>/<slug>.md``."""
+        if unit_id in state.by_doc_id:
+            return state.by_doc_id[unit_id]
+        if hint:
+            return hint
+        folder = DOC_TYPE_TO_FOLDER.get(doc_type, "docs")
+        slug = self._slugify(unit_id or heading)
+        return f"{folder}/{slug}.md"
+
+    def _resolve_op(self, op_hint: str, unit_id: str, path: str, state: DocsState) -> str:
+        """Return the operation: an explicit delete/create/update hint, else update if the doc is known, else create."""
+        if op_hint in {"delete", "create", "update"}:
+            return op_hint
+        if path in state.by_path or unit_id in state.by_doc_id:
+            return "update"
+        return "create"
+
+    def _make_doc_id(self, doc_type: str, heading: str) -> str:
+        """Derive ``<doc_type>.<slug>`` from the heading, using underscores instead of dashes."""
+        slug = self._slugify(heading).replace("-", "_")
+        return f"{doc_type}.{slug}".strip(".")
+
+    def _slugify(self, value: str) -> str:
+        """Lower-case ``value`` and collapse every run of disallowed characters into one dash.
+
+        Latin and Cyrillic letters (including "ё"), digits, ``_`` and ``-`` are
+        kept. Returns ``"doc"`` when nothing is left.
+        """
+        cleaned = self._SLUG_SEPARATORS.sub("-", value.lower()).strip("-")
+        return self._SLUG_DASH_RUNS.sub("-", cleaned) or "doc"
+
+    def _as_list(self, value: object) -> list[str]:
+        """Normalise a list or a single string into a list of non-empty stripped strings."""
+        if isinstance(value, list):
+            return [str(item).strip() for item in value if str(item).strip()]
+        if isinstance(value, str) and value.strip():
+            return [value.strip()]
+        return []
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/docs_state_loader.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/docs_state_loader.py
new file mode 100644
index 0000000..9f2709a
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/docs_state_loader.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+
+@dataclass(slots=True)
+class DocEntry:
+ """Record pairing a documentation file path with its document id."""
+ path: str
+ doc_id: str
+
+
+@dataclass(slots=True)
+class DocsState:
+    """Index of existing documents: the set of known paths and a doc id to path map."""
+
+    by_path: set[str] = field(default_factory=set)
+    by_doc_id: dict[str, str] = field(default_factory=dict)
+
+    @classmethod
+    def from_rows(cls, rows: list[dict]) -> "DocsState":
+        """Build the index from docs index rows.
+
+        Rows without a non-blank ``path`` are ignored. The doc id is read from
+        the metadata keys ``id``, ``doc_id`` and ``document_id``, in that order;
+        metadata that is not a dict is treated as empty. When several rows
+        share a doc id, the first row's path is kept.
+        """
+        state = cls()
+        for row in rows:
+            doc_path = str(row.get("path") or "").strip()
+            if not doc_path:
+                continue
+            state.by_path.add(doc_path)
+
+            metadata = row.get("metadata")
+            if not isinstance(metadata, dict):
+                metadata = {}
+            raw_doc_id = metadata.get("id") or metadata.get("doc_id") or metadata.get("document_id")
+            doc_id = str(raw_doc_id or "").strip()
+            if doc_id:
+                state.by_doc_id.setdefault(doc_id, doc_path)
+        return state
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/feature_markdown_parser.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/feature_markdown_parser.py
new file mode 100644
index 0000000..aebadfd
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/feature_markdown_parser.py
@@ -0,0 +1,137 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.models import (
+ AnalyticsMeta,
+ FeatureRequirementUnit,
+)
+
+
+@dataclass(slots=True)
+class ParsedFeatureSpec:
+ """Result of FeatureMarkdownParser.parse: document metadata plus requirement units."""
+ meta: AnalyticsMeta
+ units: list[FeatureRequirementUnit]
+
+
+class FeatureMarkdownParser:
+    """Parse a feature-analytics markdown document into metadata and units.
+
+    The parser is line-oriented. Document metadata is read from
+    ``key: value`` lines (or ``key:`` followed by ``- item`` lines) located
+    before the first ``## `` heading. Requirement units are the ``### ``
+    subsections of the functional-requirements section.
+    """
+
+    _META_KEYS = {"analysis_id", "domains", "subdomains"}
+
+    def parse(self, content: str) -> ParsedFeatureSpec:
+        """Return the metadata and requirement units found in *content*."""
+        lines = content.splitlines()
+        meta = self._parse_meta(lines)
+        section_lines = self._extract_functional_section(lines)
+        units = self._parse_units(section_lines)
+        return ParsedFeatureSpec(meta=meta, units=units)
+
+    def _parse_meta(self, lines: list[str]) -> AnalyticsMeta:
+        """Collect the keys listed in ``_META_KEYS`` from the document preamble.
+
+        Scanning stops at the first ``## `` heading. A key with an inline
+        value is stored as a string; a key with an empty value is stored as
+        the list of ``- item`` lines that follow it.
+        """
+        values: dict[str, object] = {}
+        i = 0
+        while i < len(lines):
+            line = lines[i].strip()
+            if line.startswith("## "):
+                break
+            if ":" not in line:
+                i += 1
+                continue
+            key, raw = [part.strip() for part in line.split(":", 1)]
+            if key not in self._META_KEYS:
+                i += 1
+                continue
+            if raw:
+                values[key] = raw.strip("\"'")
+                i += 1
+                continue
+            # Same list rules as unit metadata: any indentation is accepted,
+            # so deeper-indented or tab-indented items are no longer dropped.
+            values[key], i = self._collect_list_items(lines, i + 1)
+        return AnalyticsMeta(
+            analysis_id=str(values.get("analysis_id") or "").strip(),
+            domains=self._as_list(values.get("domains")),
+            subdomains=self._as_list(values.get("subdomains")),
+        )
+
+    def _extract_functional_section(self, lines: list[str]) -> list[str]:
+        """Return the lines between the functional heading and the next ``## `` heading."""
+        started = False
+        section: list[str] = []
+        for line in lines:
+            stripped = line.strip()
+            if stripped.startswith("## "):
+                if self._is_functional_heading(stripped):
+                    started = True
+                    continue
+                if started:
+                    # Any other level-2 heading closes the section.
+                    break
+            if started:
+                section.append(line)
+        return section
+
+    def _is_functional_heading(self, value: str) -> bool:
+        """Tell whether *value* is the ``## 5`` functional-requirements heading."""
+        lowered = value.lower()
+        return "функциональные требования" in lowered and lowered.startswith("## 5")
+
+    def _parse_units(self, section_lines: list[str]) -> list[FeatureRequirementUnit]:
+        """Split the section into units, one per ``### `` heading.
+
+        Lines that precede the first ``### `` heading are ignored.
+        """
+        units: list[FeatureRequirementUnit] = []
+        current_title = ""
+        current_lines: list[str] = []
+        for line in section_lines:
+            if line.strip().startswith("### "):
+                if current_title:
+                    units.append(self._build_unit(current_title, current_lines))
+                current_title = line.strip()[4:].strip()
+                current_lines = []
+                continue
+            if current_title:
+                current_lines.append(line)
+        if current_title:
+            units.append(self._build_unit(current_title, current_lines))
+        return units
+
+    def _build_unit(self, title: str, lines: list[str]) -> FeatureRequirementUnit:
+        """Create a unit from its heading text and the lines below it."""
+        metadata, body = self._split_metadata_and_body(lines)
+        return FeatureRequirementUnit(heading=title, body="\n".join(body).strip(), metadata=metadata)
+
+    def _split_metadata_and_body(self, lines: list[str]) -> tuple[dict[str, object], list[str]]:
+        """Separate leading ``key: value`` lines of a unit from its body.
+
+        Blank lines are skipped. Metadata ends at the first non-blank line
+        that has no colon or whose key is not a valid identifier; that line
+        and everything after it form the body.
+        """
+        meta: dict[str, object] = {}
+        body_start = 0
+        i = 0
+        while i < len(lines):
+            stripped = lines[i].strip()
+            if not stripped:
+                body_start = i + 1
+                i += 1
+                continue
+            if ":" not in stripped:
+                break
+            key, value = [part.strip() for part in stripped.split(":", 1)]
+            if not key.isidentifier():
+                break
+            if value:
+                meta[key] = value.strip("\"'")
+                body_start = i + 1
+                i += 1
+                continue
+            meta[key], i = self._collect_list_items(lines, i + 1)
+            body_start = i
+        return meta, lines[body_start:]
+
+    def _collect_list_items(self, lines: list[str], start: int) -> tuple[list[str], int]:
+        """Read consecutive ``- item`` lines beginning at index *start*.
+
+        Returns the item texts (surrounding quotes removed) and the index of
+        the first line that is not a list item.
+        """
+        items: list[str] = []
+        j = start
+        while j < len(lines) and lines[j].lstrip().startswith("- "):
+            items.append(lines[j].split("-", 1)[1].strip().strip("\"'"))
+            j += 1
+        return items, j
+
+    def _as_list(self, value: object) -> list[str]:
+        """Normalise a string or list to a list of non-empty, stripped strings."""
+        if isinstance(value, list):
+            return [str(item).strip() for item in value if str(item).strip()]
+        if isinstance(value, str) and value.strip():
+            return [value.strip()]
+        return []
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/finalize_answer_step.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/finalize_answer_step.py
new file mode 100644
index 0000000..5224c60
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/finalize_answer_step.py
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+import json
+
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.context import DocUpdateFromFeatureContext
+from app.core.agent.utils.workflow import WorkflowStep
+
+
+class FinalizeAnswerStep(WorkflowStep[DocUpdateFromFeatureContext]):
+    """Compose the final text answer from issues, planned changes and the changeset."""
+
+    step_id = "finalize"
+    title = "Подготовка ответа"
+
+    async def run(self, context: DocUpdateFromFeatureContext) -> DocUpdateFromFeatureContext:
+        """Fill ``answer``, ``apply_changeset`` and ``answer_generated_payload``.
+
+        Does nothing when an answer is already present on the context.
+        """
+        if context.answer:
+            return context
+
+        report: list[str] = ["DOC_UPDATE/FROM_FEATURE: результат построения changeset."]
+
+        if context.issues:
+            report.append("\nОбнаружены несоответствия/нехватка данных:")
+            report.extend(f"- {issue}" for issue in context.issues)
+
+        report.append("\nПлан изменений:")
+        if context.planned_changes:
+            report.extend(
+                f"- {item.op}: {item.path} ({item.doc_type})" for item in context.planned_changes
+            )
+        else:
+            report.append("- Не сформирован.")
+
+        serialized = json.dumps(
+            [item.model_dump() for item in context.changeset],
+            ensure_ascii=False,
+            indent=2,
+        )
+        report.extend(["\nChangeset (для плагина):", "```json", serialized, "```"])
+
+        # The changeset is flagged for application whenever it is non-empty.
+        context.apply_changeset = bool(context.changeset)
+        report.append(f"\napply_changeset: {str(context.apply_changeset).lower()}")
+
+        context.answer = "\n".join(report)
+        context.answer_generated_payload = {
+            "answer_mode": "docs_update_changeset",
+            "answer_length": len(context.answer),
+            "changeset_items": len(context.changeset),
+            "apply_changeset": context.apply_changeset,
+        }
+        return context
+
+    def trace_output(self, context: DocUpdateFromFeatureContext) -> dict[str, object]:
+        """Return answer length, issue count and changeset size for tracing."""
+        return {
+            "answer_length": len(context.answer),
+            "issues": len(context.issues),
+            "changeset_items": len(context.changeset),
+        }
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/load_doc_rules_step.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/load_doc_rules_step.py
new file mode 100644
index 0000000..0dea2a3
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/load_doc_rules_step.py
@@ -0,0 +1,33 @@
+from __future__ import annotations
+
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.loader import DocRulesLoader
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.context import DocUpdateFromFeatureContext
+from app.core.agent.utils.workflow import WorkflowStep
+
+
+class LoadDocRulesStep(WorkflowStep[DocUpdateFromFeatureContext]):
+    """Load the doc_rules bundle and store it on the workflow context."""
+
+    step_id = "load_doc_rules"
+    title = "Загрузка doc_rules"
+
+    def __init__(self, loader: DocRulesLoader | None = None) -> None:
+        """Use *loader* when given, otherwise create a default ``DocRulesLoader``."""
+        self._loader = loader or DocRulesLoader()
+
+    async def run(self, context: DocUpdateFromFeatureContext) -> DocUpdateFromFeatureContext:
+        """Load the bundle unless an answer exists or doc_rules are disabled.
+
+        Records an issue when the loaded bundle reports no supported doc types.
+        """
+        if context.answer:
+            return context
+        if not context.doc_rules_enabled:
+            return context
+
+        context.doc_rules_bundle = self._loader.load()
+        if context.doc_rules_bundle.supported_doc_types:
+            return context
+
+        rules_root = self._loader.root.as_posix()
+        context.issues.append(
+            f"doc_rules не загружены: отсутствуют пары artifact/template в {rules_root}."
+        )
+        return context
+
+    def trace_output(self, context: DocUpdateFromFeatureContext) -> dict[str, object]:
+        """Return the doc_rules loading state for tracing."""
+        bundle = context.doc_rules_bundle
+        doc_types = bundle.supported_doc_types if bundle else set()
+        return {
+            "enabled": context.doc_rules_enabled,
+            "loaded": context.doc_rules_bundle is not None,
+            "supported_doc_types": sorted(doc_types),
+            "issues": len(context.issues),
+        }
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/load_source_content_step.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/load_source_content_step.py
new file mode 100644
index 0000000..0402d6d
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/load_source_content_step.py
@@ -0,0 +1,48 @@
+from __future__ import annotations
+from pathlib import Path
+
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.context import DocUpdateFromFeatureContext
+from app.core.agent.utils.workflow import WorkflowStep
+
+
+class LoadSourceContentStep(WorkflowStep[DocUpdateFromFeatureContext]):
+    """Read the analytics markdown file referenced by ``context.source_ref``."""
+
+    step_id = "load_source"
+    title = "Загрузка системной аналитики"
+
+    async def run(self, context: DocUpdateFromFeatureContext) -> DocUpdateFromFeatureContext:
+        """Load ``feature_content`` and ``project_root`` from a markdown file.
+
+        Unsupported source kinds, a missing file and read errors are recorded
+        in ``context.issues`` instead of being raised.
+        """
+        if context.answer or not context.source_ref:
+            return context
+
+        kind = context.source_kind
+        if kind != "markdown_file":
+            if kind == "confluence_url":
+                problem = "Пока не поддерживается скачивание системной аналитики по Confluence URL."
+            else:
+                problem = "Поддерживается только источник markdown-файла (.md)."
+            context.issues.append(problem)
+            return context
+
+        source_path = Path(context.source_ref)
+        if not (source_path.exists() and source_path.is_file()):
+            context.issues.append(f"Файл системной аналитики не найден: {context.source_ref}")
+            return context
+
+        try:
+            context.feature_content = source_path.read_text(encoding="utf-8")
+            context.project_root = self._resolve_project_root(source_path).as_posix()
+        except Exception as exc:
+            context.issues.append(f"Не удалось прочитать файл аналитики: {exc}")
+        return context
+
+    def trace_output(self, context: DocUpdateFromFeatureContext) -> dict[str, object]:
+        """Return the source kind, load status and project root for tracing."""
+        return {
+            "source_kind": context.source_kind,
+            "content_loaded": bool(context.feature_content),
+            "project_root": context.project_root,
+            "issues": len(context.issues),
+        }
+
+    def _resolve_project_root(self, source_path: Path) -> Path:
+        """Return the directory above the first ``_incoming`` path component.
+
+        Falls back to the file's parent directory when ``_incoming`` is absent
+        or is the very first component.
+        """
+        components = source_path.parts
+        try:
+            incoming_at = components.index("_incoming")
+        except ValueError:
+            return source_path.parent
+        if incoming_at <= 0:
+            return source_path.parent
+        return Path(*components[:incoming_at])
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/parse_feature_requirements_step.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/parse_feature_requirements_step.py
new file mode 100644
index 0000000..3634c4d
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/parse_feature_requirements_step.py
@@ -0,0 +1,40 @@
+from __future__ import annotations
+
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.steps.feature_markdown_parser import FeatureMarkdownParser
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.context import DocUpdateFromFeatureContext
+from app.core.agent.utils.workflow import WorkflowStep
+
+
+class ParseFeatureRequirementsStep(WorkflowStep[DocUpdateFromFeatureContext]):
+    """Parse the loaded analytics markdown and report missing pieces as issues."""
+
+    step_id = "parse_feature"
+    title = "Парсинг функциональных требований"
+
+    def __init__(self, parser: FeatureMarkdownParser | None = None) -> None:
+        """Use *parser* when given, otherwise create a default ``FeatureMarkdownParser``."""
+        self._parser = parser or FeatureMarkdownParser()
+
+    async def run(self, context: DocUpdateFromFeatureContext) -> DocUpdateFromFeatureContext:
+        """Fill ``analytics_meta`` and ``units``; add one issue per missing part.
+
+        Skipped when an answer already exists or no content was loaded.
+        """
+        if context.answer or not context.feature_content:
+            return context
+
+        parsed = self._parser.parse(context.feature_content)
+        context.analytics_meta = parsed.meta
+        context.units = parsed.units
+
+        meta = context.analytics_meta
+        # (value, message) pairs, checked in this order; an empty value adds the message.
+        checks = (
+            (meta.analysis_id, "Отсутствует analysis_id в metadata аналитики."),
+            (meta.domains, "Отсутствует domains в metadata аналитики."),
+            (meta.subdomains, "Отсутствует subdomains в metadata аналитики."),
+            (
+                context.units,
+                "Не найдены units в разделе '## 5. Функциональные требования' с заголовками уровня '###'.",
+            ),
+        )
+        context.issues.extend(message for value, message in checks if not value)
+        return context
+
+    def trace_output(self, context: DocUpdateFromFeatureContext) -> dict[str, object]:
+        """Return the parsed metadata, unit count and issue count for tracing."""
+        meta = context.analytics_meta
+        return {
+            "analysis_id": meta.analysis_id,
+            "domains": meta.domains,
+            "subdomains": meta.subdomains,
+            "units": len(context.units),
+            "issues": len(context.issues),
+        }
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/prompts/prompts.yml b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/prompts/prompts.yml
new file mode 100644
index 0000000..6e170df
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/prompts/prompts.yml
@@ -0,0 +1,36 @@
+namespace: v2_docs_update
+
+prompts:
+ plan_change_units: |
+ Ты классифицируешь units системной аналитики для построения плана изменений документации.
+
+ Верни только JSON:
+ {
+ "items": [
+ {"index": 0, "doc_type": "api_method", "reason": "..."}
+ ]
+ }
+
+ Правила:
+ - Используй только doc_type из allowed_doc_types.
+ - Не пропускай item, даже если не уверен: выбери наиболее близкий тип.
+ - Ориентируйся на heading и snippet.
+ - Никакого markdown и текста вне JSON.
+
+ build_doc_changeset: |
+ Ты формируешь один item changeset для документации на основе системной аналитики и правил doc_rules.
+
+ Верни только JSON-объект формата:
+ {
+ "op": "create|update|delete",
+ "path": "docs/...",
+ "reason": "краткая причина",
+ "proposed_content": "полный markdown документа для create/update"
+ }
+
+ Правила:
+ - Строго соблюдай структуру и ограничения из doc_rules_context.
+ - Для create/update верни полный итоговый markdown (frontmatter + body).
+ - Для update не используй placeholder-тексты; возвращай пригодный к сохранению документ.
+ - reason обязателен, короткий, по сути изменения.
+ - Никакого markdown и текста вне JSON.
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/resolve_source_step.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/resolve_source_step.py
new file mode 100644
index 0000000..63a6924
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/resolve_source_step.py
@@ -0,0 +1,49 @@
+from __future__ import annotations
+
+import re
+
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.context import DocUpdateFromFeatureContext
+from app.core.agent.utils.workflow import WorkflowStep
+
+
+class ResolveSourceStep(WorkflowStep[DocUpdateFromFeatureContext]):
+    """Detect the analytics source (absolute ``.md`` path or URL) in the user query."""
+
+    step_id = "resolve_source"
+    title = "Определение источника аналитики"
+
+    # An absolute path has to start at a "/" that is not part of something
+    # else. The lookbehinds reject a slash preceded by a word character or
+    # another slash (relative paths such as "a/b", the host part of a URL)
+    # and the first slash of "http://" / "https://". Without them
+    # "https://host/page.md" was reported as the local file "//host/page.md".
+    _PATH_PATTERN = re.compile(r"(?<![\w/])(?<!https:)(?<!http:)(/[^\n`]+?\.md)")
+    _URL_PATTERN = re.compile(r"https?://[^\s)]+")
+
+    async def run(self, context: DocUpdateFromFeatureContext) -> DocUpdateFromFeatureContext:
+        """Set ``source_ref`` and ``source_kind`` from ``context.route.user_query``.
+
+        A markdown path takes precedence over a URL. When neither is found,
+        an issue is appended and the source fields stay untouched.
+        """
+        if context.answer:
+            return context
+        query = context.route.user_query
+        path = self._extract_path(query)
+        if path:
+            context.source_ref = path
+            context.source_kind = "markdown_file"
+            return context
+        url = self._extract_url(query)
+        if url:
+            context.source_ref = url
+            context.source_kind = "confluence_url" if "confluence" in url.lower() else "url"
+            return context
+        context.issues.append("Не удалось определить источник системной аналитики (ожидался путь .md или URL).")
+        return context
+
+    def trace_output(self, context: DocUpdateFromFeatureContext) -> dict[str, object]:
+        """Return the resolved source and issue count for tracing."""
+        return {"source_kind": context.source_kind, "source_ref": context.source_ref, "issues": len(context.issues)}
+
+    def _extract_path(self, query: str) -> str:
+        """Return the first absolute ``.md`` path in *query*, or ``""``.
+
+        Backtick-delimited chunks are checked first, which lets a quoted
+        path be taken verbatim; otherwise the path regex is applied to the
+        whole query.
+        """
+        if "`" in query:
+            for chunk in query.split("`"):
+                value = chunk.strip().strip('"').strip("'")
+                if value.endswith(".md") and value.startswith("/"):
+                    return value
+        match = self._PATH_PATTERN.search(query)
+        if not match:
+            return ""
+        return match.group(1).strip().strip('"').strip("'")
+
+    def _extract_url(self, query: str) -> str:
+        """Return the first http(s) URL in *query*, or ``""``."""
+        match = self._URL_PATTERN.search(query)
+        return match.group(0).strip() if match else ""
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/__init__.py
new file mode 100644
index 0000000..32574e8
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/__init__.py
@@ -0,0 +1,2 @@
+"""Runtime helpers for DOC_UPDATE/FROM_FEATURE workflow."""
+
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/buffered_graph.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/buffered_graph.py
new file mode 100644
index 0000000..2f7a532
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/buffered_graph.py
@@ -0,0 +1,96 @@
+"""Buffered graph for DOC_UPDATE/FROM_FEATURE workflow."""
+
+from __future__ import annotations
+
+from typing import TypeVar
+
+from app.core.agent.utils.workflow.context import WorkflowContext
+from app.core.agent.utils.workflow.graph import WorkflowGraph
+
+TContext = TypeVar("TContext", bound=WorkflowContext)
+
+
+class DocUpdateFromFeatureWorkflowGraph(WorkflowGraph[TContext]):
+    """Workflow graph that traces every step and also logs all step traces at the end."""
+
+    async def run(self, context: TContext) -> TContext:
+        """Run the steps in order, publishing a status and a trace entry for each.
+
+        Each step's input and output trace payloads are combined with a
+        context snapshot taken before and after the step respectively.
+        """
+        tracer = context.runtime.trace.module(self._source)
+        tracer.log("workflow_started", {"workflow_id": self._workflow_id})
+        collected: list[dict[str, object]] = []
+        for step in self._steps:
+            state_before = self._snapshot(context)
+            step_input = self._merge_trace_payload(step.trace_input(context), state_before)
+            current_request_id = context.runtime.request.request_id
+            await context.runtime.publisher.publish_status(
+                current_request_id,
+                self._source,
+                f"Шаг workflow: {step.title}.",
+                {"workflow_id": self._workflow_id, "step_id": step.step_id},
+            )
+            context = await step.run(context)
+            state_after = self._snapshot(context)
+            step_output = self._merge_trace_payload(step.trace_output(context), state_after)
+            step_trace = {
+                "workflow_id": self._workflow_id,
+                "step": {"id": step.step_id, "title": step.title},
+                "input": step_input,
+                "output": step_output,
+            }
+            tracer.log("workflow_step_traced", step_trace)
+            collected.append(
+                {"step_id": step.step_id, "title": step.title, "input": step_input, "output": step_output}
+            )
+        tracer.log("workflow_trace_flushed", {"workflow_id": self._workflow_id, "steps": collected})
+        tracer.log("workflow_completed", {"workflow_id": self._workflow_id})
+        return context
+
+    def _merge_trace_payload(self, payload: dict[str, object] | None, snapshot: dict[str, object]) -> dict[str, object]:
+        """Attach *snapshot* to *payload* under ``_context``; an empty payload yields the snapshot itself."""
+        if payload:
+            return {**payload, "_context": snapshot}
+        return snapshot
+
+    def _snapshot(self, context: TContext) -> dict[str, object]:
+        """Summarise the context as plain values for the trace.
+
+        Every attribute is read with ``getattr`` and a default, so a context
+        lacking some of them still produces a snapshot. List-like values are
+        reported as a count plus a preview of at most five entries.
+        """
+
+        def text(owner: object, attr: str) -> str:
+            return str(getattr(owner, attr, "") or "")
+
+        def sequence(owner: object, attr: str) -> list:
+            return list(getattr(owner, attr, []) or [])
+
+        analytics = getattr(context, "analytics_meta", None)
+        units = sequence(context, "units")
+        planned = sequence(context, "planned_changes")
+        changeset = sequence(context, "changeset")
+        bundle = getattr(context, "doc_rules_bundle", None)
+        issue_items = sequence(context, "issues")
+
+        planned_preview = [
+            {"op": text(item, "op"), "path": text(item, "path"), "doc_type": text(item, "doc_type")}
+            for item in planned[:5]
+        ]
+        changeset_preview = [{"op": text(item, "op"), "path": text(item, "path")} for item in changeset[:5]]
+
+        return {
+            "source_kind": text(context, "source_kind"),
+            "source_ref": text(context, "source_ref"),
+            "project_root": text(context, "project_root"),
+            "feature_content_len": len(text(context, "feature_content")),
+            "analysis_id": text(analytics, "analysis_id"),
+            "domains": sequence(analytics, "domains"),
+            "subdomains": sequence(analytics, "subdomains"),
+            "units_count": len(units),
+            "unit_headings": [str(getattr(unit, "heading", "")) for unit in units[:5]],
+            "docs_rows_count": len(sequence(context, "docs_catalog_rows")),
+            "doc_rules_enabled": bool(getattr(context, "doc_rules_enabled", False)),
+            "doc_rules_loaded": bundle is not None,
+            "doc_rules_supported_types": sorted(sequence(bundle, "supported_doc_types")),
+            "planned_changes_count": len(planned),
+            "planned_changes_preview": planned_preview,
+            "changeset_count": len(changeset),
+            "changeset_preview": changeset_preview,
+            "apply_changeset": bool(getattr(context, "apply_changeset", False)),
+            "answer_len": len(text(context, "answer")),
+            "issues_count": len(issue_items),
+            "issues_preview": [str(value) for value in issue_items[:5]],
+        }
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/context.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/context.py
new file mode 100644
index 0000000..8b3b5a9
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/context.py
@@ -0,0 +1,35 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.models import DocRulesBundle
+from app.core.agent.runtime.execution_context import RuntimeExecutionContext
+from app.core.agent.utils.process_v2.models import V2RouteResult
+from app.schemas.changeset import ChangeItem
+from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.models import (
+ AnalyticsMeta,
+ FeatureRequirementUnit,
+ PlannedChange,
+)
+
+
+@dataclass(slots=True)
+class DocUpdateFromFeatureContext:
+ """Mutable state passed between the steps of the DOC_UPDATE/FROM_FEATURE workflow."""
+ runtime: RuntimeExecutionContext
+ route: V2RouteResult
+ rag_session_id: str
+ # Set by ResolveSourceStep; source_kind is "markdown_file", "confluence_url" or "url".
+ source_ref: str = ""
+ source_kind: str = ""
+ # Set by LoadSourceContentStep after the markdown file has been read.
+ project_root: str = ""
+ feature_content: str = ""
+ # Set by ParseFeatureRequirementsStep from the parsed markdown.
+ analytics_meta: AnalyticsMeta = field(default_factory=AnalyticsMeta)
+ units: list[FeatureRequirementUnit] = field(default_factory=list)
+ docs_catalog_rows: list[dict] = field(default_factory=list)
+ # LoadDocRulesStep loads the bundle only while doc_rules_enabled is true.
+ doc_rules_enabled: bool = True
+ doc_rules_bundle: DocRulesBundle | None = None
+ planned_changes: list[PlannedChange] = field(default_factory=list)
+ changeset: list[ChangeItem] = field(default_factory=list)
+ # FinalizeAnswerStep sets this to True when the changeset is non-empty.
+ apply_changeset: bool = False
+ # Human-readable problems appended by the steps; listed in the final answer.
+ issues: list[str] = field(default_factory=list)
+ # A non-empty answer makes the visible steps return without doing any work.
+ answer: str = ""
+ answer_generated_payload: dict[str, object] | None = None
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/models.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/models.py
new file mode 100644
index 0000000..f81a1b6
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/models.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+
+@dataclass(slots=True)
+class AnalyticsMeta:
+ """Document-level metadata of an analytics file: its id, domains and subdomains."""
+ analysis_id: str = ""
+ domains: list[str] = field(default_factory=list)
+ subdomains: list[str] = field(default_factory=list)
+
+
+@dataclass(slots=True)
+class FeatureRequirementUnit:
+ """One requirement subsection: heading text, body text and key/value metadata."""
+ heading: str
+ body: str
+ metadata: dict[str, object] = field(default_factory=dict)
+
+
+@dataclass(slots=True)
+class PlannedChange:
+ """A single planned documentation change."""
+ # op, path and doc_type are printed in the "plan" part of the final answer.
+ op: str
+ path: str
+ doc_type: str
+ reason: str
+ title: str
+ doc_id: str
+ requirement_body: str = ""
+ source_refs: list[str] = field(default_factory=list)
+ related_docs: list[str] = field(default_factory=list)
diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/system_rules.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/system_rules.py
new file mode 100644
index 0000000..52dc0e5
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/system_rules.py
@@ -0,0 +1,33 @@
+from __future__ import annotations
+
+# Folder (relative to the project root) for each supported document type.
+DOC_TYPE_TO_FOLDER: dict[str, str] = {
+    "ui_page": "docs/ui",
+    "api_method": "docs/api",
+    "logic_block": "docs/logic",
+    "architecture_overview": "docs/architecture",
+    "integration_doc": "docs/integrations",
+    "domain_entity": "docs/domains",
+    "glossary_item": "docs/glossary",
+    "index_page": "docs",
+}
+
+# Supported document types, in the declaration order of the mapping above.
+ALLOWED_DOC_TYPES: tuple[str, ...] = tuple(DOC_TYPE_TO_FOLDER)
+
+# Rules text; surrounding whitespace is stripped from the literal.
+SYSTEM_RULES_TEXT = """
+Системные правила документации:
+1. Один устойчивый объект — один документ.
+2. Документы не должны дублировать друг друга по смыслу.
+3. Связи между документами должны быть явными (related_docs/links).
+4. Документация организована иерархически по папкам docs/*.
+5. Markdown-документ состоит из YAML frontmatter и body.
+6. Обязательные поля frontmatter: id, title, doc_type, related_docs, status, domain, sub_domain.
+""".strip()
diff --git a/src/app/core/agent/processes/v2/workflows/docs_explain_find_files/__init__.py b/src/app/core/agent/processes/v2/workflows/docs_explain_find_files/__init__.py
deleted file mode 100644
index 74d0a49..0000000
--- a/src/app/core/agent/processes/v2/workflows/docs_explain_find_files/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from app.core.agent.processes.v2.workflows.docs_explain_find_files.graph import DocsExplainFindFilesGraph
-
-__all__ = ["DocsExplainFindFilesGraph"]
diff --git a/src/app/core/agent/processes/v2/workflows/docs_explain_find_files/context.py b/src/app/core/agent/processes/v2/workflows/docs_explain_find_files/context.py
deleted file mode 100644
index 51dad0a..0000000
--- a/src/app/core/agent/processes/v2/workflows/docs_explain_find_files/context.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from __future__ import annotations
-
-from dataclasses import dataclass, field
-
-from app.core.agent.processes.v2.evidence.gate import EvidenceGateDecision
-from app.core.agent.processes.v2.models import RetrievedFile, V2RouteResult
-from app.core.agent.runtime.execution_context import RuntimeExecutionContext
-
-
-@dataclass(slots=True)
-class DocsExplainFindFilesContext:
- runtime: RuntimeExecutionContext
- route: V2RouteResult
- rag_session_id: str
- files: list[RetrievedFile] = field(default_factory=list)
- gate_decision: EvidenceGateDecision | None = None
- answer: str = ""
diff --git a/src/app/core/agent/processes/v2/workflows/docs_explain_find_files/graph.py b/src/app/core/agent/processes/v2/workflows/docs_explain_find_files/graph.py
deleted file mode 100644
index e9ad91f..0000000
--- a/src/app/core/agent/processes/v2/workflows/docs_explain_find_files/graph.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from __future__ import annotations
-
-from app.core.agent.processes.v2.workflows.docs_explain_find_files.context import DocsExplainFindFilesContext
-from app.core.agent.processes.v2.workflows.docs_explain_find_files.steps.finalize_find_files_answer_step import (
- FinalizeFindFilesAnswerStep,
-)
-from app.core.agent.processes.v2.workflows.v2_workflow_graph import V2WorkflowGraph
-
-
-class DocsExplainFindFilesGraph(V2WorkflowGraph[DocsExplainFindFilesContext]):
- def __init__(self) -> None:
- super().__init__(
- workflow_id="v2.docs_explain.find_files",
- source="workflow.v2.find_files",
- steps=[FinalizeFindFilesAnswerStep()],
- )
diff --git a/src/app/core/agent/processes/v2/workflows/docs_explain_find_files/steps/finalize_find_files_answer_step.py b/src/app/core/agent/processes/v2/workflows/docs_explain_find_files/steps/finalize_find_files_answer_step.py
deleted file mode 100644
index 863ae4c..0000000
--- a/src/app/core/agent/processes/v2/workflows/docs_explain_find_files/steps/finalize_find_files_answer_step.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from __future__ import annotations
-
-from app.core.agent.processes.v2.workflows.docs_explain_find_files.context import DocsExplainFindFilesContext
-from app.core.agent.utils.workflow import WorkflowStep
-
-
-class FinalizeFindFilesAnswerStep(WorkflowStep[DocsExplainFindFilesContext]):
- step_id = "finalize_find_files_answer"
- title = "Сборка списка файлов"
-
- async def run(self, context: DocsExplainFindFilesContext) -> DocsExplainFindFilesContext:
- if not context.files:
- context.answer = "Не нашёл файлов документации, которые уверенно соответствуют запросу."
- return context
- if context.gate_decision is not None and context.gate_decision.reason == "low_confidence_shortlist":
- context.answer = "\n".join(item.path for item in context.files[:4])
- return context
- if len(context.files) == 1:
- context.answer = context.files[0].path
- return context
- context.answer = "\n".join(item.path for item in context.files[:4])
- return context
-
- def trace_output(self, context: DocsExplainFindFilesContext) -> dict[str, object]:
- return {"answer_length": len(context.answer)}
diff --git a/src/app/core/agent/processes/v2/workflows/docs_explain_summary/__init__.py b/src/app/core/agent/processes/v2/workflows/docs_explain_summary/__init__.py
deleted file mode 100644
index 99020fa..0000000
--- a/src/app/core/agent/processes/v2/workflows/docs_explain_summary/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from app.core.agent.processes.v2.workflows.docs_explain_summary.graph import DocsExplainSummaryGraph
-
-__all__ = ["DocsExplainSummaryGraph"]
diff --git a/src/app/core/agent/processes/v2/workflows/docs_explain_summary/graph.py b/src/app/core/agent/processes/v2/workflows/docs_explain_summary/graph.py
deleted file mode 100644
index d1df68a..0000000
--- a/src/app/core/agent/processes/v2/workflows/docs_explain_summary/graph.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from __future__ import annotations
-
-from app.core.agent.processes.v2.workflows.docs_explain_summary.context import DocsExplainSummaryContext
-from app.core.agent.processes.v2.workflows.docs_explain_summary.steps.generate_summary_answer_step import (
- GenerateSummaryAnswerStep,
-)
-from app.core.agent.processes.v2.workflows.v2_workflow_graph import V2WorkflowGraph
-from app.core.agent.utils.llm import AgentLlmService
-
-
-class DocsExplainSummaryGraph(V2WorkflowGraph[DocsExplainSummaryContext]):
- def __init__(self, llm: AgentLlmService) -> None:
- super().__init__(
- workflow_id="v2.docs_explain.summary",
- source="workflow.v2.summary",
- steps=[GenerateSummaryAnswerStep(llm)],
- )
diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/README.md b/src/app/core/agent/processes/v2/workflows/general_qa_summary/README.md
new file mode 100644
index 0000000..c4bae97
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/README.md
@@ -0,0 +1,162 @@
+# GENERAL_QA / SUMMARY Workflow
+
+## Контракт сабинтента
+
+| Поле | Значение |
+|---|---|
+| `domain` | `GENERAL` |
+| `intent` | `GENERAL_QA` |
+| `subintent` | `SUMMARY` |
+| `workflow_id` | `v2.general_qa.summary` |
+| `source` | `workflow.v2.general_summary` |
+
+## Диаграмма флоу
+
+```mermaid
+flowchart TD
+ A["RequireRagSessionStep"] --> B["ResolveRetrievalPlanStep"]
+ B --> C["FetchRagRowsStep"]
+ C --> D["PrepareCandidateRowsStep"]
+ D --> E["BuildSummaryEvidenceStep"]
+ E --> F["ApplySummaryEvidenceGateStep"]
+ F --> G["GenerateGeneralSummaryAnswerStep"]
+```
+
+## Шаги процесса
+
+### 1) `RequireRagSessionStep`
+
+Шаг проверяет, что есть активная RAG-сессия с проиндексированной документацией. Если сессии нет, workflow сразу возвращает ответ о недостатке опоры и ставит `missing_rag_session` в gate-решение. Это защищает от псевдо-генерации без данных.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.rag_session_id` | `V2Process` -> `GeneralQaSummaryContext` | Идентификатор RAG-сессии |
+| `self._missing_message` | Конфигурация в `graph.py` | Текст отказа при отсутствии сессии |
+| `self._missing_gate` | Конфигурация в `graph.py` | Преднастроенное gate-решение для раннего выхода |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.answer` | Заполняется `missing_message`, если `rag_session_id` пустой |
+| `context.gate_decision` | Заполняется `missing_gate` (`reason=missing_rag_session`) |
+| `context.answer_generated_payload` | `{"answer_mode", "answer_length"}` для раннего ответа |
+
+### 2) `ResolveRetrievalPlanStep`
+
+Шаг строит retrieval-план для общего QA-summary сценария. План включает профиль `general_qa_grounded_summary`, слои и фильтры, релевантные широкому запросу по документации. Параметры плана пишутся в trace для диагностики политики.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.route` | `intent_router` | Route с сигналами запроса |
+| `self._resolver` | DI из `graph.py` | Реализация `RetrievalPlanResolver` |
+| `context.answer` | Предыдущие шаги | При наличии ответа шаг пропускается |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.retrieval_plan` | `self._resolver.resolve(context.route)` |
+| `process.v2.retrieval_policy.retrieval_plan_resolved` | Лог `profile/layers/limit/filters` |
+
+### 3) `FetchRagRowsStep`
+
+Шаг запускает retrieval через `V2RagRetrievalAdapter` по нормализованному запросу и плану. Адаптер объединяет seed-документы по hints и основную retrieval-выдачу. Так в `retrieved_rows` попадают как точные, так и семантически близкие источники.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.rag_session_id` | Контекст workflow | Сессия поиска |
+| `context.route.normalized_query` | Route | Текст запроса для retrieval |
+| `context.retrieval_plan` | `ResolveRetrievalPlanStep` | План retrieval |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.retrieved_rows` | `await rag_adapter.fetch_rows(...)` |
+
+### 4) `PrepareCandidateRowsStep`
+
+Шаг выполняет post-processing retrieval-строк: metadata lookup, merge и seed от target hints. Он формирует `context.rows` как итоговый набор кандидатов для summary-ranking. Одновременно пишет расширенный retrieval-trace с источниками кандидатов.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.retrieved_rows` | `FetchRagRowsStep` | Сырые retrieval-строки |
+| `context.route` | Route | Hints и сигналы запроса |
+| `self._builder` | `CandidateRowsBuilder()` | Логика подготовки кандидатов |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.rows` | `prepared.rows` из `CandidateRowsBuilder.build(...)` |
+| `process.v2.rag_retrieval.rag_rows_fetched` | Лог retrieval-деталей и источников |
+
+### 5) `BuildSummaryEvidenceStep`
+
+Шаг строит shortlist summary-документов через `DocsEvidenceAssembler.assemble_summaries`. На этом этапе формируются оценка релевантности и причины совпадения для каждого документа. Результат нужен и для gate, и для финального ответа.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.rows` | `PrepareCandidateRowsStep` | Кандидаты после merge |
+| `context.route` | Route | Сигналы маршрута для scoring |
+| `self._assembler` | DI из `graph.py` | Сборщик evidence |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.documents` | `assemble_summaries(context.rows, context.route)` |
+| `process.v2.evidence.evidence_assembled` | Лог количества и путей summary-документов |
+
+### 6) `ApplySummaryEvidenceGateStep`
+
+Шаг проверяет, достаточно ли качества и объёма summary-evidence для grounded-ответа. Если gate не пройден, следующий шаг вернёт `gate_decision.message` без LLM-генерации. Это ограничивает риск неуверенных ответов.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.route` | Route | Контекст запроса |
+| `context.documents` | `BuildSummaryEvidenceStep` | Подготовленные summary |
+| `self._gate` | DI из `graph.py` | Правила gate-проверки |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.gate_decision` | `self._gate.check_summaries(context.route, context.documents)` |
+| `process.v2.pipeline.evidence_gate_checked` | Лог `passed/reason/answer_mode` |
+
+### 7) `GenerateGeneralSummaryAnswerStep`
+
+Шаг финализирует ответ по одной из трёх веток: gate-message, детерминированный режим или LLM по prompt. Если ответ уже выставлен предыдущим шагом (например, `RequireRagSessionStep`), шаг возвращает контекст без изменений. При LLM-ветке формируется `prompt_input` из запроса и списка опорных документов, затем вызывается `llm.generate`. В каждой из трёх веток ставится `answer_generated_payload` с режимом и длиной ответа.
+
+**Входные параметры**
+
+| Параметр | Откуда берётся | Описание |
+|---|---|---|
+| `context.documents` | `BuildSummaryEvidenceStep` | Опорные summary-документы |
+| `context.gate_decision` | `ApplySummaryEvidenceGateStep` | Решение gate |
+| `context.workflow_llm_enabled` | `V2Process` | Флаг LLM/детерминизм |
+| `context.prompt_name` | `V2Process` | Имя prompt-шаблона |
+| `self._llm` | DI из `graph.py` | LLM-сервис |
+
+**Выходные параметры**
+
+| Параметр | Как формируется |
+|---|---|
+| `context.prompt_input` | Сборка блоков `Запрос пользователя` + `Опорные документы` |
+| `context.answer` | Gate-message / deterministic summary / результат `llm.generate(...)` |
+| `context.answer_generated_payload` | `{"answer_mode", "answer_length"}` по ветке ответа |
+
diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/__init__.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/__init__.py
new file mode 100644
index 0000000..138b6cf
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/__init__.py
@@ -0,0 +1,3 @@
+from app.core.agent.processes.v2.workflows.general_qa_summary.graph import GeneralQaSummaryGraph
+
+__all__ = ["GeneralQaSummaryGraph"]
diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/graph.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/graph.py
new file mode 100644
index 0000000..96f21a4
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/graph.py
@@ -0,0 +1,64 @@
+from __future__ import annotations
+
+from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.buffered_graph import GeneralQaSummaryWorkflowGraph
+from app.core.agent.processes.v2.workflows.general_qa_summary.steps.retrieval.candidate_rows import CandidateRowsBuilder
+from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.context import GeneralQaSummaryContext
+from app.core.agent.processes.v2.workflows.general_qa_summary.steps.apply_summary_evidence_gate_step import (
+    ApplySummaryEvidenceGateStep,
+)
+from app.core.agent.processes.v2.workflows.general_qa_summary.steps.build_summary_evidence_step import BuildSummaryEvidenceStep
+from app.core.agent.processes.v2.workflows.general_qa_summary.steps.fetch_rag_rows_step import FetchRagRowsStep
+from app.core.agent.processes.v2.workflows.general_qa_summary.steps.generate_general_summary_answer_step import (
+    GenerateGeneralSummaryAnswerStep,
+)
+from app.core.agent.processes.v2.workflows.general_qa_summary.steps.prepare_candidate_rows_step import (
+    PrepareCandidateRowsStep,
+)
+from app.core.agent.processes.v2.workflows.general_qa_summary.steps.require_rag_session_step import RequireRagSessionStep
+from app.core.agent.processes.v2.workflows.general_qa_summary.steps.resolve_retrieval_plan_step import (
+    ResolveRetrievalPlanStep,
+)
+from app.core.agent.utils.llm import AgentLlmService
+from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
+from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate, EvidenceGateDecision
+from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
+from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
+
+
+# Single source for the refusal text: it is used both as the early answer and as the gate message.
+_MISSING_RAG_SESSION_MESSAGE = "Не могу собрать grounded summary без активной RAG-сессии с проиндексированной документацией."
+
+
+class GeneralQaSummaryGraph(GeneralQaSummaryWorkflowGraph[GeneralQaSummaryContext]):
+    """Wire the seven steps of the ``v2.general_qa.summary`` workflow in execution order."""
+
+    def __init__(
+        self,
+        llm: AgentLlmService,
+        policy_resolver: RetrievalPlanResolver,
+        rag_adapter: V2RagRetrievalAdapter,
+        evidence_assembler: DocsEvidenceAssembler,
+        evidence_gate: DocsEvidenceGate,
+    ) -> None:
+        """Build the step list, handing each injected collaborator to the step that uses it."""
+        super().__init__(
+            workflow_id="v2.general_qa.summary",
+            source="workflow.v2.general_summary",
+            steps=[
+                RequireRagSessionStep(
+                    missing_message=_MISSING_RAG_SESSION_MESSAGE,
+                    missing_gate=EvidenceGateDecision(
+                        passed=False,
+                        answer_mode="insufficient_evidence",
+                        reason="missing_rag_session",
+                        message=_MISSING_RAG_SESSION_MESSAGE,
+                    ),
+                ),
+                ResolveRetrievalPlanStep(policy_resolver),
+                FetchRagRowsStep(rag_adapter),
+                PrepareCandidateRowsStep(CandidateRowsBuilder()),
+                BuildSummaryEvidenceStep(evidence_assembler),
+                ApplySummaryEvidenceGateStep(evidence_gate),
+                GenerateGeneralSummaryAnswerStep(llm),
+            ],
+        )
diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/apply_summary_evidence_gate_step.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/apply_summary_evidence_gate_step.py
new file mode 100644
index 0000000..60343a5
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/apply_summary_evidence_gate_step.py
@@ -0,0 +1,42 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.context_protocols import SummaryWorkflowContext
+from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.pipeline_logging import log_pipeline_step
+from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=SummaryWorkflowContext)
+
+
+class ApplySummaryEvidenceGateStep(WorkflowStep[TContext], Generic[TContext]):
+    """Run the evidence gate over the assembled summary documents.
+
+    The step is a no-op when an earlier step has already produced an answer.
+    """
+
+    step_id = "apply_summary_evidence_gate"
+    title = "Проверка summary evidence"
+
+    def __init__(self, gate: DocsEvidenceGate) -> None:
+        self._gate = gate
+
+    async def run(self, context: TContext) -> TContext:
+        """Store the gate decision on ``context`` and log it as ``evidence_gate_checked``."""
+        if context.answer:
+            return context
+        decision = self._gate.check_summaries(context.route, context.documents)
+        context.gate_decision = decision
+        payload = {
+            "passed": decision.passed,
+            "reason": decision.reason,
+            "answer_mode": decision.answer_mode,
+        }
+        log_pipeline_step(context.runtime, "evidence_gate_checked", payload)
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        """Report whether a gate decision exists and passed."""
+        decision = context.gate_decision
+        return {"passed": bool(decision and decision.passed)}
diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/build_summary_evidence_step.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/build_summary_evidence_step.py
new file mode 100644
index 0000000..a82292e
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/build_summary_evidence_step.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.context_protocols import SummaryWorkflowContext
+from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.pipeline_logging import log_pipeline_step, log_ranking
+from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=SummaryWorkflowContext)
+
+
+class BuildSummaryEvidenceStep(WorkflowStep[TContext], Generic[TContext]):
+    """Assemble the summary documents from the prepared candidate rows."""
+
+    step_id = "build_summary_evidence"
+    title = "Сборка summary evidence"
+
+    def __init__(self, assembler: DocsEvidenceAssembler) -> None:
+        self._assembler = assembler
+
+    async def run(self, context: TContext) -> TContext:
+        """Fill ``context.documents`` from ``context.rows`` and log the result.
+
+        Skipped when an earlier step has already produced an answer.
+        """
+        if context.answer:
+            return context
+        documents = self._assembler.assemble_summaries(context.rows, context.route)
+        context.documents = documents
+        document_count = len(documents)
+        context.runtime.trace.module("process.v2.evidence").log(
+            "evidence_assembled",
+            {
+                "mode": "summary",
+                "document_count": document_count,
+                "documents": [doc.path for doc in documents],
+            },
+        )
+        primary_doc = documents[0].path if documents else None
+        log_pipeline_step(
+            context.runtime,
+            "evidence_assembled",
+            {"mode": "summary", "primary_doc": primary_doc, "document_count": document_count},
+        )
+        log_ranking(context.runtime, documents)
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        """Report how many summary documents are on the context."""
+        return {"document_count": len(context.documents)}
diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/fetch_rag_rows_step.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/fetch_rag_rows_step.py
new file mode 100644
index 0000000..8974716
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/fetch_rag_rows_step.py
@@ -0,0 +1,35 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.context_protocols import RetrievalWorkflowContext
+from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
+
+
+class FetchRagRowsStep(WorkflowStep[TContext], Generic[TContext]):
+    """Fetch rows from the RAG adapter for the resolved retrieval plan."""
+
+    step_id = "fetch_rag_rows"
+    title = "Получение строк из RAG"
+
+    def __init__(self, rag_adapter: V2RagRetrievalAdapter) -> None:
+        self._rag_adapter = rag_adapter
+
+    async def run(self, context: TContext) -> TContext:
+        """Store the adapter result in ``context.retrieved_rows``.
+
+        Skipped when an answer already exists or no retrieval plan was resolved.
+        """
+        plan = context.retrieval_plan
+        if context.answer or plan is None:
+            return context
+        query = context.route.normalized_query
+        context.retrieved_rows = await self._rag_adapter.fetch_rows(context.rag_session_id, query, plan)
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        """Report how many rows are stored in ``context.retrieved_rows``."""
+        return {"retrieved_row_count": len(context.retrieved_rows)}
diff --git a/src/app/core/agent/processes/v2/workflows/general_summary/steps/generate_general_summary_answer_step.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/generate_general_summary_answer_step.py
similarity index 59%
rename from src/app/core/agent/processes/v2/workflows/general_summary/steps/generate_general_summary_answer_step.py
rename to src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/generate_general_summary_answer_step.py
index 5e6945f..9339e66 100644
--- a/src/app/core/agent/processes/v2/workflows/general_summary/steps/generate_general_summary_answer_step.py
+++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/generate_general_summary_answer_step.py
@@ -2,24 +2,31 @@ from __future__ import annotations
import asyncio
-from app.core.agent.processes.v2.workflows.general_summary.context import GeneralSummaryContext
+from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.context import GeneralQaSummaryContext
from app.core.agent.utils.llm import AgentLlmService
from app.core.agent.utils.workflow import WorkflowStep
-class GenerateGeneralSummaryAnswerStep(WorkflowStep[GeneralSummaryContext]):
+class GenerateGeneralSummaryAnswerStep(WorkflowStep[GeneralQaSummaryContext]):
step_id = "generate_general_summary_answer"
title = "Общий ответ через LLM"
def __init__(self, llm: AgentLlmService) -> None:
self._llm = llm
- async def run(self, context: GeneralSummaryContext) -> GeneralSummaryContext:
+ async def run(self, context: GeneralQaSummaryContext) -> GeneralQaSummaryContext:
+ if context.answer:
+ return context
if context.gate_decision is not None and not context.gate_decision.passed:
context.answer = context.gate_decision.message
+ context.answer_generated_payload = {
+ "answer_mode": context.gate_decision.answer_mode,
+ "answer_length": len(context.answer),
+ }
return context
if not context.workflow_llm_enabled:
context.answer = self._build_deterministic_answer(context)
+ context.answer_generated_payload = {"answer_mode": "grounded_summary", "answer_length": len(context.answer)}
return context
context.prompt_input = self._build_prompt_input(context)
request_id = context.runtime.request.request_id
@@ -30,13 +37,11 @@ class GenerateGeneralSummaryAnswerStep(WorkflowStep[GeneralSummaryContext]):
log_context=f"agent:{request_id}",
trace=context.runtime.trace.module("workflow.v2.general_summary.llm"),
)
+ context.answer_generated_payload = {"answer_mode": "grounded_summary", "answer_length": len(context.answer)}
return context
- def _build_prompt_input(self, context: GeneralSummaryContext) -> str:
- blocks = [
- f"Запрос пользователя:\n{context.route.user_query}",
- "Опорные документы:",
- ]
+ def _build_prompt_input(self, context: GeneralQaSummaryContext) -> str:
+ blocks = [f"Запрос пользователя:\n{context.route.user_query}", "Опорные документы:"]
for index, item in enumerate(context.documents, start=1):
blocks.append(
f"{index}. path: {item.path}\n"
@@ -45,13 +50,13 @@ class GenerateGeneralSummaryAnswerStep(WorkflowStep[GeneralSummaryContext]):
)
return "\n\n".join(blocks)
- def _build_deterministic_answer(self, context: GeneralSummaryContext) -> str:
+ def _build_deterministic_answer(self, context: GeneralQaSummaryContext) -> str:
if not context.documents:
return "В найденной документации нет достаточной опоры для общего summary по запросу."
return "\n".join(item.summary for item in context.documents[:2] if item.summary)
- def trace_input(self, context: GeneralSummaryContext) -> dict[str, object]:
+ def trace_input(self, context: GeneralQaSummaryContext) -> dict[str, object]:
return {"query": context.route.normalized_query}
- def trace_output(self, context: GeneralSummaryContext) -> dict[str, object]:
+ def trace_output(self, context: GeneralQaSummaryContext) -> dict[str, object]:
return {"answer_length": len(context.answer)}
diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/prepare_candidate_rows_step.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/prepare_candidate_rows_step.py
new file mode 100644
index 0000000..d1c7553
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/prepare_candidate_rows_step.py
@@ -0,0 +1,38 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.general_qa_summary.steps.retrieval.candidate_rows import CandidateRowsBuilder
+from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.context_protocols import RetrievalWorkflowContext
+from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.pipeline_logging import log_retrieval_trace
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
+
+
+class PrepareCandidateRowsStep(WorkflowStep[TContext], Generic[TContext]):
+    """Turn the raw retrieved rows into the candidate rows stored in ``context.rows``."""
+
+    step_id = "prepare_candidate_rows"
+    title = "Подготовка candidate rows"
+
+    def __init__(self, builder: CandidateRowsBuilder) -> None:
+        self._builder = builder
+
+    async def run(self, context: TContext) -> TContext:
+        """Store the builder's rows on the context and write the retrieval trace.
+
+        Skipped when an answer already exists or no retrieval plan was resolved.
+        """
+        plan = context.retrieval_plan
+        if context.answer or plan is None:
+            return context
+        raw_rows = context.retrieved_rows
+        prepared = self._builder.build(raw_rows, context.route)
+        context.rows = prepared.rows
+        log_retrieval_trace(context.runtime, context.route, plan, raw_rows, prepared.metadata_rows, prepared.rows)
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        """Report how many candidate rows are stored in ``context.rows``."""
+        return {"row_count": len(context.rows)}
diff --git a/src/app/core/agent/processes/v2/general_prompts.yml b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/prompts/prompts.yml
similarity index 100%
rename from src/app/core/agent/processes/v2/general_prompts.yml
rename to src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/prompts/prompts.yml
diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/require_rag_session_step.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/require_rag_session_step.py
new file mode 100644
index 0000000..44bae7e
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/require_rag_session_step.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.context_protocols import RetrievalWorkflowContext
+from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.pipeline_logging import log_pipeline_step
+from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
+
+
+class RequireRagSessionStep(WorkflowStep[TContext], Generic[TContext]):
+    """Short-circuit the workflow with a fixed answer when no RAG session id is set."""
+
+    step_id = "require_rag_session"
+    title = "Проверка RAG-сессии"
+
+    def __init__(self, *, missing_message: str, missing_gate: EvidenceGateDecision | None = None) -> None:
+        """Keep the refusal text and the optional gate decision recorded alongside it."""
+        self._missing_message = missing_message
+        self._missing_gate = missing_gate
+
+    async def run(self, context: TContext) -> TContext:
+        """Set the refusal answer when ``context.rag_session_id`` is falsy.
+
+        With a configured gate decision the step also stores it on the context,
+        fills ``answer_generated_payload`` and logs ``evidence_gate_checked``.
+        """
+        if context.rag_session_id:
+            return context
+        context.answer = self._missing_message
+        gate = self._missing_gate
+        if gate is None:
+            return context
+        context.gate_decision = gate
+        context.answer_generated_payload = {
+            "answer_mode": gate.answer_mode,
+            "answer_length": len(context.answer),
+        }
+        log_pipeline_step(
+            context.runtime,
+            "evidence_gate_checked",
+            {"passed": gate.passed, "reason": gate.reason, "answer_mode": gate.answer_mode},
+        )
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        """Report whether the context carries a RAG session id."""
+        return {"has_rag_session": bool(context.rag_session_id)}
diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/resolve_retrieval_plan_step.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/resolve_retrieval_plan_step.py
new file mode 100644
index 0000000..d48fb84
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/resolve_retrieval_plan_step.py
@@ -0,0 +1,39 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.context_protocols import RetrievalWorkflowContext
+from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.pipeline_logging import log_pipeline_step
+from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
+
+
+class ResolveRetrievalPlanStep(WorkflowStep[TContext], Generic[TContext]):
+    """Resolve the retrieval plan for the current route and record it in the trace."""
+
+    step_id = "resolve_retrieval_plan"
+    title = "Выбор retrieval-плана"
+
+    def __init__(self, resolver: RetrievalPlanResolver) -> None:
+        self._resolver = resolver
+
+    async def run(self, context: TContext) -> TContext:
+        """Store the resolved plan in ``context.retrieval_plan`` and log its parameters.
+
+        Skipped when an earlier step has already produced an answer.
+        """
+        if context.answer:
+            return context
+        plan = self._resolver.resolve(context.route)
+        context.retrieval_plan = plan
+        policy_payload = {"profile": plan.profile, "layers": plan.layers, "limit": plan.limit, "filters": plan.filters}
+        context.runtime.trace.module("process.v2.retrieval_policy").log("retrieval_plan_resolved", policy_payload)
+        pipeline_payload = {"profile": plan.profile, "layers": plan.layers, "filters": plan.filters}
+        log_pipeline_step(context.runtime, "retrieval_profile_selected", pipeline_payload)
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        """Report the plan's profile, or an empty string when it is unavailable."""
+        return {"profile": getattr(context.retrieval_plan, "profile", "")}
diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/retrieval/__init__.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/retrieval/__init__.py
new file mode 100644
index 0000000..bcd4a4a
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/retrieval/__init__.py
@@ -0,0 +1,2 @@
+"""Retrieval-related step helpers for the general-qa summary workflow."""
+
diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/retrieval/candidate_rows.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/retrieval/candidate_rows.py
new file mode 100644
index 0000000..b1d88cf
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/retrieval/candidate_rows.py
@@ -0,0 +1,58 @@
+"""Build candidate rows for the general QA summary (metadata rows + seeds from target hints)."""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+
+from app.core.agent.utils.process_v2.models import V2RouteResult
+from app.core.agent.utils.process_v2.rag_retrieval import DocsMetadataLookupIndex
+from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import (
+    RagRowIndex,
+    merge_row_lists,
+    normalize_doc_path,
+    normalized_path_set,
+    seed_candidates_from_target_hints,
+)
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass(slots=True)
+class CandidateRowsResult:
+    """Rows produced by ``CandidateRowsBuilder.build``."""
+
+    metadata_rows: list[dict]  # rows returned by the metadata lookup
+    rows: list[dict]  # merged and hint-seeded candidate rows
+
+
+class CandidateRowsBuilder:
+    """Combine retrieved rows with metadata-lookup rows and target-hint seeds."""
+
+    def build(self, retrieved_rows: list[dict], route: V2RouteResult) -> CandidateRowsResult:
+        """Return the metadata rows and the final candidate rows for ``route``."""
+        metadata_rows = DocsMetadataLookupIndex(retrieved_rows).lookup(route)
+        rows = merge_row_lists(retrieved_rows, metadata_rows)
+        rows = seed_candidates_from_target_hints(rows, route.anchors.target_doc_hints, RagRowIndex(rows))
+        self._print_missing_target_hints(route, rows)
+        return CandidateRowsResult(metadata_rows=metadata_rows, rows=rows)
+
+    def _print_missing_target_hints(self, route: V2RouteResult, rows: list[dict]) -> None:
+        """Log an error for every file-like ``docs/`` target hint absent from ``rows``.
+
+        The method name is kept for compatibility; the message now goes to the
+        module logger instead of stdout.
+        """
+        hints = route.anchors.target_doc_hints
+        if not hints:
+            return
+        candidate_paths = normalized_path_set(rows)
+        for hint in hints:
+            if not str(hint or "").strip():
+                continue
+            normalized = normalize_doc_path(hint)
+            # Only hints that look like a concrete file under docs/ are checked.
+            if not normalized.startswith("docs/") or "." not in normalized.rsplit("/", 1)[-1]:
+                continue
+            if normalized not in candidate_paths:
+                logger.error("target doc missing from candidates: %s", normalized)
diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/retrieval/retrieval_policy.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/retrieval/retrieval_policy.py
new file mode 100644
index 0000000..ddf4b80
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/retrieval/retrieval_policy.py
@@ -0,0 +1,33 @@
+from __future__ import annotations
+
+from app.core.agent.utils.process_v2.models import V2Intent, V2RouteResult
+from app.core.rag.contracts.enums import RagLayer
+from app.core.rag.retrieval.session_retriever import RetrievalPlan
+
+
+class GeneralQaSummaryRetrievalPolicy:
+    """Retrieval policy for ``GENERAL_QA`` routes: catalog and chunk layers, limit 8."""
+
+    _LAYERS = [RagLayer.DOCS_DOCUMENT_CATALOG, RagLayer.DOCS_DOC_CHUNKS]
+
+    def supports(self, route: V2RouteResult) -> bool:
+        """Return True when the route's intent equals ``V2Intent.GENERAL_QA``."""
+        return route.intent == V2Intent.GENERAL_QA
+
+    def resolve(self, route: V2RouteResult) -> RetrievalPlan:
+        """Build the ``general_qa_grounded_summary`` plan for ``route``.
+
+        The layer list and the target hints are copied into the plan.
+        """
+        filters = {
+            "prefer_path_prefixes": ["docs/architecture/", "docs/"],
+            "prefer_like_patterns": ["%readme.md%", "%overview%"],
+            "target_doc_hints": list(route.anchors.target_doc_hints),
+        }
+        return RetrievalPlan(
+            profile="general_qa_grounded_summary",
+            layers=list(self._LAYERS),
+            limit=8,
+            filters=filters,
+        )
+
diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/__init__.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/__init__.py
new file mode 100644
index 0000000..1f799db
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/__init__.py
@@ -0,0 +1,2 @@
+"""Runtime helpers for the general-qa summary workflow."""
+
diff --git a/src/app/core/agent/processes/v2/workflows/v2_workflow_graph.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/buffered_graph.py
similarity index 73%
rename from src/app/core/agent/processes/v2/workflows/v2_workflow_graph.py
rename to src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/buffered_graph.py
index b3db545..efe5b81 100644
--- a/src/app/core/agent/processes/v2/workflows/v2_workflow_graph.py
+++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/buffered_graph.py
@@ -1,18 +1,16 @@
-"""Workflow-граф v2: буфер шаговых логов и один сброс в trace в конце прогона."""
+"""Граф workflow general QA summary: буфер шагов и один сброс в trace (на базе utils.workflow)."""
from __future__ import annotations
-from typing import Generic, Sequence, TypeVar
+from typing import TypeVar
from app.core.agent.utils.workflow.context import WorkflowContext
from app.core.agent.utils.workflow.graph import WorkflowGraph
-from app.core.agent.utils.workflow.step import WorkflowStep
-
TContext = TypeVar("TContext", bound=WorkflowContext)
-class V2WorkflowGraph(WorkflowGraph[TContext]):
+class GeneralQaSummaryWorkflowGraph(WorkflowGraph[TContext]):
"""Не логирует step_started/step_completed по отдельности; сбрасывает буфер в ``workflow_trace_flushed``."""
async def run(self, context: TContext) -> TContext:
@@ -30,6 +28,15 @@ class V2WorkflowGraph(WorkflowGraph[TContext]):
)
context = await step.run(context)
out = step.trace_output(context)
+ trace.log(
+ "workflow_step_traced",
+ {
+ "workflow_id": self._workflow_id,
+ "step": {"id": step.step_id, "title": step.title},
+ "input": inp,
+ "output": out,
+ },
+ )
steps_buffer.append({"step_id": step.step_id, "title": step.title, "input": inp, "output": out})
trace.log(
"workflow_trace_flushed",
diff --git a/src/app/core/agent/processes/v2/workflows/docs_explain_summary/context.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/context.py
similarity index 50%
rename from src/app/core/agent/processes/v2/workflows/docs_explain_summary/context.py
rename to src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/context.py
index 660dd63..114f0db 100644
--- a/src/app/core/agent/processes/v2/workflows/docs_explain_summary/context.py
+++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/context.py
@@ -2,19 +2,24 @@ from __future__ import annotations
from dataclasses import dataclass, field
-from app.core.agent.processes.v2.evidence.gate import EvidenceGateDecision
-from app.core.agent.processes.v2.models import RetrievedSummary, V2RouteResult
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
+from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
+from app.core.agent.utils.process_v2.models import RetrievedSummary, V2RouteResult
+from app.core.rag.retrieval.session_retriever import RetrievalPlan
@dataclass(slots=True)
-class DocsExplainSummaryContext:
+class GeneralQaSummaryContext:
runtime: RuntimeExecutionContext
route: V2RouteResult
rag_session_id: str
prompt_name: str
workflow_llm_enabled: bool = True
+ retrieval_plan: RetrievalPlan | None = None
+ retrieved_rows: list[dict] = field(default_factory=list)
+ rows: list[dict] = field(default_factory=list)
documents: list[RetrievedSummary] = field(default_factory=list)
gate_decision: EvidenceGateDecision | None = None
prompt_input: str = ""
answer: str = ""
+ answer_generated_payload: dict[str, object] | None = None
diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/context_protocols.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/context_protocols.py
new file mode 100644
index 0000000..88b3f92
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/context_protocols.py
@@ -0,0 +1,26 @@
+"""Протоколы контекста для workflow general QA summary."""
+
+from __future__ import annotations
+
+from typing import Protocol
+
+from app.core.agent.runtime.execution_context import RuntimeExecutionContext
+from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
+from app.core.agent.utils.process_v2.models import RetrievedSummary, V2RouteResult
+from app.core.rag.retrieval.session_retriever import RetrievalPlan
+
+
+class RetrievalWorkflowContext(Protocol):
+    """Structural type for a workflow context that carries retrieval state.
+
+    Any object exposing the attributes below satisfies the protocol; it does
+    not have to inherit from this class.
+    """
+
+    runtime: RuntimeExecutionContext
+    route: V2RouteResult
+    rag_session_id: str
+
+    # NOTE(review): ``retrieved_rows`` vs ``rows`` presumably mirror the raw
+    # retriever output vs the final candidate list — confirm against the steps
+    # that populate them.
+    retrieval_plan: RetrievalPlan | None
+    retrieved_rows: list[dict]
+    rows: list[dict]
+
+    gate_decision: EvidenceGateDecision | None
+    answer: str
+    answer_generated_payload: dict[str, object] | None
+
+
+class SummaryWorkflowContext(RetrievalWorkflowContext, Protocol):
+    """Retrieval workflow context that additionally exposes summary documents."""
+
+    documents: list[RetrievedSummary]
diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/pipeline_logging.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/pipeline_logging.py
new file mode 100644
index 0000000..9f1b542
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/pipeline_logging.py
@@ -0,0 +1,106 @@
+"""Логирование retrieval/pipeline/ranking для general QA summary."""
+
+from __future__ import annotations
+
+from app.core.agent.utils.process_v2.models import V2RouteResult
+from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import normalize_doc_path, row_path
+
+
+def log_pipeline_step(runtime, step: str, payload: dict[str, object]) -> None:
+    """Record one event on the ``process.v2.pipeline`` trace module.
+
+    Args:
+        runtime: Object exposing ``trace.module(name)``; the returned module
+            must provide ``log(event, payload)``.
+        step: Event name, forwarded as the first argument of ``log``.
+        payload: Event body, forwarded unchanged.
+    """
+    pipeline_trace = runtime.trace.module("process.v2.pipeline")
+    pipeline_trace.log(step, payload)
+
+
+def log_retrieval_trace(runtime, route: V2RouteResult, plan, retrieved_rows: list[dict], metadata_rows: list[dict], rows: list[dict]) -> None:
+    """Emit the three trace events that describe one retrieval pass.
+
+    Events, in order:
+      1. ``rag_rows_fetched`` on the ``process.v2.rag_retrieval`` module with
+         every row in ``rows`` passed through ``trace_row``.
+      2. ``candidate_generation`` on the pipeline module: target-doc hints,
+         candidate paths, the first 8 traced rows and a per-source breakdown
+         (first 5 rows of each source).
+      3. ``retrieval_executed`` on the pipeline module with the first 5 rows.
+
+    Args:
+        runtime: Object exposing ``trace.module(name).log(event, payload)``.
+        route: Route whose ``user_query`` and ``anchors`` are logged.
+        plan: Retrieval plan; only ``plan.profile`` is read.
+        retrieved_rows: Rows reported under the ``seeded`` / ``semantic`` sources.
+        metadata_rows: Rows reported under the ``metadata_lookup`` source.
+        rows: Candidate rows reported as the overall result.
+    """
+    rag_trace = runtime.trace.module("process.v2.rag_retrieval")
+    rag_trace.log(
+        "rag_rows_fetched",
+        {
+            "profile": plan.profile,
+            "row_count": len(rows),
+            "rows": [trace_row(row) for row in rows],
+        },
+    )
+
+    # Blank / None hints are ignored when building the set used to mark seeded rows.
+    hinted_paths = {normalize_doc_path(hint) for hint in route.anchors.target_doc_hints if str(hint or "").strip()}
+    head_retrieved = retrieved_rows[:5]
+    candidate_payload = {
+        "query": route.user_query,
+        "profile": plan.profile,
+        "details": {
+            "target_doc_hints": list(route.anchors.target_doc_hints),
+            "candidates_before_ranking": [row_path(row) for row in rows if row_path(row)],
+        },
+        "resolved_aliases": route.anchors.matched_aliases,
+        "target_doc_hints": route.anchors.target_doc_hints,
+        "candidate_docs_before_ranking": [trace_row(row) for row in rows[:8]],
+        "sources": {
+            "seeded": [trace_row(row) for row in head_retrieved if row_path(row) in hinted_paths],
+            "metadata_lookup": [trace_row(row) for row in metadata_rows[:5]],
+            "semantic": [trace_row(row) for row in head_retrieved],
+        },
+    }
+    log_pipeline_step(runtime, "candidate_generation", candidate_payload)
+
+    executed_payload = {
+        "query": route.user_query,
+        "profile": plan.profile,
+        "row_count": len(rows),
+        "target_doc_hints": route.anchors.target_doc_hints,
+        "top_results": [trace_row(row) for row in rows[:5]],
+    }
+    log_pipeline_step(runtime, "retrieval_executed", executed_payload)
+
+
+def log_ranking(runtime, items: list) -> None:
+ """Log ranking details for the first four ranked items.
+
+ Emits ``ranking_explained`` pipeline events: one carrying a single item's
+ path, score, match reason and score breakdown, and one aggregate event with
+ ``top_docs_after_ranking`` and ``ranking_score_breakdown``. Attributes are
+ read with ``getattr`` defaults, so items lacking a field are logged with
+ ``""`` / ``0`` / ``{}`` instead of raising.
+
+ NOTE(review): indentation is not recoverable in this view. The first
+ ``log_pipeline_step`` call reads ``item``, so it presumably sits inside the
+ loop (one event per item) — confirm; outside the loop an empty ``items``
+ would raise ``NameError``.
+ """
+ top_docs: list[dict[str, object]] = []
+ # Only the top 4 items are traced.
+ for item in items[:4]:
+ top_docs.append(
+ {
+ "doc": getattr(item, "path", ""),
+ "score": getattr(item, "score", 0),
+ "match_reason": getattr(item, "match_reason", ""),
+ }
+ )
+ # Per-item event: same event name as the aggregate below, different payload shape.
+ log_pipeline_step(
+ runtime,
+ "ranking_explained",
+ {
+ "doc": getattr(item, "path", ""),
+ "score_breakdown": getattr(item, "score_breakdown", {}),
+ "score": getattr(item, "score", 0),
+ "match_reason": getattr(item, "match_reason", ""),
+ },
+ )
+ # Aggregate event summarising the same top-4 slice.
+ log_pipeline_step(
+ runtime,
+ "ranking_explained",
+ {
+ "top_docs_after_ranking": top_docs,
+ "ranking_score_breakdown": [
+ {
+ "doc": getattr(item, "path", ""),
+ "score_breakdown": getattr(item, "score_breakdown", {}),
+ }
+ for item in items[:4]
+ ],
+ },
+ )
+
+
+def trace_row(row: dict) -> dict[str, object]:
+    """Project a RAG row onto a compact, string-only dict for trace logs.
+
+    Missing or falsy fields become ``""``. ``summary_text`` and the stripped
+    ``content`` are truncated to 400 characters. ``document_id`` is taken from
+    ``metadata["document_id"]``, then ``metadata["doc_id"]``, then the row's
+    own ``document_id``.
+
+    Args:
+        row: Row mapping; ``metadata`` is used only when it is a dict.
+
+    Returns:
+        A new dict with the keys ``layer``, ``path``, ``title``,
+        ``document_id``, ``entity_name``, ``summary_text``, ``section_path``,
+        ``metadata_domain``, ``metadata_subdomain`` and ``content_preview``.
+    """
+    raw_metadata = row.get("metadata")
+    # Tracing must never break the request: treat non-dict metadata (e.g. an
+    # undecoded JSON string) as absent instead of failing on ``.get``.
+    metadata = raw_metadata if isinstance(raw_metadata, dict) else {}
+    content = str(row.get("content") or "").strip()
+    return {
+        "layer": str(row.get("layer") or ""),
+        "path": str(row.get("path") or ""),
+        "title": str(row.get("title") or ""),
+        "document_id": str(metadata.get("document_id") or metadata.get("doc_id") or row.get("document_id") or ""),
+        "entity_name": str(metadata.get("entity_name") or ""),
+        "summary_text": str(metadata.get("summary_text") or "")[:400],
+        "section_path": str(metadata.get("section_path") or ""),
+        "metadata_domain": str(metadata.get("domain") or ""),
+        "metadata_subdomain": str(metadata.get("subdomain") or ""),
+        "content_preview": content[:400],
+    }
diff --git a/src/app/core/agent/processes/v2/workflows/general_summary/__init__.py b/src/app/core/agent/processes/v2/workflows/general_summary/__init__.py
deleted file mode 100644
index 732779c..0000000
--- a/src/app/core/agent/processes/v2/workflows/general_summary/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from app.core.agent.processes.v2.workflows.general_summary.graph import GeneralSummaryGraph
-
-__all__ = ["GeneralSummaryGraph"]
diff --git a/src/app/core/agent/processes/v2/workflows/general_summary/context.py b/src/app/core/agent/processes/v2/workflows/general_summary/context.py
deleted file mode 100644
index d2b2507..0000000
--- a/src/app/core/agent/processes/v2/workflows/general_summary/context.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from __future__ import annotations
-
-from dataclasses import dataclass, field
-
-from app.core.agent.processes.v2.evidence.gate import EvidenceGateDecision
-from app.core.agent.processes.v2.models import RetrievedSummary, V2RouteResult
-from app.core.agent.runtime.execution_context import RuntimeExecutionContext
-
-
-@dataclass(slots=True)
-class GeneralSummaryContext:
- runtime: RuntimeExecutionContext
- route: V2RouteResult
- prompt_name: str
- workflow_llm_enabled: bool = True
- documents: list[RetrievedSummary] = field(default_factory=list)
- gate_decision: EvidenceGateDecision | None = None
- prompt_input: str = ""
- answer: str = ""
diff --git a/src/app/core/agent/processes/v2/workflows/general_summary/graph.py b/src/app/core/agent/processes/v2/workflows/general_summary/graph.py
deleted file mode 100644
index 5441cad..0000000
--- a/src/app/core/agent/processes/v2/workflows/general_summary/graph.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from __future__ import annotations
-
-from app.core.agent.processes.v2.workflows.general_summary.context import GeneralSummaryContext
-from app.core.agent.processes.v2.workflows.general_summary.steps.generate_general_summary_answer_step import (
- GenerateGeneralSummaryAnswerStep,
-)
-from app.core.agent.processes.v2.workflows.v2_workflow_graph import V2WorkflowGraph
-from app.core.agent.utils.llm import AgentLlmService
-
-
-class GeneralSummaryGraph(V2WorkflowGraph[GeneralSummaryContext]):
- def __init__(self, llm: AgentLlmService) -> None:
- super().__init__(
- workflow_id="v2.general_qa.summary",
- source="workflow.v2.general_summary",
- steps=[GenerateGeneralSummaryAnswerStep(llm)],
- )
diff --git a/src/app/core/agent/runtime/agent_runtime.py b/src/app/core/agent/runtime/agent_runtime.py
index 817513e..00f2ba7 100644
--- a/src/app/core/agent/runtime/agent_runtime.py
+++ b/src/app/core/agent/runtime/agent_runtime.py
@@ -47,6 +47,8 @@ class AgentRuntime:
await self._announce_start(request.request_id, process.version)
result = await self._process_runner.run(context, process)
request.answer = result.answer
+ request.changeset = list(result.changeset)
+ request.apply_changeset = bool(result.apply_changeset)
await self._publish_result(request)
self._complete_request(request, session)
except Exception as exc:
diff --git a/src/app/core/agent/utils/process_v2/__init__.py b/src/app/core/agent/utils/process_v2/__init__.py
new file mode 100644
index 0000000..5868c0e
--- /dev/null
+++ b/src/app/core/agent/utils/process_v2/__init__.py
@@ -0,0 +1 @@
+"""Shared runtime helpers for process v2."""
diff --git a/src/app/core/agent/processes/v2/anchor_signals.py b/src/app/core/agent/utils/process_v2/anchor_signals.py
similarity index 81%
rename from src/app/core/agent/processes/v2/anchor_signals.py
rename to src/app/core/agent/utils/process_v2/anchor_signals.py
index ea9b177..21046f0 100644
--- a/src/app/core/agent/processes/v2/anchor_signals.py
+++ b/src/app/core/agent/utils/process_v2/anchor_signals.py
@@ -1,6 +1,6 @@
from __future__ import annotations
-from app.core.agent.processes.v2.models import V2AnchorType, V2RouteAnchors, V2RouteResult, V2Subintent
+from app.core.agent.utils.process_v2.models import V2AnchorType, V2RouteAnchors, V2RouteResult, V2Subintent
def anchor_signal_types(route: V2RouteResult) -> set[str]:
@@ -28,6 +28,11 @@ def route_anchor_summary(route: V2RouteResult) -> dict[str, object]:
"matched_aliases": list(route.anchors.matched_aliases),
"process_domain": route.anchors.process_domain,
"process_subdomain": route.anchors.process_subdomain,
+ "scope_type": route.scope_type,
+ "candidate_domains": [c.value for c in route.anchors.candidate_domains],
+ "candidate_subdomains": [c.value for c in route.anchors.candidate_subdomains],
+ "candidate_entities": [c.value for c in route.anchors.candidate_entities],
+ "candidate_apis": [c.value for c in route.anchors.candidate_apis],
"signal_types": sorted(anchor_signal_types(route)),
}
diff --git a/src/app/core/agent/utils/process_v2/evidence/__init__.py b/src/app/core/agent/utils/process_v2/evidence/__init__.py
new file mode 100644
index 0000000..b518b2d
--- /dev/null
+++ b/src/app/core/agent/utils/process_v2/evidence/__init__.py
@@ -0,0 +1,4 @@
+from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
+from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate, EvidenceGateDecision
+
+__all__ = ["DocsEvidenceAssembler", "DocsEvidenceGate", "EvidenceGateDecision"]
diff --git a/src/app/core/agent/processes/v2/evidence/assembler.py b/src/app/core/agent/utils/process_v2/evidence/assembler.py
similarity index 95%
rename from src/app/core/agent/processes/v2/evidence/assembler.py
rename to src/app/core/agent/utils/process_v2/evidence/assembler.py
index 537a9cc..5b241da 100644
--- a/src/app/core/agent/processes/v2/evidence/assembler.py
+++ b/src/app/core/agent/utils/process_v2/evidence/assembler.py
@@ -4,15 +4,16 @@ from __future__ import annotations
import re
-from app.core.agent.processes.v2.anchor_signals import anchor_signal_types
-from app.core.agent.processes.v2.models import RetrievedFile, RetrievedSummary, V2AnchorType, V2RouteResult
-from app.core.agent.processes.v2.retrieval.target_doc_seeding import normalize_doc_path
+from app.core.agent.utils.process_v2.anchor_signals import anchor_signal_types
+from app.core.agent.utils.process_v2.models import RetrievedFile, RetrievedSummary, V2AnchorType, V2RouteResult
+from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import normalize_doc_path
from app.core.rag.contracts.enums import RagLayer
class DocsEvidenceAssembler:
_API_PATH_PREFIXES = ("docs/api/", "docs/endpoints/", "docs/methods/", "api/", "endpoints/", "methods/")
_GENERIC_DOC_MARKERS = ("readme", "overview", "index", "navigation", "related docs", "catalog")
+
def assemble_summaries(self, rows: list[dict], route: V2RouteResult) -> list[RetrievedSummary]:
items = self._rank_rows(rows, route, mode="summary")
ranked = [
@@ -140,7 +141,6 @@ class DocsEvidenceAssembler:
if mode == "find_files":
breakdown["path_match"] *= 3
breakdown["filename_match"] *= 2
- breakdown["alias_match"] *= 1
breakdown["semantic"] = max(0, breakdown["semantic"] // 2)
return breakdown
@@ -181,10 +181,7 @@ class DocsEvidenceAssembler:
hn = normalize_doc_path(hint).lower()
if hn in top_norm:
continue
- candidate = next(
- (item for item in ranked if normalize_doc_path(item["path"]).lower() == hn),
- None,
- )
+ candidate = next((item for item in ranked if normalize_doc_path(item["path"]).lower() == hn), None)
if candidate is None:
continue
if len(top) < k:
@@ -203,7 +200,10 @@ class DocsEvidenceAssembler:
first = ranked[0]
if not first.get("is_generic_doc"):
return ranked
- promoted = next((item for item in ranked[1:] if not item.get("is_generic_doc") and self._is_specific_candidate(item, route)), None)
+ promoted = next(
+ (item for item in ranked[1:] if not item.get("is_generic_doc") and self._is_specific_candidate(item, route)),
+ None,
+ )
if promoted is None:
return ranked
return [promoted] + [item for item in ranked if item["path"] != promoted["path"]]
diff --git a/src/app/core/agent/processes/v2/evidence/gate.py b/src/app/core/agent/utils/process_v2/evidence/gate.py
similarity index 94%
rename from src/app/core/agent/processes/v2/evidence/gate.py
rename to src/app/core/agent/utils/process_v2/evidence/gate.py
index 37d961b..ed6b0d6 100644
--- a/src/app/core/agent/processes/v2/evidence/gate.py
+++ b/src/app/core/agent/utils/process_v2/evidence/gate.py
@@ -2,8 +2,8 @@ from __future__ import annotations
from dataclasses import dataclass, field
-from app.core.agent.processes.v2.anchor_signals import anchor_signal_types
-from app.core.agent.processes.v2.models import RetrievedFile, RetrievedSummary, V2AnchorType, V2Intent, V2RouteResult
+from app.core.agent.utils.process_v2.anchor_signals import anchor_signal_types
+from app.core.agent.utils.process_v2.models import RetrievedFile, RetrievedSummary, V2AnchorType, V2Intent, V2RouteResult
@dataclass(slots=True)
diff --git a/src/app/core/agent/processes/v2/models.py b/src/app/core/agent/utils/process_v2/models.py
similarity index 70%
rename from src/app/core/agent/processes/v2/models.py
rename to src/app/core/agent/utils/process_v2/models.py
index a9b86bc..51e9751 100644
--- a/src/app/core/agent/processes/v2/models.py
+++ b/src/app/core/agent/utils/process_v2/models.py
@@ -1,4 +1,4 @@
-"""Типы маршрута и выдачи retrieval для процесса v2."""
+"""Route and retrieval models for process v2."""
from __future__ import annotations
@@ -12,12 +12,15 @@ class V2Domain:
class V2Intent:
DOC_EXPLAIN = "DOC_EXPLAIN"
+ DOC_UPDATE = "DOC_UPDATE"
GENERAL_QA = "GENERAL_QA"
class V2Subintent:
SUMMARY = "SUMMARY"
FIND_FILES = "FIND_FILES"
+ API_EXPOSED = "API_EXPOSED"
+ FROM_FEATURE = "FROM_FEATURE"
class V2AnchorType:
@@ -29,6 +32,26 @@ class V2AnchorType:
FIND_FILES = "FIND_FILES"
+class V2ScopeType:
+    """String constants for the grounded documentation scope (pre-LLM, catalog-backed).
+
+    Plain ``str`` class attributes, matching the other ``V2*`` constant holders
+    in this module.
+    """
+
+    GLOBAL = "global"
+    DOMAIN = "domain"
+    SUBDOMAIN = "subdomain"
+    ENTITY = "entity"
+    # Used when no scope has been determined.
+    UNKNOWN = "unknown"
+
+
+@dataclass(slots=True)
+class ScopeCandidate:
+    """One catalog-backed match considered while the intent router grounds a scope.
+
+    Field order is part of the positional constructor and must stay stable.
+    """
+
+    value: str  # the matched value itself
+    score: float
+    source_layer: str
+    match_type: str
+
+
@dataclass(slots=True)
class V2RouteAnchors:
"""Якоря из запроса для retrieval и downstream."""
@@ -40,6 +63,10 @@ class V2RouteAnchors:
matched_aliases: list[str] = field(default_factory=list)
process_domain: str | None = None
process_subdomain: str | None = None
+ candidate_domains: list[ScopeCandidate] = field(default_factory=list)
+ candidate_subdomains: list[ScopeCandidate] = field(default_factory=list)
+ candidate_entities: list[ScopeCandidate] = field(default_factory=list)
+ candidate_apis: list[ScopeCandidate] = field(default_factory=list)
@dataclass(slots=True)
@@ -55,6 +82,7 @@ class V2RouteResult:
routing_mode: str = "deterministic"
llm_router_used: bool = False
reason_short: str = ""
+ scope_type: str = V2ScopeType.UNKNOWN
@property
def domain(self) -> str:
diff --git a/src/app/core/agent/utils/process_v2/plan_resolver/__init__.py b/src/app/core/agent/utils/process_v2/plan_resolver/__init__.py
new file mode 100644
index 0000000..8b4183a
--- /dev/null
+++ b/src/app/core/agent/utils/process_v2/plan_resolver/__init__.py
@@ -0,0 +1,6 @@
+"""Построение RetrievalPlan по маршруту v2 (intent/subintent, якоря, фильтры)."""
+
+from app.core.agent.utils.process_v2.plan_resolver.base import RetrievalPlanResolver, WorkflowRetrievalPlanResolver
+from app.core.agent.utils.process_v2.plan_resolver.policy_resolver import V2RetrievalPolicyResolver
+
+__all__ = ["RetrievalPlanResolver", "WorkflowRetrievalPlanResolver", "V2RetrievalPolicyResolver"]
diff --git a/src/app/core/agent/utils/process_v2/plan_resolver/base.py b/src/app/core/agent/utils/process_v2/plan_resolver/base.py
new file mode 100644
index 0000000..da8ccb2
--- /dev/null
+++ b/src/app/core/agent/utils/process_v2/plan_resolver/base.py
@@ -0,0 +1,26 @@
+"""Contracts for v2 retrieval-plan resolvers."""
+
+from __future__ import annotations
+
+from typing import Protocol
+
+from app.core.agent.utils.process_v2.models import V2RouteResult
+from app.core.rag.retrieval.session_retriever import RetrievalPlan
+
+
+class RetrievalPlanResolver(Protocol):
+    """Smallest interface a workflow needs in order to obtain a retrieval plan."""
+
+    def resolve(self, route: V2RouteResult) -> RetrievalPlan:
+        """Return the retrieval plan built for ``route``."""
+        ...
+
+
+class WorkflowRetrievalPlanResolver(Protocol):
+    """Route-aware resolver that the delegating policy resolver can select.
+
+    ``supports`` is consulted first; ``resolve`` is only called for a route the
+    resolver reported as supported.
+    """
+
+    def supports(self, route: V2RouteResult) -> bool:
+        """Return ``True`` when this resolver can handle ``route``."""
+        ...
+
+    def resolve(self, route: V2RouteResult) -> RetrievalPlan:
+        """Return the retrieval plan built for ``route``."""
+        ...
+
diff --git a/src/app/core/agent/utils/process_v2/plan_resolver/policy_resolver.py b/src/app/core/agent/utils/process_v2/plan_resolver/policy_resolver.py
new file mode 100644
index 0000000..a450c68
--- /dev/null
+++ b/src/app/core/agent/utils/process_v2/plan_resolver/policy_resolver.py
@@ -0,0 +1,42 @@
+"""Delegating retrieval policy resolver for process v2."""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.retrieval.retrieval_policy import (
+ DocExplainApiExposedRetrievalPolicy,
+)
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.retrieval.retrieval_policy import (
+ DocExplainFindFilesRetrievalPolicy,
+)
+from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.retrieval.retrieval_policy import (
+ DocExplainSummaryRetrievalPolicy,
+)
+from app.core.agent.processes.v2.workflows.general_qa_summary.steps.retrieval.retrieval_policy import (
+ GeneralQaSummaryRetrievalPolicy,
+)
+from app.core.agent.utils.process_v2.models import V2RouteResult
+from app.core.agent.utils.process_v2.plan_resolver.base import WorkflowRetrievalPlanResolver
+from app.core.rag.retrieval.session_retriever import RetrievalPlan
+
+
+class V2RetrievalPolicyResolver:
+    """Pick the first workflow-local resolver that supports a route and delegate to it."""
+
+    def __init__(self, resolvers: Sequence[WorkflowRetrievalPlanResolver] | None = None) -> None:
+        """Store the resolver chain.
+
+        Args:
+            resolvers: Resolvers to try, in priority order. ``None`` or an
+                empty sequence selects the built-in defaults.
+        """
+        chain = resolvers if resolvers else self._default_resolvers()
+        self._resolvers = list(chain)
+
+    def resolve(self, route: V2RouteResult) -> RetrievalPlan:
+        """Return the plan produced by the first resolver whose ``supports(route)`` is true.
+
+        Raises:
+            ValueError: If no configured resolver supports the route.
+        """
+        chosen = next((candidate for candidate in self._resolvers if candidate.supports(route)), None)
+        if chosen is None:
+            raise ValueError(f"No retrieval policy for route: {(route.routing_domain, route.intent, route.subintent)!r}")
+        return chosen.resolve(route)
+
+    def _default_resolvers(self) -> tuple[WorkflowRetrievalPlanResolver, ...]:
+        """Build the default chain; order matters because the first match wins."""
+        return (
+            GeneralQaSummaryRetrievalPolicy(),
+            DocExplainFindFilesRetrievalPolicy(),
+            DocExplainApiExposedRetrievalPolicy(),
+            DocExplainSummaryRetrievalPolicy(),
+        )
diff --git a/src/app/core/agent/utils/process_v2/rag_retrieval/__init__.py b/src/app/core/agent/utils/process_v2/rag_retrieval/__init__.py
new file mode 100644
index 0000000..1e69d31
--- /dev/null
+++ b/src/app/core/agent/utils/process_v2/rag_retrieval/__init__.py
@@ -0,0 +1,17 @@
+"""Слой извлечения из RAG для process v2: адаптер к сессии, индексы и нормализация путей."""
+
+from app.core.agent.utils.process_v2.rag_retrieval.metadata_lookup import DocsMetadataLookupIndex
+from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import (
+ RagRowIndex,
+ normalize_doc_path,
+ seed_candidates_from_target_hints,
+)
+from app.core.agent.utils.process_v2.rag_retrieval.v2_rag_adapter import V2RagRetrievalAdapter
+
+__all__ = [
+ "DocsMetadataLookupIndex",
+ "RagRowIndex",
+ "V2RagRetrievalAdapter",
+ "normalize_doc_path",
+ "seed_candidates_from_target_hints",
+]
diff --git a/src/app/core/agent/processes/v2/retrieval/metadata_lookup.py b/src/app/core/agent/utils/process_v2/rag_retrieval/metadata_lookup.py
similarity index 93%
rename from src/app/core/agent/processes/v2/retrieval/metadata_lookup.py
rename to src/app/core/agent/utils/process_v2/rag_retrieval/metadata_lookup.py
index 6125329..1ac162f 100644
--- a/src/app/core/agent/processes/v2/retrieval/metadata_lookup.py
+++ b/src/app/core/agent/utils/process_v2/rag_retrieval/metadata_lookup.py
@@ -1,9 +1,11 @@
+"""Индекс метаданных RAG-строк для подбора кандидатов по маршруту v2."""
+
from __future__ import annotations
import re
from collections import defaultdict
-from app.core.agent.processes.v2.models import V2RouteResult
+from app.core.agent.utils.process_v2.models import V2RouteResult
class DocsMetadataLookupIndex:
diff --git a/src/app/core/agent/processes/v2/retrieval/target_doc_seeding.py b/src/app/core/agent/utils/process_v2/rag_retrieval/target_doc_seeding.py
similarity index 96%
rename from src/app/core/agent/processes/v2/retrieval/target_doc_seeding.py
rename to src/app/core/agent/utils/process_v2/rag_retrieval/target_doc_seeding.py
index 3c8151f..7dcb9f5 100644
--- a/src/app/core/agent/processes/v2/retrieval/target_doc_seeding.py
+++ b/src/app/core/agent/utils/process_v2/rag_retrieval/target_doc_seeding.py
@@ -1,3 +1,5 @@
+"""Нормализация путей документов, склейка RAG-строк и сидирование по target hints."""
+
from __future__ import annotations
diff --git a/src/app/core/agent/processes/v2/retrieval/v2_rag_adapter.py b/src/app/core/agent/utils/process_v2/rag_retrieval/v2_rag_adapter.py
similarity index 54%
rename from src/app/core/agent/processes/v2/retrieval/v2_rag_adapter.py
rename to src/app/core/agent/utils/process_v2/rag_retrieval/v2_rag_adapter.py
index 7246c83..fedfe70 100644
--- a/src/app/core/agent/processes/v2/retrieval/v2_rag_adapter.py
+++ b/src/app/core/agent/utils/process_v2/rag_retrieval/v2_rag_adapter.py
@@ -2,7 +2,7 @@
from __future__ import annotations
-from app.core.agent.processes.v2.retrieval.target_doc_seeding import (
+from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import (
merge_row_lists,
normalize_doc_path,
path_variants_for_rag_query,
@@ -17,7 +17,11 @@ class _PlanDrivenRetrieval:
async def fetch_rows(self, rag_session_id: str, query_text: str, plan: RetrievalPlan) -> list[dict]:
seeded_rows = await self._seed_from_target_hints(rag_session_id, plan)
semantic_rows = await self._retriever.retrieve(rag_session_id, query_text, plan)
- return merge_row_lists(seeded_rows, semantic_rows)
+ merged = merge_row_lists(seeded_rows, semantic_rows)
+ if not _needs_docs_catalog_fallback(plan):
+ return _apply_query_signal_filter(merged, plan)
+ fallback_rows = await self._fetch_docs_catalog_rows(rag_session_id, plan)
+ return _apply_query_signal_filter(merge_row_lists(merged, fallback_rows), plan)
async def fetch_exact_paths(self, rag_session_id: str, *, paths: list[str], layers: list[str] | None = None) -> list[dict]:
return await self._retriever.retrieve_exact_files(rag_session_id, paths=paths, layers=layers)
@@ -69,6 +73,10 @@ class _PlanDrivenRetrieval:
limit=200,
)
+    async def _fetch_docs_catalog_rows(self, rag_session_id: str, plan: RetrievalPlan) -> list[dict]:
+        """List docs-scope rows from the retriever and keep those matching ``plan``.
+
+        Returns at most ``plan.limit`` rows, in the order produced by
+        ``_filter_docs_rows``.
+        """
+        # Over-fetch (at least 1000 rows, or 10x the plan limit) because the
+        # plan's filters are applied only after the listing is returned.
+        fetch_limit = max(1000, plan.limit * 10)
+        catalog_rows = await self._retriever.list_docs_scope_rows(rag_session_id, limit=fetch_limit)
+        matching = _filter_docs_rows(catalog_rows, plan)
+        return matching[: plan.limit]
+
def _target_doc_hints(self, plan: RetrievalPlan) -> list[str]:
raw = plan.filters.get("target_doc_hints")
if not isinstance(raw, list):
@@ -106,3 +114,85 @@ class V2RagRetrievalAdapter:
layers=layers,
limit=limit,
)
+
+
+def _needs_docs_catalog_fallback(plan: RetrievalPlan) -> bool:
+    """Tell whether retrieval should also pull rows from the docs catalog listing.
+
+    True only when the plan targets the ``D1_DOCUMENT_CATALOG`` layer and
+    filters on ``metadata.type`` or ``metadata.doc_type``.
+    """
+    requested_layers = {str(layer).strip() for layer in plan.layers}
+    if "D1_DOCUMENT_CATALOG" in requested_layers:
+        return any(key in plan.filters for key in ("metadata.type", "metadata.doc_type"))
+    return False
+
+
+def _filter_docs_rows(rows: list[dict], plan: RetrievalPlan) -> list[dict]:
+    """Keep the rows that satisfy the plan's layer and metadata filters, sorted by path.
+
+    A row passes when its layer is one of ``plan.layers`` (if any are given)
+    and its metadata matches each filter that is set: ``metadata.type``
+    (falling back to ``metadata.doc_type``), ``metadata.domain`` and
+    ``metadata.subdomain``. Comparisons are case-insensitive and ignore
+    surrounding whitespace. Non-dict metadata is treated as empty.
+    """
+    filters = plan.filters
+    allowed_layers = {name for name in (str(item).strip() for item in plan.layers) if name}
+    wanted_type = _norm(filters.get("metadata.type")) or _norm(filters.get("metadata.doc_type"))
+    wanted_domain = _norm(filters.get("metadata.domain"))
+    wanted_subdomain = _norm(filters.get("metadata.subdomain"))
+
+    def _keep(row: dict) -> bool:
+        if allowed_layers and str(row.get("layer") or "").strip() not in allowed_layers:
+            return False
+        raw_metadata = row.get("metadata")
+        metadata = raw_metadata if isinstance(raw_metadata, dict) else {}
+        actual_type = _norm(metadata.get("type")) or _norm(metadata.get("doc_type"))
+        if wanted_type and actual_type != wanted_type:
+            return False
+        if wanted_domain and _norm(metadata.get("domain")) != wanted_domain:
+            return False
+        return not (wanted_subdomain and _norm(metadata.get("subdomain")) != wanted_subdomain)
+
+    return sorted(filter(_keep, rows), key=lambda item: str(item.get("path") or ""))
+
+
+def _norm(value: object) -> str:
+    """Return ``value`` as a trimmed, lower-cased string; any falsy value becomes ``""``."""
+    text = str(value) if value else ""
+    return text.strip().lower()
+
+
+def _apply_query_signal_filter(rows: list[dict], plan: RetrievalPlan) -> list[dict]:
+    """Narrow ``rows`` to those mentioning one of the plan's query signals.
+
+    Strict matching is tried first, then broad matching. If the plan has no
+    signals, or neither pass matches anything, ``rows`` is returned unchanged.
+    """
+    signals = _query_signals(plan)
+    if not signals:
+        return rows
+    for strict in (True, False):
+        matched = [row for row in rows if _matches_any_signal(row, signals, strict=strict)]
+        if matched:
+            return matched
+    return rows
+
+
+def _query_signals(plan: RetrievalPlan) -> list[str]:
+    """Return the normalized, non-empty ``query_signals`` from the plan filters.
+
+    Anything other than a list under ``query_signals`` yields ``[]``.
+    """
+    raw = plan.filters.get("query_signals")
+    if not isinstance(raw, list):
+        return []
+    normalized = map(_norm, raw)
+    return [signal for signal in normalized if signal]
+
+
+def _matches_any_signal(row: dict, signals: list[str], *, strict: bool) -> bool:
+    """Report whether any signal occurs as a substring of the row's haystack.
+
+    ``strict`` selects the strict haystack; otherwise the broad one is searched.
+    """
+    build_haystack = _strict_haystack if strict else _broad_haystack
+    haystack = build_haystack(row)
+    for signal in signals:
+        if signal in haystack:
+            return True
+    return False
+
+
+def _strict_haystack(row: dict) -> str:
+    """Join the row's normalized path, title, endpoint and name into one search string.
+
+    Empty fields are skipped; non-dict metadata is treated as empty.
+    """
+    raw_metadata = row.get("metadata")
+    metadata = raw_metadata if isinstance(raw_metadata, dict) else {}
+    fields = (
+        row.get("path"),
+        row.get("title"),
+        metadata.get("endpoint"),
+        metadata.get("name"),
+    )
+    return " ".join(filter(None, map(_norm, fields)))
+
+
+def _broad_haystack(row: dict) -> str:
+    """Join the strict fields plus content, summary, entities and tags into one search string.
+
+    Each field is normalized with ``_norm`` (so non-string values are matched
+    by their ``str()`` form); empty fields are skipped and non-dict metadata
+    is treated as empty.
+    """
+    raw_metadata = row.get("metadata")
+    metadata = raw_metadata if isinstance(raw_metadata, dict) else {}
+    fields = (
+        row.get("path"),
+        row.get("title"),
+        row.get("content"),
+        metadata.get("endpoint"),
+        metadata.get("name"),
+        metadata.get("summary_text"),
+        metadata.get("entities"),
+        metadata.get("tags"),
+    )
+    return " ".join(filter(None, map(_norm, fields)))
diff --git a/src/app/core/agent/utils/traces/__init__.py b/src/app/core/agent/utils/traces/__init__.py
deleted file mode 100644
index dc5b892..0000000
--- a/src/app/core/agent/utils/traces/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Shared trace helpers will live here."""
diff --git a/src/app/core/api/application/filesystem_snapshot_resolver.py b/src/app/core/api/application/filesystem_snapshot_resolver.py
new file mode 100644
index 0000000..1dc4496
--- /dev/null
+++ b/src/app/core/api/application/filesystem_snapshot_resolver.py
@@ -0,0 +1,152 @@
+from __future__ import annotations
+
+import hashlib
+import logging
+from pathlib import Path, PurePosixPath
+
+LOGGER = logging.getLogger(__name__)
+
+
+class FilesystemSnapshotResolver:
+    """Merge request-supplied files with text files found under ``<project_id>/docs``.
+
+    ``project_id`` is interpreted as a filesystem path. When it is an existing
+    absolute directory, every eligible text file below its ``docs`` folder is
+    read and merged with the request files; request files win on equal paths.
+    Otherwise only the normalized request files are returned.
+
+    NOTE(review): ``project_id`` is used directly as a directory to read from.
+    If it can come from an untrusted client, this lets the caller point the
+    service at any readable directory — confirm it is validated upstream.
+    """
+
+    _SKIP_DIRS = {".git", ".venv", "venv", "__pycache__", ".pytest_cache", "node_modules", "dist", "build", "target"}
+    _TEXT_EXTENSIONS = {
+        ".py",
+        ".md",
+        ".txt",
+        ".rst",
+        ".json",
+        ".yaml",
+        ".yml",
+        ".toml",
+        ".ini",
+        ".cfg",
+        ".env",
+        ".js",
+        ".ts",
+        ".tsx",
+        ".jsx",
+        ".sql",
+        ".sh",
+    }
+
+    def augment(self, *, project_id: str, files: list[dict]) -> list[dict]:
+        """Return the request files, augmented with docs read from disk when possible.
+
+        Args:
+            project_id: Candidate project root; must be an absolute path to an
+                existing directory for disk files to be added.
+            files: Request file dicts; each needs a ``path`` that resolves
+                under ``docs/`` to be kept.
+
+        Returns:
+            Normalized request files, merged with disk files when a root was
+            resolved (request entries override disk entries on the same path).
+        """
+        root = self._resolve_root(project_id)
+        request_files, excluded_request = self._normalize_request_files(files)
+        if root is None:
+            self._log_excluded(excluded_request)
+            return request_files
+        disk_files = self._collect(root)
+        merged = self._merge(disk_files=disk_files, request_files=request_files)
+        # NOTE(review): this routine summary is logged at WARNING; kept as-is to
+        # preserve current log output, but INFO looks more appropriate.
+        LOGGER.warning(
+            "snapshot resolver summary: project_id=%s request_total=%s request_kept=%s disk_docs=%s merged=%s",
+            project_id,
+            len(files),
+            len(request_files),
+            len(disk_files),
+            len(merged),
+        )
+        self._log_excluded(excluded_request)
+        return merged
+
+    def _log_excluded(self, excluded_request: list[str]) -> None:
+        """Log how many request files were dropped, with the first 20 reasons."""
+        if excluded_request:
+            LOGGER.warning(
+                "snapshot resolver excluded request files: count=%s reasons=%s",
+                len(excluded_request),
+                excluded_request[:20],
+            )
+
+    def _resolve_root(self, project_id: str) -> Path | None:
+        """Return ``project_id`` as a ``Path`` if it is an absolute, existing directory."""
+        candidate = Path(str(project_id or "").strip()).expanduser()
+        if not candidate.is_absolute():
+            LOGGER.warning("snapshot resolver skip fs augment: project_id is not absolute (%s)", project_id)
+            return None
+        if not candidate.exists() or not candidate.is_dir():
+            LOGGER.warning("snapshot resolver skip fs augment: project_id is not existing dir (%s)", project_id)
+            return None
+        return candidate
+
+    def _collect(self, root: Path) -> list[dict]:
+        """Read every eligible file below ``root/docs``, in sorted path order."""
+        docs_root = root / "docs"
+        if not docs_root.exists() or not docs_root.is_dir():
+            LOGGER.warning("snapshot resolver: docs root not found (%s)", docs_root)
+            return []
+        out: list[dict] = []
+        for path in sorted(docs_root.rglob("*")):
+            if not path.is_file():
+                continue
+            rel = path.relative_to(root).as_posix()
+            if self._should_skip(rel):
+                continue
+            item = self._read_file(path, rel)
+            if item is not None:
+                out.append(item)
+        return out
+
+    def _should_skip(self, rel_path: str) -> bool:
+        """Return ``True`` for paths outside ``docs``, in skipped/hidden dirs, or without a text extension."""
+        parts = PurePosixPath(rel_path).parts
+        if not parts:
+            return True
+        if parts[0] != "docs":
+            return True
+        if any(part in self._SKIP_DIRS for part in parts):
+            return True
+        # Hidden files and directories (any component starting with a dot) are ignored.
+        if any(part.startswith(".") for part in parts):
+            return True
+        return Path(rel_path).suffix.lower() not in self._TEXT_EXTENSIONS
+
+    def _read_file(self, path: Path, rel_path: str) -> dict | None:
+        """Read one file into a ``{path, content, content_hash}`` dict.
+
+        Returns ``None`` for files that cannot be read or that contain a NUL
+        byte (treated as binary), so the caller simply skips them.
+        """
+        try:
+            raw = path.read_bytes()
+        except OSError as exc:
+            # One unreadable file (permissions, removed mid-scan) must not
+            # abort the whole snapshot.
+            LOGGER.warning("snapshot resolver: cannot read file (%s): %s", path, exc)
+            return None
+        if b"\x00" in raw:
+            return None
+        content = raw.decode("utf-8", errors="ignore")
+        return {
+            "path": rel_path,
+            "content": content,
+            # Hash of the decoded text (undecodable bytes already dropped).
+            "content_hash": hashlib.sha256(content.encode("utf-8")).hexdigest(),
+        }
+
+    def _merge(self, *, disk_files: list[dict], request_files: list[dict]) -> list[dict]:
+        """Combine both lists keyed by path; entries without a path are dropped."""
+        merged: dict[str, dict] = {}
+        for item in disk_files:
+            path = str(item.get("path") or "").strip()
+            if path:
+                merged[path] = item
+        for item in request_files:
+            path = str(item.get("path") or "").strip()
+            if path:
+                # Request payload has priority for same path
+                merged[path] = item
+        return list(merged.values())
+
+    def _normalize_request_files(self, files: list[dict]) -> tuple[list[dict], list[str]]:
+        """Split request files into kept copies (docs-relative path) and exclusion reasons."""
+        kept: list[dict] = []
+        excluded: list[str] = []
+        for item in files:
+            path = str(item.get("path") or "").replace("\\", "/").strip()
+            if not path:
+                excluded.append("empty_path")
+                continue
+            rel_path = self._normalize_rel_path(path)
+            if rel_path is None:
+                excluded.append(f"{path}:outside_docs")
+                continue
+            # Copy so the caller's dict is not mutated.
+            normalized = dict(item)
+            normalized["path"] = rel_path
+            kept.append(normalized)
+        return kept, excluded
+
+    def _normalize_rel_path(self, path: str) -> str | None:
+        """Return the ``docs/...`` tail of ``path``, or ``None`` if it has none.
+
+        The match is case-sensitive because the result must start with the
+        literal ``docs/``. The marker is searched in the original string: an
+        index taken from a lower-cased copy can point at the wrong offset,
+        since lower-casing may change a string's length.
+        """
+        if path.startswith("docs/"):
+            return path
+        idx = path.find("/docs/")
+        if idx < 0:
+            return None
+        # Skip the leading slash of the marker so the result starts with "docs/".
+        return path[idx + 1 :]
diff --git a/src/app/core/api/application/session_bootstrap_service.py b/src/app/core/api/application/session_bootstrap_service.py
index e55a1b1..60edcdd 100644
--- a/src/app/core/api/application/session_bootstrap_service.py
+++ b/src/app/core/api/application/session_bootstrap_service.py
@@ -2,6 +2,7 @@ from __future__ import annotations
from dataclasses import dataclass
+from app.core.api.application.filesystem_snapshot_resolver import FilesystemSnapshotResolver
from app.core.api.application.session_service import SessionService
from app.core.api.domain.models.agent_session import AgentSession
from app.core.rag.indexing import IndexJob
@@ -15,11 +16,18 @@ class BootstrappedAgentSession:
class SessionBootstrapService:
- def __init__(self, sessions: SessionService, rag: RagModule) -> None:
+ def __init__(
+ self,
+ sessions: SessionService,
+ rag: RagModule,
+ snapshot_resolver: FilesystemSnapshotResolver | None = None,
+ ) -> None:
self._sessions = sessions
self._rag = rag
+ self._snapshot_resolver = snapshot_resolver or FilesystemSnapshotResolver()
async def create(self, project_id: str, files: list[dict]) -> BootstrappedAgentSession:
- rag_session, index_job = await self._rag.create_session(project_id=project_id, files=files)
+ # Pass the request files through the snapshot resolver before indexing, so the
+ # RAG session is created from the resolver's output rather than the raw payload.
+ # NOTE(review): augment() is called synchronously inside this coroutine; if it
+ # reads from disk it will block the event loop - confirm and consider a worker thread.
+ resolved_files = self._snapshot_resolver.augment(project_id=project_id, files=files)
+ rag_session, index_job = await self._rag.create_session(project_id=project_id, files=resolved_files)
session = self._sessions.create(rag_session_id=rag_session.rag_session_id)
return BootstrappedAgentSession(session=session, index_job=index_job)
diff --git a/src/app/core/api/controllers/request_controller.py b/src/app/core/api/controllers/request_controller.py
index 233806f..716b94d 100644
--- a/src/app/core/api/controllers/request_controller.py
+++ b/src/app/core/api/controllers/request_controller.py
@@ -29,6 +29,8 @@ class RequestController:
status=item.status.value,
process_version=item.process_version,
answer=item.answer,
+ changeset=item.changeset,
+ apply_changeset=item.apply_changeset,
error=item.error,
created_at=item.created_at,
completed_at=item.completed_at,
diff --git a/src/app/core/api/domain/models/agent_request.py b/src/app/core/api/domain/models/agent_request.py
index c811055..b7d10db 100644
--- a/src/app/core/api/domain/models/agent_request.py
+++ b/src/app/core/api/domain/models/agent_request.py
@@ -1,9 +1,11 @@
from __future__ import annotations
from dataclasses import dataclass
+from dataclasses import field
from datetime import datetime, timezone
from app.schemas.common import ErrorPayload
+from app.schemas.changeset import ChangeItem
from app.schemas.orchestration import RequestExecutionStatus
@@ -17,6 +19,8 @@ class AgentRequest:
created_at: datetime
completed_at: datetime | None = None
answer: str | None = None
+ changeset: list[ChangeItem] = field(default_factory=list)
+ apply_changeset: bool = False
error: ErrorPayload | None = None
@classmethod
diff --git a/src/app/core/application.py b/src/app/core/application.py
index 723aaaa..31893ed 100644
--- a/src/app/core/application.py
+++ b/src/app/core/application.py
@@ -1,11 +1,12 @@
import logging
+import os
from pathlib import Path
from app.core.agent.processes import V1Process, V2Process
from app.core.agent.processes.v2 import V2IntentRouter
-from app.core.agent.processes.v2.evidence.assembler import DocsEvidenceAssembler
-from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
-from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter
+from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
+from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver
+from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
from app.core.rag.retrieval.session_retriever import RagSessionRetriever
from app.core.agent.runtime import AgentRuntime, ProcessRegistry, ProcessRunner, RuntimeEventPublisher
from app.core.agent.utils.llm import AgentLlmService, PromptLoader
@@ -54,8 +55,12 @@ class ModularApplication:
)
_v2_prompt_loader = PromptLoader(
[
- Path(__file__).resolve().parent / "agent/processes/v2/prompts.yml",
- Path(__file__).resolve().parent / "agent/processes/v2/general_prompts.yml",
+ Path(__file__).resolve().parent
+ / "agent/processes/v2/workflows/doc_explain_summary/steps/prompts/prompts.yml",
+ Path(__file__).resolve().parent
+ / "agent/processes/v2/workflows/general_qa_summary/steps/prompts/prompts.yml",
+ Path(__file__).resolve().parent
+ / "agent/processes/v2/workflows/doc_update_from_feature/steps/prompts/prompts.yml",
Path(__file__).resolve().parent / "agent/processes/v2/intent_router/routers/prompts.yml",
]
)
@@ -66,6 +71,7 @@ class ModularApplication:
_v2_rag_adapter = V2RagRetrievalAdapter(_v2_rag_retriever)
_v2_evidence = DocsEvidenceAssembler()
_v2_policy = V2RetrievalPolicyResolver()
+ _doc_rules_enabled = os.getenv("V2_DOC_RULES_ENABLED", "true").strip().lower() in {"1", "true", "yes"}
self.agent_sessions = InMemorySessionStore()
self.agent_requests = InMemoryRequestStore()
@@ -87,6 +93,7 @@ class ModularApplication:
evidence_assembler=_v2_evidence,
router=V2IntentRouter(llm=self._v2_llm),
workflow_llm_enabled=True,
+ doc_rules_enabled=_doc_rules_enabled,
),
]
)
diff --git a/src/app/core/rag/indexing/common/path_filter.py b/src/app/core/rag/indexing/common/path_filter.py
index d178875..e757ef0 100644
--- a/src/app/core/rag/indexing/common/path_filter.py
+++ b/src/app/core/rag/indexing/common/path_filter.py
@@ -1,24 +1,38 @@
from __future__ import annotations
+import logging
from pathlib import PurePosixPath
_CACHE_DIR_NAMES = {"__pycache__", "app-data", "build", "grafana"}
+LOGGER = logging.getLogger(__name__)
def should_skip_indexing_path(path: str) -> bool:
+ # Boolean view of indexing_exclusion_reason(): True whenever a reason is reported.
+ return indexing_exclusion_reason(path) is not None
+
+
+def indexing_exclusion_reason(path: str) -> str | None:
+ # Returns a short reason tag when the path must not be indexed, or None when it may be.
+ # Tags: "empty_path", "cache_dir:<part>", "hidden_part:<part>". Parts are checked in
+ # order and the first offending part decides the tag.
parts = _path_parts(path)
if not parts:
- return True
+ return "empty_path"
for part in parts:
if part in _CACHE_DIR_NAMES:
- return True
+ return f"cache_dir:{part}"
if _is_hidden_part(part):
- return True
- return False
+ return f"hidden_part:{part}"
+ return None
def filter_snapshot_files(files: list[dict]) -> list[dict]:
- return [item for item in files if not should_skip_indexing_path(str(item.get("path", "")))]
+ # Keep only the items whose "path" has no exclusion reason, preserving input order.
+ # Each dropped item is logged at WARNING level together with its reason.
+ out: list[dict] = []
+ for item in files:
+ path = str(item.get("path", ""))
+ reason = indexing_exclusion_reason(path)
+ if reason is not None:
+ LOGGER.warning("rag ingest excluded file: path=%s reason=%s", path, reason)
+ continue
+ out.append(item)
+ return out
def filter_changes_for_indexing(changed_files: list[dict]) -> list[dict]:
@@ -29,7 +43,14 @@ def filter_changes_for_indexing(changed_files: list[dict]) -> list[dict]:
if op == "delete":
result.append(item)
continue
- if should_skip_indexing_path(path):
+ reason = indexing_exclusion_reason(path)
+ if reason is not None:
+ LOGGER.warning(
+ "rag ingest excluded changed file: op=%s path=%s reason=%s",
+ op,
+ path,
+ reason,
+ )
continue
result.append(item)
return result
diff --git a/src/app/core/rag/indexing/docs/frontmatter_parser.py b/src/app/core/rag/indexing/docs/frontmatter_parser.py
index 8f58885..55d5743 100644
--- a/src/app/core/rag/indexing/docs/frontmatter_parser.py
+++ b/src/app/core/rag/indexing/docs/frontmatter_parser.py
@@ -1,7 +1,12 @@
from __future__ import annotations
+import logging
+import re
+
import yaml
+LOGGER = logging.getLogger(__name__)
+
class DocsFrontmatterParser:
def split(self, content: str) -> tuple[dict, str]:
@@ -11,5 +16,34 @@ class DocsFrontmatterParser:
if len(parts) < 3:
return {}, content
_, raw, body = parts
- payload = yaml.safe_load(raw) or {}
- return payload if isinstance(payload, dict) else {}, body.strip()
+ try:
+ payload = yaml.safe_load(raw) or {}
+ if isinstance(payload, dict):
+ return payload, body.strip()
+ except yaml.YAMLError as exc:
+ LOGGER.warning("docs frontmatter parse warning: reason=%s", exc.__class__.__name__)
+ payload = self._fallback(raw)
+ payload["__frontmatter_parse_error__"] = True
+ return payload, body.strip()
+
+    def _fallback(self, raw: str) -> dict:
+        """Recover flat ``key: value`` pairs from frontmatter text, line by line.
+
+        Blank lines, ``#`` comment lines, lines that do not look like
+        ``identifier: value`` and keys with an empty value are skipped. The
+        literals ``[]`` and ``{}`` become an empty list / dict; every other
+        value is kept as a string with surrounding quote characters stripped.
+        Later duplicates of a key overwrite earlier ones.
+        """
+        recovered: dict[str, object] = {}
+        for raw_line in raw.splitlines():
+            stripped = raw_line.strip()
+            if stripped == "" or stripped[0] == "#":
+                continue
+            parsed = re.match(r"^([A-Za-z_][A-Za-z0-9_-]*)\s*:\s*(.*)$", stripped)
+            if parsed is None:
+                continue
+            name, rest = parsed.group(1), parsed.group(2).strip()
+            if rest == "":
+                continue
+            if rest == "[]":
+                recovered[name] = []
+            elif rest == "{}":
+                recovered[name] = {}
+            else:
+                recovered[name] = rest.strip("'\"")
+        return recovered
diff --git a/src/app/core/rag/indexing/docs/pipeline.py b/src/app/core/rag/indexing/docs/pipeline.py
index 0236b57..d7c0e00 100644
--- a/src/app/core/rag/indexing/docs/pipeline.py
+++ b/src/app/core/rag/indexing/docs/pipeline.py
@@ -42,6 +42,8 @@ class DocsIndexingPipeline:
def index_file(self, *, repo_id: str, commit_sha: str | None, path: str, content: str) -> list[RagDocument]:
source = RagSource(repo_id=repo_id, commit_sha=commit_sha, path=path)
frontmatter, body = self._frontmatter.split(content)
+ if bool(frontmatter.get("__frontmatter_parse_error__")):
+ LOGGER.error("docs pipeline partial index: path=%s reason=frontmatter_parse_error", path)
doc_kind = self._classifier.classify(path)
sections = self._chunker.chunk(body)
parsed = self._content.parse(
@@ -122,4 +124,10 @@ class DocsIndexingPipeline:
extractor_name,
exc.__class__.__name__,
)
+ LOGGER.error(
+ "docs pipeline partial index: path=%s extractor=%s reason=%s",
+ path,
+ extractor_name,
+ exc.__class__.__name__,
+ )
return []
diff --git a/src/app/core/rag/indexing/service.py b/src/app/core/rag/indexing/service.py
index e366ab9..e909be3 100644
--- a/src/app/core/rag/indexing/service.py
+++ b/src/app/core/rag/indexing/service.py
@@ -84,6 +84,7 @@ class RagService:
cached = await asyncio.to_thread(self._repo.get_cached_documents, repo_id, blob_sha)
pipelines = self._resolve_pipeline_names(path)
if cached:
+ self._report_missing_or_partial_docs(path, cached)
report.documents_list.extend(self._with_file_metadata(cached, file, repo_id, blob_sha))
report.cache_hit_files += 1
LOGGER.warning(
@@ -94,6 +95,7 @@ class RagService:
)
else:
built = self._build_documents(repo_id, path, file)
+ self._report_missing_or_partial_docs(path, built)
embedded = await asyncio.to_thread(self._embed_documents, built, file, repo_id, blob_sha)
report.documents_list.extend(embedded)
await asyncio.to_thread(self._repo.cache_documents, repo_id, path, blob_sha, embedded)
@@ -108,6 +110,12 @@ class RagService:
except Exception as exc:
report.failed_files += 1
report.warnings.append(f"{path}: {exc}")
+ LOGGER.error(
+ "rag ingest failed file: rag_session_id=%s path=%s error=%s",
+ rag_session_id,
+ path,
+ exc,
+ )
await self._notify_progress(progress_cb, index, total_files, path)
report.documents = len(report.documents_list)
return report
@@ -134,6 +142,16 @@ class RagService:
names.append("DOCS")
return names
+    def _report_missing_or_partial_docs(self, path: str, docs: list[RagDocument]) -> None:
+        """Log an error when a file produced no documents or an incomplete docs set.
+
+        An empty ``docs`` is always reported. For paths starting with
+        ``docs/`` a second check reports the file when none of its documents
+        belongs to the ``D1_DOCUMENT_CATALOG`` layer. Nothing is raised.
+        """
+        if not docs:
+            LOGGER.error("rag ingest file not indexed: path=%s reason=no_documents_built", path)
+        elif str(path).startswith("docs/"):
+            for doc in docs:
+                if str(doc.layer or "") == "D1_DOCUMENT_CATALOG":
+                    break
+            else:
+                # The loop finished without finding a catalog-layer document.
+                LOGGER.error("rag ingest file partially indexed: path=%s reason=missing_d1_document_catalog", path)
+
+
def _embed_documents(self, docs: list[RagDocument], file: dict, repo_id: str, blob_sha: str) -> list[RagDocument]:
if not docs:
return []
diff --git a/src/app/core/rag/persistence/query_repository.py b/src/app/core/rag/persistence/query_repository.py
index a4732ad..4d4b89b 100644
--- a/src/app/core/rag/persistence/query_repository.py
+++ b/src/app/core/rag/persistence/query_repository.py
@@ -27,6 +27,8 @@ class RagQueryRepository:
prefer_like_patterns: list[str] | None = None,
metadata_domain: str | None = None,
metadata_subdomain: str | None = None,
+ metadata_type: str | None = None,
+ metadata_doc_type: str | None = None,
prefer_non_tests: bool = False,
) -> list[dict]:
sql, params = self._builder.build_retrieve(
@@ -42,6 +44,8 @@ class RagQueryRepository:
prefer_like_patterns=prefer_like_patterns,
metadata_domain=metadata_domain,
metadata_subdomain=metadata_subdomain,
+ metadata_type=metadata_type,
+ metadata_doc_type=metadata_doc_type,
prefer_non_tests=prefer_non_tests,
)
with get_engine().connect() as conn:
@@ -286,6 +290,42 @@ class RagQueryRepository:
rows = conn.execute(stmt, params).mappings().fetchall()
return [self._row_to_dict(row) for row in rows]
+ def list_docs_scope_index_rows(
+ self,
+ rag_session_id: str,
+ *,
+ limit: int = 8000,
+ ) -> list[dict]:
+ """Return D1/D3 catalog rows for a session (no vector retrieval, no D0 chunks).
+
+ Used by the v2 intent router to derive scope candidates from indexed DOCS layers only.
+ """
+ # A blank or missing session id short-circuits to an empty result without
+ # touching the database.
+ sid = str(rag_session_id or "").strip()
+ if not sid:
+ return []
+ # Clamp the caller-supplied limit into the range [1, 50000].
+ lim = max(1, min(int(limit), 50_000))
+ layers = ("D1_DOCUMENT_CATALOG", "D3_ENTITY_CATALOG")
+ # The ranking/distance columns are selected as constant 0; presumably this keeps
+ # the row shape aligned with scored retrieval rows - TODO confirm against callers.
+ stmt = text(
+ """
+ SELECT path, content, layer, title, metadata_json, span_start, span_end,
+ 0 AS lexical_rank,
+ 0 AS prefer_bonus,
+ 0 AS test_penalty,
+ 0 AS structural_rank,
+ 0 AS layer_rank,
+ 0 AS distance
+ FROM rag_chunks
+ WHERE rag_session_id = :sid
+ AND layer IN :layers
+ ORDER BY layer ASC, path ASC, COALESCE(span_start, 0) ASC, COALESCE(chunk_index, 0) ASC
+ LIMIT :lim
+ """
+ # "layers" is declared as an expanding bind parameter so the list passed in
+ # params below is bound to the IN clause.
+ ).bindparams(bindparam("layers", expanding=True))
+ params: dict = {"sid": sid, "layers": list(layers), "lim": lim}
+ with get_engine().connect() as conn:
+ rows = conn.execute(stmt, params).mappings().fetchall()
+ return [self._row_to_dict(row) for row in rows]
+
def _row_to_dict(self, row) -> dict:
data = dict(row)
raw_metadata = data.pop("metadata_json")
diff --git a/src/app/core/rag/persistence/repository.py b/src/app/core/rag/persistence/repository.py
index ca5ba07..87d34bb 100644
--- a/src/app/core/rag/persistence/repository.py
+++ b/src/app/core/rag/persistence/repository.py
@@ -71,6 +71,8 @@ class RagRepository:
prefer_like_patterns: list[str] | None = None,
metadata_domain: str | None = None,
metadata_subdomain: str | None = None,
+ metadata_type: str | None = None,
+ metadata_doc_type: str | None = None,
prefer_non_tests: bool = False,
) -> list[dict]:
return self._query.retrieve(
@@ -86,6 +88,8 @@ class RagRepository:
prefer_like_patterns=prefer_like_patterns,
metadata_domain=metadata_domain,
metadata_subdomain=metadata_subdomain,
+ metadata_type=metadata_type,
+ metadata_doc_type=metadata_doc_type,
prefer_non_tests=prefer_non_tests,
)
@@ -160,3 +164,6 @@ class RagRepository:
layers=layers,
limit=limit,
)
+
+ def list_docs_scope_index_rows(self, rag_session_id: str, *, limit: int = 8000) -> list[dict]:
+ # Thin delegate: forwards unchanged to the query repository's method of the same name.
+ return self._query.list_docs_scope_index_rows(rag_session_id, limit=limit)
diff --git a/src/app/core/rag/persistence/retrieval_statement_builder.py b/src/app/core/rag/persistence/retrieval_statement_builder.py
index bdedb92..9db9297 100644
--- a/src/app/core/rag/persistence/retrieval_statement_builder.py
+++ b/src/app/core/rag/persistence/retrieval_statement_builder.py
@@ -21,6 +21,8 @@ class RetrievalStatementBuilder:
prefer_like_patterns: list[str] | None = None,
metadata_domain: str | None = None,
metadata_subdomain: str | None = None,
+ metadata_type: str | None = None,
+ metadata_doc_type: str | None = None,
prefer_non_tests: bool = False,
) -> tuple[str, dict]:
emb = "[" + ",".join(str(x) for x in query_embedding) + "]"
@@ -33,6 +35,8 @@ class RetrievalStatementBuilder:
self._append_like_group(filters, params, "exclude_like", exclude_like_patterns, negate=True)
self._append_metadata_equals(filters, params, "metadata_domain", "domain", metadata_domain)
self._append_metadata_equals(filters, params, "metadata_subdomain", "subdomain", metadata_subdomain)
+ self._append_metadata_equals(filters, params, "metadata_type", "type", metadata_type)
+ self._append_metadata_equals(filters, params, "metadata_doc_type", "doc_type", metadata_doc_type)
if layers:
filters.append("layer = ANY(:layers)")
params["layers"] = layers
diff --git a/src/app/core/rag/retrieval/session_retriever.py b/src/app/core/rag/retrieval/session_retriever.py
index d19e661..8a74edf 100644
--- a/src/app/core/rag/retrieval/session_retriever.py
+++ b/src/app/core/rag/retrieval/session_retriever.py
@@ -77,6 +77,13 @@ class RagSessionRetriever:
limit=limit,
)
+ async def list_docs_scope_rows(self, rag_session_id: str, *, limit: int = 8000) -> list[dict]:
+ # Runs the repository's list_docs_scope_index_rows call in a worker thread via
+ # asyncio.to_thread and returns its result unchanged.
+ return await asyncio.to_thread(
+ self._repository.list_docs_scope_index_rows,
+ rag_session_id,
+ limit=limit,
+ )
+
def _embed_query(self, query: str) -> list[float]:
return self._embedder.embed([query])[0]
@@ -98,4 +105,8 @@ class RagSessionRetriever:
out["metadata_domain"] = filters["metadata.domain"]
if "metadata.subdomain" in filters:
out["metadata_subdomain"] = filters["metadata.subdomain"]
+        if "metadata.type" in filters:
+            out["metadata_type"] = filters["metadata.type"]
+        if "metadata.doc_type" in filters:
+            # Forward doc_type under its own keyword. Writing it to
+            # "metadata_type" overwrote the type filter and left the
+            # repository's metadata_doc_type parameter permanently unset.
+            out["metadata_doc_type"] = filters["metadata.doc_type"]
return out
diff --git a/src/app/schemas/agent_api.py b/src/app/schemas/agent_api.py
index 20dbb87..5244d4c 100644
--- a/src/app/schemas/agent_api.py
+++ b/src/app/schemas/agent_api.py
@@ -5,6 +5,7 @@ from datetime import datetime
from pydantic import BaseModel, Field
from app.schemas.indexing import FileSnapshot, IndexJobStatus
+from app.schemas.changeset import ChangeItem
from app.schemas.common import ErrorPayload
@@ -40,6 +41,8 @@ class AgentRequestStateResponse(BaseModel):
status: str
process_version: str
answer: str | None = None
+ changeset: list[ChangeItem] = Field(default_factory=list)
+ apply_changeset: bool = False
error: ErrorPayload | None = None
created_at: datetime
completed_at: datetime | None = None
diff --git a/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc
index 16a4b91..c05e49d 100644
Binary files a/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc and b/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc differ
diff --git a/tests/pipeline_setup_v3/runtime/v2_process_adapter.py b/tests/pipeline_setup_v3/runtime/v2_process_adapter.py
index 92c22fa..7d9dd55 100644
--- a/tests/pipeline_setup_v3/runtime/v2_process_adapter.py
+++ b/tests/pipeline_setup_v3/runtime/v2_process_adapter.py
@@ -4,43 +4,42 @@ import asyncio
from dataclasses import asdict, dataclass
from pathlib import Path
-from app.core.agent.processes.v2.anchor_signals import route_anchor_summary
-from app.core.agent.processes.v2 import V2IntentRouter
-from app.core.agent.processes.v2.evidence.assembler import DocsEvidenceAssembler
-from app.core.agent.processes.v2.evidence.gate import DocsEvidenceGate
-from app.core.agent.processes.v2.models import RetrievedFile, RetrievedSummary, V2Intent, V2Subintent
-from app.core.agent.processes.v2.retrieval import DocsMetadataLookupIndex
-from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
-from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter
-from app.core.agent.processes.v2.workflows.docs_explain_find_files.context import DocsExplainFindFilesContext
-from app.core.agent.processes.v2.workflows.docs_explain_find_files.graph import DocsExplainFindFilesGraph
-from app.core.agent.processes.v2.workflows.docs_explain_summary.context import DocsExplainSummaryContext
-from app.core.agent.processes.v2.workflows.docs_explain_summary.graph import DocsExplainSummaryGraph
-from app.core.agent.processes.v2.workflows.general_summary.context import GeneralSummaryContext
-from app.core.agent.processes.v2.workflows.general_summary.graph import GeneralSummaryGraph
+from app.core.agent.processes.v2 import V2IntentRouter, V2Process
from app.core.agent.utils.llm import AgentLlmService, PromptLoader
-from app.core.rag.embedding.gigachat_embedder import GigaChatEmbedder
from app.core.rag.persistence import RagRepository
from app.core.rag.retrieval.session_retriever import RagSessionRetriever
from app.core.shared.gigachat.client import GigaChatClient
from app.core.shared.gigachat.settings import GigaChatSettings
from app.core.shared.gigachat.token_provider import GigaChatTokenProvider
from app.infra.observability.module_trace import RequestTraceContext
+from app.core.agent.utils.process_v2.anchor_signals import route_anchor_summary
+from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
+from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
+from app.core.agent.utils.process_v2.models import V2Intent
+from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver
+from app.core.agent.utils.process_v2.rag_retrieval import DocsMetadataLookupIndex, V2RagRetrievalAdapter
from tests.pipeline_setup_v3.core.models import ExecutionPayload, V3Case
+from tests.pipeline_setup_v3.shared.rag_indexer import DeterministicEmbedder
+from tests.pipeline_setup_v4.executors.process_v2_router_executor import _KeywordLlm
class V2ProcessAdapter:
def __init__(self, *, workflow_llm_enabled: bool = True) -> None:
self._workflow_llm_enabled = workflow_llm_enabled
- self._router = V2IntentRouter(llm=_build_v2_llm())
+ self._llm = _build_v2_llm()
+ self._router = V2IntentRouter(llm=_KeywordLlm(), enable_llm_disambiguation=True)
self._policy = V2RetrievalPolicyResolver()
- retriever = RagSessionRetriever(repository=RagRepository(), embedder=GigaChatEmbedder(_build_client()))
+ retriever = RagSessionRetriever(repository=RagRepository(), embedder=DeterministicEmbedder())
self._retrieval = V2RagRetrievalAdapter(retriever)
- self._evidence = DocsEvidenceAssembler()
- self._gate = DocsEvidenceGate()
- self._summary_graph = DocsExplainSummaryGraph(_build_v2_llm())
- self._find_files_graph = DocsExplainFindFilesGraph()
- self._general_graph = GeneralSummaryGraph(_build_v2_llm())
+ self._process = V2Process(
+ llm=self._llm,
+ policy_resolver=self._policy,
+ rag_adapter=self._retrieval,
+ evidence_assembler=DocsEvidenceAssembler(),
+ evidence_gate=DocsEvidenceGate(),
+ router=self._router,
+ workflow_llm_enabled=workflow_llm_enabled,
+ )
def execute(self, case: V3Case, rag_session_id: str | None) -> ExecutionPayload:
return asyncio.run(self._execute_async(case, rag_session_id))
@@ -81,6 +80,8 @@ class V2ProcessAdapter:
actual=_actual_from_v2(route),
details=_details(case.query, route=route, pipeline_steps=_build_pipeline_steps(runtime.logs)),
)
+ if case.mode == "full_chain":
+ return await self._execute_full_chain(case, rag_session_id, route)
plan = self._policy.resolve(route)
_log_pipeline_step(
runtime,
@@ -121,26 +122,7 @@ class V2ProcessAdapter:
actual=_actual_from_v2(route, rows=rows, plan=plan, answer_mode="partial"),
details=_details(case.query, route=route, plan=plan, rows=rows, pipeline_steps=_build_pipeline_steps(runtime.logs)),
)
- answer, evidence, gate = await self._run_workflow(runtime, route, rag_session_id, rows)
- answer_mode = gate.answer_mode
- _log_pipeline_step(
- runtime,
- "answer_generated",
- {"answer_mode": answer_mode, "answer_length": len(answer)},
- )
- return ExecutionPayload(
- actual=_actual_from_v2(route, rows=rows, plan=plan, answer=answer, answer_mode=answer_mode),
- details=_details(
- case.query,
- route=route,
- plan=plan,
- rows=rows,
- evidence=evidence,
- answer=answer,
- logs=runtime.logs,
- pipeline_steps=_build_pipeline_steps(runtime.logs),
- ),
- )
+ raise ValueError(f"Unsupported process_v2 adapter mode: {case.mode}")
async def _retrieve_rows(self, route, rag_session_id: str | None, plan) -> list[dict]:
if not rag_session_id:
@@ -173,125 +155,54 @@ class V2ProcessAdapter:
merged.append(row)
return merged
- async def _run_workflow(
- self,
- runtime: "_RuntimeStub",
- route,
- rag_session_id: str | None,
- rows: list[dict],
- ) -> tuple[str, dict, object]:
- if route.intent == V2Intent.GENERAL_QA:
- documents = self._evidence.assemble_summaries(rows, route)
- gate = self._gate.check_summaries(route, documents)
- _log_pipeline_step(
- runtime,
- "evidence_assembled",
- {"mode": "summary", "primary_doc": documents[0].path if documents else None, "document_count": len(documents)},
- )
- self._log_ranking(runtime, documents)
- _log_pipeline_step(
- runtime,
- "evidence_gate_checked",
- {"passed": gate.passed, "reason": gate.reason, "answer_mode": gate.answer_mode},
- )
- context = GeneralSummaryContext(runtime=runtime, route=route, prompt_name="v2_general.summary_answer")
- context.workflow_llm_enabled = self._workflow_llm_enabled
- context.documents = documents
- context.gate_decision = gate
- final = await self._general_graph.run(context)
- return final.answer, {"documents": [_serialize_summary(item) for item in documents], "files": []}, gate
- if route.subintent == V2Subintent.FIND_FILES:
- files = self._evidence.assemble_files(rows, route)
- gate = self._gate.check_files(route, files)
- _log_pipeline_step(
- runtime,
- "evidence_assembled",
- {"mode": "find_files", "primary_file": files[0].path if files else None, "file_count": len(files)},
- )
- self._log_ranking(runtime, files)
- _log_pipeline_step(
- runtime,
- "evidence_gate_checked",
- {"passed": gate.passed, "reason": gate.reason, "answer_mode": gate.answer_mode},
- )
- context = DocsExplainFindFilesContext(
- runtime=runtime,
- route=route,
- rag_session_id=rag_session_id or "",
- files=files,
- gate_decision=gate,
- )
- final = await self._find_files_graph.run(context)
- return final.answer, {"documents": [], "files": [_serialize_file(item) for item in files]}, gate
- documents = self._evidence.assemble_summaries(rows, route)
- gate = self._gate.check_summaries(route, documents)
- _log_pipeline_step(
- runtime,
- "evidence_assembled",
- {"mode": "summary", "primary_doc": documents[0].path if documents else None, "document_count": len(documents)},
- )
- self._log_ranking(runtime, documents)
- _log_pipeline_step(
- runtime,
- "evidence_gate_checked",
- {"passed": gate.passed, "reason": gate.reason, "answer_mode": gate.answer_mode},
- )
- context = DocsExplainSummaryContext(
- runtime=runtime,
- route=route,
- rag_session_id=rag_session_id or "",
- prompt_name="v2_docs_explain.summary_answer",
- workflow_llm_enabled=self._workflow_llm_enabled,
- documents=documents,
- gate_decision=gate,
- )
- final = await self._summary_graph.run(context)
- return final.answer, {"documents": [_serialize_summary(item) for item in documents], "files": []}, gate
-
- def _trace_row(self, row: dict) -> dict[str, object]:
- metadata = dict(row.get("metadata") or {})
- return {
- "path": str(row.get("path") or ""),
- "layer": str(row.get("layer") or ""),
- "title": str(row.get("title") or ""),
- "document_id": str(metadata.get("document_id") or metadata.get("doc_id") or ""),
- }
-
- def _log_ranking(self, runtime: "_RuntimeStub", items: list) -> None:
- top_docs: list[dict[str, object]] = []
- for item in items[:4]:
- top_docs.append(
- {
- "doc": getattr(item, "path", ""),
- "score": getattr(item, "score", 0),
- "match_reason": getattr(item, "match_reason", ""),
- }
- )
- _log_pipeline_step(
- runtime,
- "ranking_explained",
- {
- "doc": getattr(item, "path", ""),
- "score": getattr(item, "score", 0),
- "score_breakdown": getattr(item, "score_breakdown", {}),
- "match_reason": getattr(item, "match_reason", ""),
- },
- )
- _log_pipeline_step(
- runtime,
- "ranking_explained",
- {
- "top_docs_after_ranking": top_docs,
- "ranking_score_breakdown": [
- {
- "doc": getattr(item, "path", ""),
- "score_breakdown": getattr(item, "score_breakdown", {}),
- }
- for item in items[:4]
- ],
+ async def _execute_full_chain(self, case: V3Case, rag_session_id: str | None, route) -> ExecutionPayload:
+ # Runs the whole V2Process for the case query on a fresh runtime stub, then rebuilds
+ # the ExecutionPayload from the process result and from events captured in runtime.logs.
+ # The route passed in is only used for reporting (domain / intent / sub_intent).
+ runtime = _RuntimeStub(query=case.query, rag_session_id=rag_session_id)
+ result = await self._process.run(runtime)
+ # Each lookup below takes the payload of the first log entry with the given
+ # module/event pair; a missing event yields an empty dict.
+ retrieval_plan = _event_payload(runtime.logs, "process.v2.retrieval_policy", "retrieval_plan_resolved")
+ rows = list(_event_payload(runtime.logs, "process.v2.rag_retrieval", "rag_rows_fetched").get("rows") or [])
+ answer_generated = _event_payload(runtime.logs, "process.v2.pipeline", "answer_generated")
+ return ExecutionPayload(
+ actual={
+ "domain": route.routing_domain,
+ "intent": route.intent,
+ "sub_intent": route.subintent,
+ "rag_count": len(rows),
+ "llm_answer": result.answer,
+ "answer_mode": str(answer_generated.get("answer_mode") or ""),
+ # path_scope and symbol_candidates are always reported empty in this mode.
+ "path_scope": tuple(),
+ "symbol_candidates": tuple(),
+ "entity_candidates": tuple(_entity_candidates(rows)),
+ "doc_scope": tuple(_doc_scope(rows)),
+ "layers": tuple(retrieval_plan.get("layers") or []),
+ "filters": dict(retrieval_plan.get("filters") or {}),
+ },
+ details={
+ "query": case.query,
+ "router_result": asdict(route),
+ "retrieval_plan": retrieval_plan,
+ "rows": rows,
+ "answer": result.answer,
+ "logs": runtime.logs,
+ "pipeline_steps": _build_pipeline_steps(runtime.logs),
},
)
def _trace_row(self, row: dict) -> dict[str, object]:
+ # Flattens one retrieved row into a dict of plain strings for trace output.
+ # Missing values become "", and summary_text / content_preview are cut to 400 characters.
+ metadata = row.get("metadata") or {}
+ content = str(row.get("content") or "").strip()
+ return {
+ "layer": str(row.get("layer") or ""),
+ "path": str(row.get("path") or ""),
+ "title": str(row.get("title") or ""),
+ # document_id is taken from the first non-empty of: metadata "document_id",
+ # metadata "doc_id", then the row's own "document_id".
+ "document_id": str(metadata.get("document_id") or metadata.get("doc_id") or row.get("document_id") or ""),
+ "entity_name": str(metadata.get("entity_name") or ""),
+ "summary_text": str(metadata.get("summary_text") or "")[:400],
+ "section_path": str(metadata.get("section_path") or ""),
+ "metadata_domain": str(metadata.get("domain") or ""),
+ "metadata_subdomain": str(metadata.get("subdomain") or ""),
+ "content_preview": content[:400],
+ }
+
@dataclass(slots=True)
class _RequestStub:
@@ -320,10 +231,10 @@ class _TraceLoggerStub:
class _RuntimeStub:
- def __init__(self, *, query: str) -> None:
+ def __init__(self, *, query: str, rag_session_id: str | None = None) -> None:
self.logs: list[dict] = []
self.request = _RequestStub(request_id="pipeline_setup_v3", message=query)
- self.session = _SessionStub()
+ self.session = _SessionStub(active_rag_session_id=rag_session_id)
self.publisher = _PublisherStub()
self.trace = RequestTraceContext(request_id=self.request.request_id, logger=_TraceLoggerStub(self.logs))
@@ -335,8 +246,10 @@ def _build_client() -> GigaChatClient:
def _build_v2_llm() -> AgentLlmService:
prompt_paths = [
- Path(__file__).resolve().parents[3] / "src/app/core/agent/processes/v2/prompts.yml",
- Path(__file__).resolve().parents[3] / "src/app/core/agent/processes/v2/general_prompts.yml",
+ Path(__file__).resolve().parents[3]
+ / "src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/prompts/prompts.yml",
+ Path(__file__).resolve().parents[3]
+ / "src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/prompts/prompts.yml",
Path(__file__).resolve().parents[3] / "src/app/core/agent/processes/v2/intent_router/routers/prompts.yml",
]
return AgentLlmService(client=_build_client(), prompts=PromptLoader(prompt_paths))
@@ -375,7 +288,12 @@ def _doc_scope(rows: list[dict]) -> list[str]:
values: list[str] = []
for row in rows:
metadata = dict(row.get("metadata") or {})
- for candidate in (metadata.get("document_id"), metadata.get("doc_id"), row.get("path")):
+ for candidate in (
+ row.get("document_id"),
+ metadata.get("document_id"),
+ metadata.get("doc_id"),
+ row.get("path"),
+ ):
value = str(candidate or "").strip()
if value and value not in values:
values.append(value)
@@ -386,20 +304,12 @@ def _entity_candidates(rows: list[dict]) -> list[str]:
values: list[str] = []
for row in rows:
metadata = dict(row.get("metadata") or {})
- value = str(metadata.get("entity_name") or row.get("title") or "").strip()
+ value = str(row.get("entity_name") or metadata.get("entity_name") or row.get("title") or "").strip()
if value and value not in values and str(row.get("layer") or "") == "D3_ENTITY_CATALOG":
values.append(value)
return values
-def _serialize_summary(item: RetrievedSummary) -> dict:
- return asdict(item)
-
-
-def _serialize_file(item: RetrievedFile) -> dict:
- return asdict(item)
-
-
def _build_pipeline_steps(logs: list[dict]) -> list[dict]:
steps: list[dict] = []
for item in logs:
@@ -409,6 +319,16 @@ def _build_pipeline_steps(logs: list[dict]) -> list[dict]:
return steps
+def _event_payload(logs: list[dict], module: str, event: str) -> dict[str, object]:
+ for item in logs:
+ if item.get("module") == module and item.get("event") == event:
+ payload = item.get("payload") or {}
+ if isinstance(payload, dict):
+ return dict(payload)
+ return {}
+ return {}
+
+
def _log_pipeline_step(runtime: _RuntimeStub, step: str, payload: dict[str, object]) -> None:
runtime.logs.append(
{
diff --git a/tests/pipeline_setup_v4/executors/process_v2_full_chain_executor.py b/tests/pipeline_setup_v4/executors/process_v2_full_chain_executor.py
index 9d0939a..4fa92c9 100644
--- a/tests/pipeline_setup_v4/executors/process_v2_full_chain_executor.py
+++ b/tests/pipeline_setup_v4/executors/process_v2_full_chain_executor.py
@@ -14,7 +14,7 @@ from tests.pipeline_setup_v4.core.models import ExecutionPayload, V4Case
class ProcessV2FullChainExecutor:
def __init__(self) -> None:
- self._adapter = V2ProcessAdapter(workflow_llm_enabled=True)
+ self._adapter = V2ProcessAdapter(workflow_llm_enabled=False)
def execute(self, case: V4Case) -> ExecutionPayload:
if not case.rag_session_id:
@@ -27,7 +27,7 @@ class ProcessV2FullChainExecutor:
pipeline_steps = list(payload.details.get("pipeline_steps") or [])
pipeline_summary = {
"answer_mode": str(payload.actual.get("answer_mode") or ""),
- "workflow_llm_enabled": True,
+ "workflow_llm_enabled": False,
"step_count": len(pipeline_steps),
"steps": [str(step.get("step") or "") for step in pipeline_steps if str(step.get("step") or "").strip()],
}
@@ -102,8 +102,8 @@ def _summarize_rows(rows: list[dict]) -> dict[str, object]:
path = str(row.get("path") or "").strip()
layer = str(row.get("layer") or "").strip()
metadata = dict(row.get("metadata") or {})
- domain = str(metadata.get("domain") or "").strip()
- subdomain = str(metadata.get("subdomain") or "").strip()
+ domain = str(row.get("metadata_domain") or metadata.get("domain") or "").strip()
+ subdomain = str(row.get("metadata_subdomain") or metadata.get("subdomain") or "").strip()
if path and path not in paths:
paths.append(path)
if layer and layer not in layers:
diff --git a/tests/pipeline_setup_v4/executors/process_v2_retrieval_policy_executor.py b/tests/pipeline_setup_v4/executors/process_v2_retrieval_policy_executor.py
index eaa0ee5..5982c18 100644
--- a/tests/pipeline_setup_v4/executors/process_v2_retrieval_policy_executor.py
+++ b/tests/pipeline_setup_v4/executors/process_v2_retrieval_policy_executor.py
@@ -2,8 +2,8 @@ from __future__ import annotations
from dataclasses import asdict
-from app.core.agent.processes.v2.models import V2RouteAnchors, V2RouteResult
-from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
+from app.core.agent.utils.process_v2.models import V2RouteAnchors, V2RouteResult
+from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver
from tests.pipeline_setup_v4.core.models import ExecutionPayload, V4Case
diff --git a/tests/pipeline_setup_v4/executors/process_v2_router_plus_policy_executor.py b/tests/pipeline_setup_v4/executors/process_v2_router_plus_policy_executor.py
index 804cb5f..40d57fd 100644
--- a/tests/pipeline_setup_v4/executors/process_v2_router_plus_policy_executor.py
+++ b/tests/pipeline_setup_v4/executors/process_v2_router_plus_policy_executor.py
@@ -3,7 +3,7 @@ from __future__ import annotations
from dataclasses import asdict
from app.core.agent.processes.v2 import V2IntentRouter
-from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
+from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver
from tests.pipeline_setup_v4.core.models import ExecutionPayload, V4Case
from tests.pipeline_setup_v4.executors.process_v2_router_executor import _KeywordLlm
diff --git a/tests/pipeline_setup_v4/executors/process_v2_router_plus_policy_rag_executor.py b/tests/pipeline_setup_v4/executors/process_v2_router_plus_policy_rag_executor.py
index 524a949..acedc91 100644
--- a/tests/pipeline_setup_v4/executors/process_v2_router_plus_policy_rag_executor.py
+++ b/tests/pipeline_setup_v4/executors/process_v2_router_plus_policy_rag_executor.py
@@ -4,10 +4,10 @@ import asyncio
from dataclasses import asdict
from app.core.agent.processes.v2 import V2IntentRouter
-from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
-from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter
from app.core.rag.persistence.repository import RagRepository
from app.core.rag.retrieval.session_retriever import RagSessionRetriever
+from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver
+from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
from tests.pipeline_setup_v3.shared.rag_indexer import DeterministicEmbedder
from tests.pipeline_setup_v4.core.models import ExecutionPayload, V4Case
from tests.pipeline_setup_v4.executors.process_v2_router_executor import _KeywordLlm
diff --git a/tests/unit_tests/agent/test_api_endpoint_collector.py b/tests/unit_tests/agent/test_api_endpoint_collector.py
new file mode 100644
index 0000000..5bb8e44
--- /dev/null
+++ b/tests/unit_tests/agent/test_api_endpoint_collector.py
@@ -0,0 +1,68 @@
+from __future__ import annotations
+
+from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.retrieval.api_endpoint_collector import (
+ ApiEndpointCollector,
+)
+
+
+def test_collector_returns_method_and_path_lines() -> None:
+ rows = [
+ {
+ "metadata": {
+ "endpoint": "GET|POST /actions/{action}",
+ "summary_text": "Endpoint for runtime control actions",
+ },
+ "title": "HTTP API /actions/{action}",
+ "path": "docs/api/control-actions-endpoint.md",
+ "content": "",
+ },
+ {
+ "metadata": {
+ "endpoint": "GET /health",
+ },
+ "title": "HTTP API /health",
+ "path": "docs/api/health-endpoint.md",
+ "content": "",
+ },
+ ]
+
+ endpoints = ApiEndpointCollector().collect(rows)
+
+ assert "GET /actions/{action}" in endpoints
+ assert "POST /actions/{action}" in endpoints
+ assert "GET /health" in endpoints
+
+
+def test_collector_ignores_file_paths_from_content() -> None:
+ rows = [
+ {
+ "metadata": {
+ "endpoint": "GET /health",
+ "summary_text": "Uses src/telegram_notify_app/control_api.py",
+ },
+ "title": "Health endpoint",
+ "path": "docs/api/health-endpoint.md",
+ "content": "See /telegram_notify_app/control_api.py and /telegram_notify_app/worker.py",
+ }
+ ]
+
+ endpoints = ApiEndpointCollector().collect(rows)
+
+ assert endpoints == ["GET /health"]
+
+
+def test_collector_uses_title_path_fallback_when_endpoint_metadata_missing() -> None:
+ rows = [
+ {
+ "metadata": {
+ "summary_text": "Control actions endpoint",
+ },
+ "title": "HTTP API /actions/{action}",
+ "path": "docs/api/control-actions-endpoint.md",
+ "content": "",
+ }
+ ]
+
+ endpoints = ApiEndpointCollector().collect(rows)
+
+ assert endpoints == ["GET /actions/{action}"]
diff --git a/tests/unit_tests/agent/test_v2_evidence_ranking.py b/tests/unit_tests/agent/test_v2_evidence_ranking.py
index d7f4a82..c4a0c11 100644
--- a/tests/unit_tests/agent/test_v2_evidence_ranking.py
+++ b/tests/unit_tests/agent/test_v2_evidence_ranking.py
@@ -1,7 +1,7 @@
from __future__ import annotations
-from app.core.agent.processes.v2.evidence.assembler import DocsEvidenceAssembler
-from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2RouteAnchors, V2RouteResult, V2Subintent
+from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
+from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2RouteAnchors, V2RouteResult, V2Subintent
def _route(*, hints: list[str], terms: list[str], subintent: str = V2Subintent.SUMMARY) -> V2RouteResult:
diff --git a/tests/unit_tests/agent/test_v2_process.py b/tests/unit_tests/agent/test_v2_process.py
index a0fcb02..3ab8ed7 100644
--- a/tests/unit_tests/agent/test_v2_process.py
+++ b/tests/unit_tests/agent/test_v2_process.py
@@ -4,11 +4,11 @@ import asyncio
from dataclasses import dataclass
from app.core.agent.processes.v2 import V2IntentRouter, V2Process
-from app.core.agent.processes.v2.retrieval.target_doc_seeding import normalize_doc_path
-from app.core.agent.processes.v2.evidence.assembler import DocsEvidenceAssembler
-from app.core.agent.processes.v2.evidence.gate import DocsEvidenceGate
-from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
+from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
+from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
+from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver
+from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import normalize_doc_path
from app.core.api.domain.models.agent_request import AgentRequest
from app.core.api.domain.models.agent_session import AgentSession
from app.schemas.orchestration import RequestExecutionStatus
diff --git a/tests/unit_tests/agent/test_v2_rag_adapter.py b/tests/unit_tests/agent/test_v2_rag_adapter.py
index b83d21a..bb0dc4b 100644
--- a/tests/unit_tests/agent/test_v2_rag_adapter.py
+++ b/tests/unit_tests/agent/test_v2_rag_adapter.py
@@ -2,8 +2,8 @@ from __future__ import annotations
import asyncio
-from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter
from app.core.rag.retrieval.session_retriever import RetrievalPlan
+from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
class FakeRetriever:
@@ -50,6 +50,24 @@ class FakeRetriever:
self.calls.append(("substring", list(path_needles)))
return []
+ async def list_docs_scope_rows(self, _rag_session_id: str, *, limit: int = 8000) -> list[dict]:
+ del limit
+ self.calls.append(("scope_rows", None))
+ return [
+ {
+ "path": "docs/api/health-endpoint.md",
+ "layer": "D1_DOCUMENT_CATALOG",
+ "title": "HTTP API /health",
+ "metadata": {"type": "api_method", "endpoint": "GET /health"},
+ },
+ {
+ "path": "docs/api/send-message-endpoint.md",
+ "layer": "D1_DOCUMENT_CATALOG",
+ "title": "HTTP API /send",
+ "metadata": {"type": "api_method", "endpoint": "GET /send"},
+ },
+ ]
+
def test_v2_rag_adapter_seeds_exact_rows_from_plan_hints() -> None:
adapter = V2RagRetrievalAdapter(FakeRetriever())
@@ -79,3 +97,21 @@ def test_v2_rag_adapter_uses_substring_fallback_for_missing_hint() -> None:
asyncio.run(adapter.fetch_rows("rag-1", "find file", plan))
assert ("substring", ["missing-health-endpoint.md"]) in retriever.calls
+
+
+def test_v2_rag_adapter_applies_query_signal_filter_for_api_exposed() -> None:
+ adapter = V2RagRetrievalAdapter(FakeRetriever())
+ plan = RetrievalPlan(
+ profile="api_exposed",
+ layers=["D1_DOCUMENT_CATALOG"],
+ limit=50,
+ filters={
+ "metadata.type": "api_method",
+ "query_signals": ["health"],
+ },
+ )
+
+ rows = asyncio.run(adapter.fetch_rows("rag-1", "health endpoints", plan))
+
+ assert len(rows) == 1
+ assert rows[0]["path"] == "docs/api/health-endpoint.md"
diff --git a/tests/unit_tests/agent/test_v2_retrieval_policy.py b/tests/unit_tests/agent/test_v2_retrieval_policy.py
index 78742ce..3f43234 100644
--- a/tests/unit_tests/agent/test_v2_retrieval_policy.py
+++ b/tests/unit_tests/agent/test_v2_retrieval_policy.py
@@ -1,7 +1,7 @@
from __future__ import annotations
-from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2RouteAnchors, V2RouteResult, V2Subintent
-from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
+from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2RouteAnchors, V2RouteResult, V2Subintent
+from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver
def _route(
diff --git a/tests/unit_tests/agent/test_v2_scope_grounding.py b/tests/unit_tests/agent/test_v2_scope_grounding.py
new file mode 100644
index 0000000..18b6e21
--- /dev/null
+++ b/tests/unit_tests/agent/test_v2_scope_grounding.py
@@ -0,0 +1,138 @@
+"""Tests for pre-LLM scope grounding from D1/D3 catalog rows (no extra RAG layer)."""
+
+from __future__ import annotations
+
+import json
+
+from app.core.agent.processes.v2 import V2IntentRouter
+from app.core.agent.utils.process_v2.models import V2ScopeType
+
+
+class FakeLlm:
+ def __init__(self, response: str) -> None:
+ self.response = response
+
+ def generate(self, prompt_name: str, user_input: str, **_kwargs) -> str:
+ del prompt_name, user_input
+ return self.response
+
+
+def _llm_ok() -> str:
+ return json.dumps(
+ {
+ "routing_domain": "DOCS",
+ "intent": "DOC_EXPLAIN",
+ "subintent": "SUMMARY",
+ "confidence": 0.9,
+ "reason_short": "ok",
+ },
+ ensure_ascii=False,
+ )
+
+
+def _fixture_rows() -> list[dict]:
+ return [
+ {
+ "layer": "D1_DOCUMENT_CATALOG",
+ "path": "docs/billing/overview.md",
+ "title": "Billing",
+ "content": "",
+ "metadata": {"domain": "billing", "summary_text": "Billing domain overview"},
+ },
+ {
+ "layer": "D1_DOCUMENT_CATALOG",
+ "path": "docs/billing/invoices.md",
+ "title": "Invoices",
+ "content": "",
+ "metadata": {"domain": "billing", "subdomain": "invoice", "tags": ["invoice", "invoices"]},
+ },
+ {
+ "layer": "D3_ENTITY_CATALOG",
+ "path": "docs/domains/order.md",
+ "title": "Order",
+ "content": "",
+ "metadata": {"entity_name": "Order", "domain": "billing"},
+ },
+ {
+ "layer": "D1_DOCUMENT_CATALOG",
+ "path": "docs/api/invoices_post.md",
+ "title": "POST /api/v1/invoices",
+ "content": "",
+ "metadata": {
+ "doc_type": "api_method",
+ "domain": "billing",
+ "endpoint": "/api/v1/invoices",
+ },
+ },
+ {
+ "layer": "D1_DOCUMENT_CATALOG",
+ "path": "docs/widgets/readme.md",
+ "title": "Widgets",
+ "content": "",
+ "metadata": {"domain": "widgets", "summary_text": "Unrelated domain for negative tests"},
+ },
+ ]
+
+
+def _router() -> V2IntentRouter:
+ return V2IntentRouter(llm=FakeLlm(_llm_ok()), scope_rows_provider=lambda _sid: _fixture_rows())
+
+
+def test_scope_global_project_wide_enumeration() -> None:
+ r = _router().route("какие api методы есть в проекте", rag_session_id="sess-1")
+ assert r.scope_type == V2ScopeType.GLOBAL
+
+
+def test_scope_domain_billing() -> None:
+ r = _router().route("какие api есть в billing", rag_session_id="sess-1")
+ assert r.scope_type == V2ScopeType.DOMAIN
+ assert r.anchors.process_domain == "billing"
+ assert any(c.value == "billing" for c in r.anchors.candidate_domains)
+
+
+def test_scope_subdomain_billing_invoices() -> None:
+ r = _router().route("какие api есть в billing invoices", rag_session_id="sess-1")
+ assert r.scope_type == V2ScopeType.SUBDOMAIN
+ assert r.anchors.process_domain == "billing"
+ assert r.anchors.process_subdomain == "invoice"
+
+
+def test_scope_entity_order_doc() -> None:
+ r = _router().route("дай доку по Order", rag_session_id="sess-1")
+ assert r.scope_type == V2ScopeType.ENTITY
+ assert "order" in [e.lower() for e in r.anchors.entity_names]
+
+
+def test_scope_entity_endpoint_path() -> None:
+ r = _router().route("где описан POST /api/v1/invoices", rag_session_id="sess-1")
+ assert r.scope_type == V2ScopeType.ENTITY
+ assert "/api/v1/invoices" in r.anchors.endpoint_paths
+
+
+def test_scope_vague_no_false_domain() -> None:
+ r = _router().route("что там с фывырапфыв", rag_session_id="sess-1")
+ assert r.scope_type == V2ScopeType.UNKNOWN
+ assert r.anchors.process_domain is None
+
+
+def test_scope_russian_payments_phrase_matches_tag() -> None:
+ rows = [
+ *_fixture_rows(),
+ {
+ "layer": "D1_DOCUMENT_CATALOG",
+ "path": "docs/billing/payments_ru.md",
+ "title": "Платежи",
+ "content": "",
+ "metadata": {"domain": "billing", "tags": ["платежи"]},
+ },
+ ]
+ router = V2IntentRouter(llm=FakeLlm(_llm_ok()), scope_rows_provider=lambda _sid: rows)
+ r = router.route("какие методы есть в платежи", rag_session_id="sess-1")
+ assert r.scope_type in {V2ScopeType.DOMAIN, V2ScopeType.ENTITY, V2ScopeType.SUBDOMAIN}
+ assert r.anchors.process_domain == "billing" or any("платеж" in c.value for c in r.anchors.candidate_entities)
+
+
+def test_router_without_session_skips_db_and_keeps_target_terms() -> None:
+ r = V2IntentRouter(llm=FakeLlm(_llm_ok())).route("Покажи где описан RuntimeHealth и /health")
+ assert r.scope_type == V2ScopeType.UNKNOWN
+ assert "runtimehealth" in r.target_terms
diff --git a/tests/unit_tests/api/test_filesystem_snapshot_resolver.py b/tests/unit_tests/api/test_filesystem_snapshot_resolver.py
new file mode 100644
index 0000000..b47495d
--- /dev/null
+++ b/tests/unit_tests/api/test_filesystem_snapshot_resolver.py
@@ -0,0 +1,83 @@
+from __future__ import annotations
+
+import hashlib
+
+from app.core.api.application.filesystem_snapshot_resolver import FilesystemSnapshotResolver
+
+
+def test_augment_adds_untracked_files_from_filesystem(tmp_path) -> None:
+ root = tmp_path / "repo"
+ root.mkdir()
+ docs = root / "docs" / "api"
+ docs.mkdir(parents=True)
+ control_path = docs / "control-actions-endpoint.md"
+ control_content = "---\ndoc_type: api_method\n---\nGET|POST /actions/{action}\n"
+ control_path.write_text(control_content, encoding="utf-8")
+
+ incoming = [
+ {
+ "path": "docs/api/health-endpoint.md",
+ "content": "health",
+ "content_hash": hashlib.sha256(b"health").hexdigest(),
+ }
+ ]
+
+ out = FilesystemSnapshotResolver().augment(project_id=str(root), files=incoming)
+ paths = {item["path"] for item in out}
+ assert "docs/api/health-endpoint.md" in paths
+ assert "docs/api/control-actions-endpoint.md" in paths
+
+
+def test_augment_prefers_request_payload_for_existing_path(tmp_path) -> None:
+ root = tmp_path / "repo"
+ root.mkdir()
+ docs = root / "docs" / "api"
+ docs.mkdir(parents=True)
+ file_path = docs / "health-endpoint.md"
+ file_path.write_text("from-disk", encoding="utf-8")
+
+ incoming_content = "from-request"
+ incoming = [
+ {
+ "path": "docs/api/health-endpoint.md",
+ "content": incoming_content,
+ "content_hash": hashlib.sha256(incoming_content.encode("utf-8")).hexdigest(),
+ }
+ ]
+
+ out = FilesystemSnapshotResolver().augment(project_id=str(root), files=incoming)
+ by_path = {item["path"]: item for item in out}
+ assert by_path["docs/api/health-endpoint.md"]["content"] == incoming_content
+
+
+def test_augment_ignores_files_outside_root_docs(tmp_path) -> None:
+ root = tmp_path / "repo"
+ root.mkdir()
+ (root / "docs").mkdir()
+ (root / "docs" / "README.md").write_text("docs", encoding="utf-8")
+ (root / "src").mkdir()
+ (root / "src" / "app.py").write_text("print('x')", encoding="utf-8")
+
+ out = FilesystemSnapshotResolver().augment(project_id=str(root), files=[])
+ paths = {item["path"] for item in out}
+ assert "docs/README.md" in paths
+ assert "src/app.py" not in paths
+
+
+def test_augment_keeps_docs_when_request_uses_absolute_paths(tmp_path) -> None:
+ root = tmp_path / "repo"
+ root.mkdir()
+ (root / "docs" / "api").mkdir(parents=True)
+ (root / "docs" / "api" / "health-endpoint.md").write_text("disk", encoding="utf-8")
+
+ absolute_docs = str(root / "docs" / "api" / "control-actions-endpoint.md")
+ absolute_src = str(root / "src" / "app.py")
+ incoming = [
+ {"path": absolute_docs, "content": "req-doc", "content_hash": hashlib.sha256(b"req-doc").hexdigest()},
+ {"path": absolute_src, "content": "req-src", "content_hash": hashlib.sha256(b"req-src").hexdigest()},
+ ]
+
+ out = FilesystemSnapshotResolver().augment(project_id=str(root), files=incoming)
+ paths = {item["path"] for item in out}
+ assert "docs/api/control-actions-endpoint.md" in paths
+ assert "src/app.py" not in paths
diff --git a/tests/unit_tests/rag/test_docs_indexing_pipeline.py b/tests/unit_tests/rag/test_docs_indexing_pipeline.py
index 03b91ab..288556e 100644
--- a/tests/unit_tests/rag/test_docs_indexing_pipeline.py
+++ b/tests/unit_tests/rag/test_docs_indexing_pipeline.py
@@ -304,3 +304,34 @@ Read health
assert RagLayer.DOCS_INTEGRATION_INDEX in layers
assert "docs integration parse warning" in caplog.text
assert all(doc.source.path == "docs/api/health-endpoint.md" for doc in docs)
+
+
+def test_docs_pipeline_tolerates_broken_frontmatter_and_keeps_api_type() -> None:
+ pipeline = DocsIndexingPipeline()
+ content = """---
+id: api.control_actions_endpoint
+type: api_method
+doc_type: api_method
+title: HTTP API /actions/{action}
+endpoint: GET|POST /actions/{action}
+links:
+ called_by:
+ - ext.operator
+ tags:
+- api
+---
+# HTTP API /actions/{action}
+
+## Summary
+
+Control actions endpoint.
+"""
+ docs = pipeline.index_file(
+ repo_id="acme/proj",
+ commit_sha="abc123",
+ path="docs/api/control-actions-endpoint.md",
+ content=content,
+ )
+ catalog = next(doc for doc in docs if doc.layer == RagLayer.DOCS_DOCUMENT_CATALOG)
+ assert catalog.metadata["type"] == "api_method"
+ assert catalog.metadata["title"] == "HTTP API /actions/{action}"