diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..af98d06 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "files.exclude": { + "**/__pycache__": true + } +} diff --git a/_process.zip b/_process.zip index c954819..ce74e52 100644 Binary files a/_process.zip and b/_process.zip differ diff --git a/src/app/core/agent/processes/__init__.py b/src/app/core/agent/processes/__init__.py index 1583712..510c092 100644 --- a/src/app/core/agent/processes/__init__.py +++ b/src/app/core/agent/processes/__init__.py @@ -1,6 +1,6 @@ from app.core.agent.processes.base import AgentProcess, ProcessResult from app.core.agent.processes.v1.process import V1Process -from app.core.agent.processes.v2.process import V2Process +from app.core.agent.processes.v2.v2_process import V2Process __all__ = [ "AgentProcess", diff --git a/src/app/core/agent/processes/base.py b/src/app/core/agent/processes/base.py index 6a49017..9b67fb8 100644 --- a/src/app/core/agent/processes/base.py +++ b/src/app/core/agent/processes/base.py @@ -2,8 +2,11 @@ from __future__ import annotations from abc import ABC, abstractmethod from dataclasses import dataclass +from dataclasses import field from typing import TYPE_CHECKING +from app.schemas.changeset import ChangeItem + if TYPE_CHECKING: from app.core.agent.runtime.execution_context import RuntimeExecutionContext @@ -11,6 +14,8 @@ if TYPE_CHECKING: @dataclass(slots=True) class ProcessResult: answer: str = "" + changeset: list[ChangeItem] = field(default_factory=list) + apply_changeset: bool = False class AgentProcess(ABC): diff --git a/src/app/core/agent/processes/v2/__init__.py b/src/app/core/agent/processes/v2/__init__.py index 3c34915..b2633c3 100644 --- a/src/app/core/agent/processes/v2/__init__.py +++ b/src/app/core/agent/processes/v2/__init__.py @@ -1,4 +1,11 @@ -from app.core.agent.processes.v2.process import V2Process from app.core.agent.processes.v2.intent_router.router import V2IntentRouter __all__ = 
["V2IntentRouter", "V2Process"] + + +def __getattr__(name: str): + if name == "V2Process": + from app.core.agent.processes.v2.v2_process import V2Process + + return V2Process + raise AttributeError(name) diff --git a/src/app/core/agent/processes/v2/doc_rules.zip b/src/app/core/agent/processes/v2/doc_rules.zip new file mode 100644 index 0000000..0b5cb5b Binary files /dev/null and b/src/app/core/agent/processes/v2/doc_rules.zip differ diff --git a/src/app/core/agent/processes/v2/doc_rules/README.md b/src/app/core/agent/processes/v2/doc_rules/README.md new file mode 100644 index 0000000..7d5472f --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/README.md @@ -0,0 +1,53 @@ +# Documentation Rules Index + +Этот каталог содержит локализованную проекцию правил построения документации проекта. +Источником истины для структуры и качества документов являются process-документы: +- `/Users/alex/Dev_projects_v2/ai driven app process/v2/agent/_process/01. Process.md` +- `/Users/alex/Dev_projects_v2/ai driven app process/v2/agent/_process/04. Analitycs artefacts.md` + +Файлы ниже не должны противоречить этим документам, а лишь конкретизируют их для `test_echo_app`. + +## Порядок использования + +1. Сначала прочитать `global/documentation-system.md`. +2. Затем прочитать `global/frontmatter.md` и `global/linking.md`. +3. Затем выбрать правило из `artifact-types/` по `doc_type`. +4. Затем использовать шаблон из `templates/`. +5. Для уточнения отдельных частей документа использовать правила из `sections/`. + +## Структура каталога + +- `global/` — общие правила системы документации. +- `artifact-types/` — правила по типам артефактов. +- `sections/` — правила для отдельных секций документов. +- `templates/` — шаблоны документов. 
+ +## Содержимое + +### Global +- `global/documentation-system.md` +- `global/frontmatter.md` +- `global/writing-style.md` +- `global/linking.md` +- `global/naming.md` + +### Artifact types +- `artifact-types/api_method.md` +- `artifact-types/logic_block.md` +- `artifact-types/architecture_overview.md` +- `artifact-types/domain_entity.md` +- `artifact-types/ui_page.md` +- `artifact-types/integration_doc.md` + +### Sections +- `sections/summary.md` +- `sections/details.md` +- `sections/api-scenario.md` +- `sections/api-contract.md` +- `sections/requirements-format.md` + +### Templates +- `templates/api_method.template.md` +- `templates/logic_block.template.md` +- `templates/architecture_overview.template.md` +- `templates/domain_entity.template.md` diff --git a/src/app/core/agent/processes/v2/doc_rules/artifact-types/api_method.md b/src/app/core/agent/processes/v2/doc_rules/artifact-types/api_method.md new file mode 100644 index 0000000..bfc6428 --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/artifact-types/api_method.md @@ -0,0 +1,39 @@ +# API Method Rules + +## Назначение + +Этот файл задает правила для документов типа `api_method`. + +## Когда использовать + +Использовать для описания одного HTTP endpoint или одного отдельного API метода. + +## Обязательная структура + +Документ должен содержать: +- YAML frontmatter +- `# <title>` +- `## Summary` +- `## Details` + +Внутри `## Details` обязательны: +- `### Описание` +- `### Сценарий` +- `### Функциональные требования` +- `### Нефункциональные требования` +- `### Контракт` + +## Особые правила + +- Сценарий оформляется как технический use case. +- Функциональные требования маркируются `FR-*`. +- Нефункциональные требования маркируются `NFR-*`. +- Контракт должен быть пригоден для последующей сборки OpenAPI. +- Если у метода есть интеграции, они выносятся в `### Интеграции`. +- Ошибки и HTTP-коды либо описываются в `### Ошибки`, либо ссылаются на централизованный каталог ошибок. 
+ +## Ошибки оформления + +- Нельзя заменять контракт общим текстовым описанием. +- Нельзя смешивать несколько endpoint в одном документе. +- Нельзя хранить связи и навигацию вне frontmatter. diff --git a/src/app/core/agent/processes/v2/doc_rules/artifact-types/architecture_overview.md b/src/app/core/agent/processes/v2/doc_rules/artifact-types/architecture_overview.md new file mode 100644 index 0000000..e4b146e --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/artifact-types/architecture_overview.md @@ -0,0 +1,31 @@ +# Architecture Overview Rules + +## Назначение + +Этот файл задает правила для документов типа `architecture_overview`. + +## Когда использовать + +Использовать как входной документ для понимания системы, модуля или сервиса. + +## Обязательная структура + +Документ должен содержать: +- YAML frontmatter +- `# <title>` +- `## Summary` +- `## Details` + +## Что описывать в Details + +- границы системы +- основные компоненты +- ключевые взаимодействия +- интеграционные сценарии +- главные ограничения +- ссылки на дочерние документы по API, logic, domain и другим артефактам + +## Ошибки оформления + +- Нельзя дублировать в архитектурном обзоре полные API-контракты. +- Нельзя делать архитектурный обзор единственным документом на всю систему без декомпозиции. diff --git a/src/app/core/agent/processes/v2/doc_rules/artifact-types/domain_entity.md b/src/app/core/agent/processes/v2/doc_rules/artifact-types/domain_entity.md new file mode 100644 index 0000000..c533266 --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/artifact-types/domain_entity.md @@ -0,0 +1,30 @@ +# Domain Entity Rules + +## Назначение + +Этот файл задает правила для документов типа `domain_entity`. + +## Когда использовать + +Использовать для описания одной доменной сущности, ее смысла, состояния и роли в системе. 
+ +## Обязательная структура + +Документ должен содержать: +- YAML frontmatter +- `# <title>` +- `## Summary` +- `## Details` + +## Что описывать в Details + +- смысл сущности +- ключевые атрибуты +- состояния или инварианты +- использование сущности в системе +- интеграции с API, workflow или внешними потребителями, если они важны для понимания модели + +## Ошибки оформления + +- Нельзя смешивать несколько независимых сущностей в одном документе. +- Нельзя подменять доменную сущность описанием endpoint или workflow. diff --git a/src/app/core/agent/processes/v2/doc_rules/artifact-types/integration_doc.md b/src/app/core/agent/processes/v2/doc_rules/artifact-types/integration_doc.md new file mode 100644 index 0000000..7f1fe35 --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/artifact-types/integration_doc.md @@ -0,0 +1,25 @@ +# Integration Doc Rules + +## Назначение + +Этот файл задает правила для документов типа `integration_doc`. + +## Когда использовать + +Использовать для описания интеграции между системами, сервисами или внешними провайдерами. + +## Обязательная структура + +Документ должен содержать: +- YAML frontmatter +- `# <title>` +- `## Summary` +- `## Details` + +## Что описывать в Details + +- цель интеграции +- участвующие стороны +- направление обмена +- ключевой сценарий взаимодействия +- ограничения и риски diff --git a/src/app/core/agent/processes/v2/doc_rules/artifact-types/logic_block.md b/src/app/core/agent/processes/v2/doc_rules/artifact-types/logic_block.md new file mode 100644 index 0000000..788e4f7 --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/artifact-types/logic_block.md @@ -0,0 +1,31 @@ +# Logic Block Rules + +## Назначение + +Этот файл задает правила для документов типа `logic_block`. + +## Когда использовать + +Использовать для описания одного законченного блока логики, workflow или процесса. 
+ +## Обязательная структура + +Документ должен содержать: +- YAML frontmatter +- `# <title>` +- `## Summary` +- `## Details` + +## Что описывать в Details + +- назначение логического блока +- входы и выходы +- последовательность выполнения +- интеграции +- ключевые ограничения +- состояние и ошибки, если они важны для понимания блока + +## Ошибки оформления + +- Нельзя описывать весь модуль целиком, если логика распадается на несколько независимых блоков. +- Нельзя превращать документ в пересказ исходного кода построчно. diff --git a/src/app/core/agent/processes/v2/doc_rules/artifact-types/ui_page.md b/src/app/core/agent/processes/v2/doc_rules/artifact-types/ui_page.md new file mode 100644 index 0000000..9640671 --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/artifact-types/ui_page.md @@ -0,0 +1,24 @@ +# UI Page Rules + +## Назначение + +Этот файл задает правила для документов типа `ui_page`. + +## Когда использовать + +Использовать для описания одной пользовательской страницы, экрана или отдельного UI-сценария. + +## Обязательная структура + +Документ должен содержать: +- YAML frontmatter +- `# <title>` +- `## Summary` +- `## Details` + +## Что описывать в Details + +- назначение страницы +- пользовательский сценарий +- основные блоки интерфейса +- связанные API и сущности diff --git a/src/app/core/agent/processes/v2/doc_rules/documentation-rules.md b/src/app/core/agent/processes/v2/doc_rules/documentation-rules.md new file mode 100644 index 0000000..1be0884 --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/documentation-rules.md @@ -0,0 +1,71 @@ +# Documentation Rules + +Этот каталог оформляет MVP документации проекта в атомарном формате. + +## Базовая структура + +- Каждый документ содержит YAML frontmatter. +- В документе должен быть один `H1`, совпадающий с `title`. +- Основные разделы оформляются как `## Summary` и `## Details`. +- Внутри `Details` используются заголовки уровня `###` и ниже. 
+- Связи, сущности и навигация описываются во frontmatter через `related_docs`, `links`, `entities`, `parent`, `children`. + +## Summary + +- Краткий explain-слой быстрого контекста. +- Должен позволять быстро понять назначение документа без чтения `Details`. +- Предпочтительный формат: компактный список ключевых фактов без длинных абзацев. + +## Details + +- Раскрывает полное описание объекта. +- Структура `Details` зависит от типа документа. +- Сценарии, ограничения, интеграции, ошибки и кодовые привязки должны быть разнесены по отдельным подразделам. + +## API documents + +Для `api_method` внутри `## Details` обязательны разделы: +- `### Описание` +- `### Сценарий` +- `### Функциональные требования` +- `### Нефункциональные требования` +- `### Контракт` + +Если у метода есть интеграции и ошибки, также обязательны: +- `### Интеграции` +- `### Ошибки` +- `### Связанный код` +- `### История изменений` + +### Сценарий + +Сценарий оформляется как технический use case и содержит: +- название +- предусловия +- триггер +- основной сценарий +- альтернативный сценарий +- обработку ошибок +- постусловие + +### Требования + +- Функциональные требования маркируются как `FR-1`, `FR-2`, ... +- Нефункциональные требования маркируются как `NFR-1`, `NFR-2`, ... +- Идентификаторы требований локальны в рамках одного документа. 
+ +### Контракт + +Контракт должен быть пригоден для последующей сборки OpenAPI-спецификации и включать: +- входные параметры +- выходные параметры +- структуру JSON-сообщений +- обязательность полей +- типы и ограничения +- описание полей +- правила заполнения +- примеры данных +- auth +- idempotency +- timeout +- ошибки и их HTTP-коды diff --git a/src/app/core/agent/processes/v2/doc_rules/global/documentation-system.md b/src/app/core/agent/processes/v2/doc_rules/global/documentation-system.md new file mode 100644 index 0000000..656b569 --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/global/documentation-system.md @@ -0,0 +1,38 @@ +# Documentation System + +## Назначение + +Этот файл задает общую модель документации проекта. + +## Базовая модель + +Каждый документ должен состоять из двух слоев: +- YAML frontmatter +- контент + +Контент всегда состоит из двух обязательных разделов: +- `## Summary` +- `## Details` + +Над ними должен быть один заголовок `# <title>`, совпадающий со значением `title` во frontmatter. + +## Принципы + +- Документы должны быть атомарными. +- Один документ описывает одну тему. +- Вместо дублирования между документами используются явные ссылки. +- Связи и навигация должны быть формализованы. +- Документы должны быть пригодны для чтения человеком и для RAG. +- Документы должны быть пригодны для частичного обновления без деградации структуры. 
+ +## Типы документов + +На уровне проекта поддерживаются типы: +- `api_method` +- `logic_block` +- `architecture_overview` +- `domain_entity` +- `ui_page` +- `integration_doc` +- `index_page` +- `glossary_item` diff --git a/src/app/core/agent/processes/v2/doc_rules/global/frontmatter.md b/src/app/core/agent/processes/v2/doc_rules/global/frontmatter.md new file mode 100644 index 0000000..2b4dee5 --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/global/frontmatter.md @@ -0,0 +1,67 @@ +# Frontmatter Rules + +## Назначение + +Этот файл описывает единый контракт YAML frontmatter для всех документов. + +## Обязательные поля + +```yaml +id: string +title: string +doc_type: string +domain: string +sub_domain: string +related_docs: [] +status: string +``` + +## Поля совместимости и рекомендуемые поля + +```yaml +type: string +name: string +module: string +layer: string +updated_at: YYYY-MM-DD +tags: [] +entities: [] +parent: string | null +children: [] +links: {} +source_of_truth: string +related_code: [] +system_analytics_refs: [] +``` + +## Правила + +- `id` должен быть стабильным и уникальным в пределах документации проекта. +- `title` — человекочитаемый заголовок. +- `doc_type` — канонический тип документа. +- `domain` и `sub_domain` определяют бизнес-контекст документа. +- `related_docs` хранит явные связи с другими markdown-документами. +- `status` хранит жизненный цикл документа: например `draft`, `approved`, `active`. +- `type` допустимо дублировать как alias для tooling-совместимости с индексаторами. +- `name` — короткое системное имя документа. +- `module` — модуль или подсистема. +- `layer` — слой системы. +- `updated_at` хранится в формате `YYYY-MM-DD`. + +## Связи и навигация + +- `entities` описывает сущности, связанные с документом. +- `parent` и `children` описывают иерархию. +- `links` описывает typed graph связей между документами, кодом и интеграциями. 
+ +## Формат links + +```yaml +links: + called_by: + - ext.health_probe + uses_logic: + - logic.some_flow + integrates_with: + - ext.some_system +``` diff --git a/src/app/core/agent/processes/v2/doc_rules/global/linking.md b/src/app/core/agent/processes/v2/doc_rules/global/linking.md new file mode 100644 index 0000000..6e49171 --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/global/linking.md @@ -0,0 +1,33 @@ +# Linking Rules + +## Назначение + +Этот файл описывает, как связывать документы между собой. + +## Иерархия + +- `parent` используется для родительского документа. +- `children` используется для прямых дочерних документов. +- Иерархия должна быть осмысленной и стабильной. +- Для общей точки входа допустим `index_page`. + +## Графовые связи + +Для `related_docs` используются ссылки на соседние документы. + +Для `links` рекомендуется использовать typed-ключи: +- `called_by` +- `uses_logic` +- `reads_db` +- `writes_db` +- `integrates_with` +- `used_by` +- `exposes_api` +- `uses_entities` + +## Правила использования + +- Если документ логически входит в другой, использовать `parent`/`children`. +- Если связь нужна для навигации между равноправными документами, дублировать ее в `related_docs`. +- Если связь отражает поведение, интеграции или переиспользование, фиксировать ее в `links`. +- Детальное описание интеграций хранить в body документа, а не только во frontmatter. diff --git a/src/app/core/agent/processes/v2/doc_rules/global/naming.md b/src/app/core/agent/processes/v2/doc_rules/global/naming.md new file mode 100644 index 0000000..c722416 --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/global/naming.md @@ -0,0 +1,24 @@ +# Naming Rules + +## Назначение + +Этот файл описывает правила именования документов, файлов и идентификаторов. + +## Правила для файлов + +- Имена файлов должны быть в kebab-case. +- Имя файла должно отражать одну тему. +- Для шаблонов использовать суффикс `.template.md`. 
+ +## Правила для id + +- `id` строится в формате `<type-group>.<name>`. +- Примеры: + - `api.send_message_endpoint` + - `logic.telegram_notification_loop` + - `architecture.telegram_notify_app` + +## Правила для title + +- `title` должен быть кратким и человекочитаемым. +- В `title` допускаются пробелы и естественный язык. diff --git a/src/app/core/agent/processes/v2/doc_rules/global/writing-style.md b/src/app/core/agent/processes/v2/doc_rules/global/writing-style.md new file mode 100644 index 0000000..6c1caec --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/global/writing-style.md @@ -0,0 +1,19 @@ +# Writing Style + +## Назначение + +Этот файл задает правила стиля для текстового наполнения документации. + +## Правила стиля + +- Текст должен быть лаконичным. +- Формулировки должны быть точными и техническими. +- Summary должен быть кратким explain-слоем. +- Details должен раскрывать суть без лишней воды. +- Нежелательно смешивать несколько тем в одном документе. +- Если детали относятся к другому артефакту, их нужно выносить в отдельный документ. + +## Язык + +- Основной язык документации — русский. +- Технические термины, названия классов, API, RAG, OpenAPI, runtime и другие устоявшиеся identifiers можно оставлять на английском. diff --git a/src/app/core/agent/processes/v2/doc_rules/sections/api-contract.md b/src/app/core/agent/processes/v2/doc_rules/sections/api-contract.md new file mode 100644 index 0000000..fc313ea --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/sections/api-contract.md @@ -0,0 +1,24 @@ +# API Contract Rules + +## Назначение + +Этот файл описывает, как оформлять подраздел `### Контракт` в API-документах. 
+ +## Что должно быть описано + +- входные параметры +- выходные параметры +- JSON-структуры запросов и ответов +- обязательность полей +- типы полей +- ограничения +- описание назначения полей +- примеры данных +- auth +- idempotency +- timeout +- ошибки и их HTTP-коды + +## Правило качества + +Контракт должен быть достаточно формальным, чтобы по нему можно было собрать OpenAPI-спецификацию. diff --git a/src/app/core/agent/processes/v2/doc_rules/sections/api-scenario.md b/src/app/core/agent/processes/v2/doc_rules/sections/api-scenario.md new file mode 100644 index 0000000..c9066af --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/sections/api-scenario.md @@ -0,0 +1,21 @@ +# API Scenario Rules + +## Назначение + +Этот файл описывает, как оформлять подраздел `### Сценарий` в API-документах. + +## Обязательные части + +- название +- предусловия +- триггер +- основной сценарий +- альтернативный сценарий +- обработка ошибок +- постусловие + +## Правила + +- Сценарий должен быть лаконичным. +- Сценарий должен отражать суть шага. +- Сложные технические детали надо выносить в `FR-*`. diff --git a/src/app/core/agent/processes/v2/doc_rules/sections/details.md b/src/app/core/agent/processes/v2/doc_rules/sections/details.md new file mode 100644 index 0000000..33ed824 --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/sections/details.md @@ -0,0 +1,13 @@ +# Details Section Rules + +## Назначение + +Этот файл задает общие правила для секции `## Details`. + +## Правила + +- `Details` оформляется как `## Details`. +- Внутри `Details` используются заголовки уровня `###` и ниже. +- Структура Details зависит от типа документа. +- В Details не нужно повторно дублировать навигацию и связи, если они уже есть во frontmatter. +- Интеграции, ошибки и кодовые привязки должны быть выделены в отдельные подразделы, если они существенны для понимания документа. 
diff --git a/src/app/core/agent/processes/v2/doc_rules/sections/requirements-format.md b/src/app/core/agent/processes/v2/doc_rules/sections/requirements-format.md new file mode 100644 index 0000000..14eb0c3 --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/sections/requirements-format.md @@ -0,0 +1,16 @@ +# Requirements Format Rules + +## Назначение + +Этот файл задает формат для функциональных и нефункциональных требований. + +## Функциональные требования + +- Использовать коды `FR-1`, `FR-2`, `FR-3` и так далее. +- Каждое требование должно описывать отдельный обязательный аспект поведения. +- Идентификаторы локальны в пределах одного документа. + +## Нефункциональные требования + +- Использовать коды `NFR-1`, `NFR-2`, `NFR-3` и так далее. +- Требования должны описывать характеристики качества, ограничения и эксплуатационные свойства. diff --git a/src/app/core/agent/processes/v2/doc_rules/sections/summary.md b/src/app/core/agent/processes/v2/doc_rules/sections/summary.md new file mode 100644 index 0000000..7f7fa7b --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/sections/summary.md @@ -0,0 +1,13 @@ +# Summary Section Rules + +## Назначение + +Этот файл задает правила для секции `## Summary`. + +## Правила + +- Summary должен быть коротким explain-слоем быстрого контекста. +- Summary должен объяснять суть документа без лишних деталей. +- Summary должен быть пригоден для explain и быстрого чтения. +- Предпочтительный формат: список ключевых фактов `Purpose`, `Actor`, `Trigger`, `Errors`, `Related ...` и т.д. +- Для крупных документов допустим более длинный summary, если он остается структурированным. 
diff --git a/src/app/core/agent/processes/v2/doc_rules/templates/api_method.template.md b/src/app/core/agent/processes/v2/doc_rules/templates/api_method.template.md new file mode 100644 index 0000000..ba2ac07 --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/templates/api_method.template.md @@ -0,0 +1,84 @@ +--- +id: api.example_method +type: api_method +doc_type: api_method +name: example_method +title: HTTP API /example +module: example_module +layer: application +domain: example_domain +sub_domain: example_subdomain +related_docs: [] +status: draft +updated_at: 2026-03-20 +source_of_truth: code +parent: null +children: [] +tags: [] +entities: [] +links: {} +--- + +# HTTP API /example + +## Summary + +Краткое описание метода. + +## Details + +## Описание + +Короткое описание сути метода. + +## Сценарий + +**Название:** + +**Предусловия:** +- + +**Триггер:** +- + +**Основной сценарий:** +1. + +**Альтернативный сценарий:** +1. + +**Обработка ошибок:** +1. + +**Постусловие:** +- + +## Функциональные требования + +**FR-1.** + +## Нефункциональные требования + +**NFR-1.** + +## Контракт + +### Входные параметры + +| Параметр | Где передается | Тип | Обязательность | Ограничения | Описание | Пример | +|---|---|---|---|---|---|---| +| | | | | | | | + +### Выходные параметры + +| Поле | Тип | Обязательность | Ограничения | Описание | Заполнение | Пример | +|---|---|---|---|---|---|---| +| | | | | | | | + +### Интеграции + +### Ошибки + +### Связанный код + +### История изменений diff --git a/src/app/core/agent/processes/v2/doc_rules/templates/architecture_overview.template.md b/src/app/core/agent/processes/v2/doc_rules/templates/architecture_overview.template.md new file mode 100644 index 0000000..21c7319 --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/templates/architecture_overview.template.md @@ -0,0 +1,48 @@ +--- +id: architecture.example_system +type: architecture_overview +doc_type: architecture_overview +name: example_system +title: Обзор 
архитектуры Example System +module: example_module +layer: system +domain: example_domain +sub_domain: example_subdomain +related_docs: [] +status: draft +updated_at: 2026-03-20 +source_of_truth: mixed +parent: null +children: [] +tags: [] +entities: [] +links: {} +--- + +# Обзор архитектуры Example System + +## Summary + +Краткое описание архитектуры. + +## Details + +### Описание + +### Контекст + +### Границы системы + +### Компоненты + +### Интеграционные сценарии + +### Интеграции + +### Ограничения + +### Связанный код + +### Связанные документы + +### История изменений diff --git a/src/app/core/agent/processes/v2/doc_rules/templates/domain_entity.template.md b/src/app/core/agent/processes/v2/doc_rules/templates/domain_entity.template.md new file mode 100644 index 0000000..f8fd65e --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/templates/domain_entity.template.md @@ -0,0 +1,48 @@ +--- +id: domain.example_entity +type: domain_entity +doc_type: domain_entity +name: example_entity +title: Пример доменной сущности +module: example_module +layer: domain +domain: example_domain +sub_domain: example_subdomain +related_docs: [] +status: draft +updated_at: 2026-03-20 +source_of_truth: code +parent: null +children: [] +tags: [] +entities: [] +links: {} +--- + +# Пример доменной сущности + +## Summary + +Краткое описание сущности. 
+ +## Details + +### Описание + +### Модель данных + +### Состояния и инварианты + +### Технический use case + +### Функциональные требования + +### Нефункциональные требования + +### Интеграции + +### Связанный код + +### Связанные документы + +### История изменений diff --git a/src/app/core/agent/processes/v2/doc_rules/templates/logic_block.template.md b/src/app/core/agent/processes/v2/doc_rules/templates/logic_block.template.md new file mode 100644 index 0000000..36e1d5d --- /dev/null +++ b/src/app/core/agent/processes/v2/doc_rules/templates/logic_block.template.md @@ -0,0 +1,50 @@ +--- +id: logic.example_block +type: logic_block +doc_type: logic_block +name: example_block +title: Пример блока логики +module: example_module +layer: application +domain: example_domain +sub_domain: example_subdomain +related_docs: [] +status: draft +updated_at: 2026-03-20 +source_of_truth: code +parent: null +children: [] +tags: [] +entities: [] +links: {} +--- + +# Пример блока логики + +## Summary + +Краткое описание блока логики. 
+ +## Details + +### Описание + +### Контекст + +### Технический use case + +### Функциональные требования + +### Нефункциональные требования + +### Интеграции + +### Ограничения и условия вызова + +### Ошибки и деградации + +### Связанные API + +### Связанный код + +### История изменений diff --git a/src/app/core/agent/processes/v2/intent_router/models.py b/src/app/core/agent/processes/v2/intent_router/models.py index a054940..91c04d3 100644 --- a/src/app/core/agent/processes/v2/intent_router/models.py +++ b/src/app/core/agent/processes/v2/intent_router/models.py @@ -16,3 +16,4 @@ class QueryFeatures: logic_markers: list[str] domain_markers: list[str] endpoint_markers: list[str] + scope_type: str = "unknown" diff --git a/src/app/core/agent/processes/v2/intent_router/modules/anchors.py b/src/app/core/agent/processes/v2/intent_router/modules/anchors.py index 61d83d5..b9ca68c 100644 --- a/src/app/core/agent/processes/v2/intent_router/modules/anchors.py +++ b/src/app/core/agent/processes/v2/intent_router/modules/anchors.py @@ -4,7 +4,7 @@ import re from dataclasses import dataclass from app.core.agent.processes.v2.intent_router.modules.target_terms import TargetTermsAnalysis -from app.core.agent.processes.v2.models import V2RouteAnchors +from app.core.agent.utils.process_v2.models import V2RouteAnchors @dataclass(slots=True) diff --git a/src/app/core/agent/processes/v2/intent_router/modules/scope_catalog.py b/src/app/core/agent/processes/v2/intent_router/modules/scope_catalog.py new file mode 100644 index 0000000..15e9eb1 --- /dev/null +++ b/src/app/core/agent/processes/v2/intent_router/modules/scope_catalog.py @@ -0,0 +1,176 @@ +"""Build an in-memory DOCS scope index from D1/D3 catalog rows (no chunk retrieval). + +Parses metadata from ``D1_DOCUMENT_CATALOG`` and ``D3_ENTITY_CATALOG`` rows produced by the +existing RAG indexer—no additional layers or chunk scans. 
+""" + +from __future__ import annotations + +import re +from dataclasses import dataclass, field + + +def _norm_text(value: object) -> str: + return re.sub(r"\s+", " ", str(value or "").strip().lower()) + + +def _split_multi(value: object) -> list[str]: + if value is None: + return [] + if isinstance(value, list): + raw = value + else: + raw = re.split(r"[;,|]", str(value)) + out: list[str] = [] + for item in raw: + s = str(item).strip() + if s: + out.append(s) + return out + + +@dataclass(slots=True) +class DocsScopeCatalog: + """Flattened terms from D1_DOCUMENT_CATALOG and D3_ENTITY_CATALOG for lexical grounding.""" + + domain_values: set[str] = field(default_factory=set) + subdomain_pairs: list[tuple[str, str]] = field(default_factory=list) # (domain, subdomain) + entity_records: list[dict[str, object]] = field(default_factory=list) + api_records: list[dict[str, object]] = field(default_factory=list) + + +def build_docs_scope_catalog(rows: list[dict]) -> DocsScopeCatalog: + """Derive searchable terms from catalog layers only (existing RAG index rows).""" + catalog = DocsScopeCatalog() + for row in rows: + layer = str(row.get("layer") or "") + meta = row.get("metadata") + if not isinstance(meta, dict): + meta = {} + path = str(row.get("path") or "") + title = str(row.get("title") or "") + content = str(row.get("content") or "") + + if layer == "D1_DOCUMENT_CATALOG": + _ingest_d1_row(catalog, path=path, title=title, content=content, metadata=meta) + elif layer == "D3_ENTITY_CATALOG": + _ingest_d3_row(catalog, path=path, title=title, metadata=meta) + + return catalog + + +def _ingest_d1_row( + catalog: DocsScopeCatalog, + *, + path: str, + title: str, + content: str, + metadata: dict, +) -> None: + doc_type = _norm_text(metadata.get("type") or metadata.get("doc_type")) + domain = _norm_text(metadata.get("domain")) + subdomain = _norm_text(metadata.get("subdomain")) + name = _norm_text(metadata.get("name")) + summary = _norm_text(metadata.get("summary_text")) + 
endpoint = _norm_text(metadata.get("endpoint")) + + entities = [_norm_text(e) for e in _split_multi(metadata.get("entities"))] + tags = [_norm_text(t) for t in _split_multi(metadata.get("tags"))] + + if domain: + catalog.domain_values.add(domain) + if domain and subdomain: + catalog.subdomain_pairs.append((domain, subdomain)) + + blob = " ".join(x for x in (name, title, summary, content) if x) + for ent in entities: + if ent: + catalog.entity_records.append( + { + "name": ent, + "domain": domain or None, + "subdomain": subdomain or None, + "source_layer": "D1_DOCUMENT_CATALOG", + "path": path, + "blob": blob, + } + ) + for tag in tags: + if tag and len(tag) >= 3: + catalog.entity_records.append( + { + "name": tag, + "domain": domain or None, + "subdomain": subdomain or None, + "source_layer": "D1_DOCUMENT_CATALOG", + "path": path, + "blob": blob, + } + ) + + is_api_method = doc_type == "api_method" or "api_method" in path.lower() + if is_api_method or endpoint: + ep = endpoint or _endpoint_from_title(title) + if ep: + catalog.api_records.append( + { + "endpoint": ep, + "domain": domain or None, + "source_layer": "D1_DOCUMENT_CATALOG", + "path": path, + "title": title, + } + ) + + +def _ingest_d3_row( + catalog: DocsScopeCatalog, + *, + path: str, + title: str, + metadata: dict, +) -> None: + entity_name = str(metadata.get("entity_name") or "").strip() + domain = _norm_text(metadata.get("domain")) + subdomain = _norm_text(metadata.get("subdomain")) + module = _norm_text(metadata.get("module")) + source_path = str(metadata.get("source_path") or "").strip() + tags = [_norm_text(t) for t in _split_multi(metadata.get("tags"))] + + if domain: + catalog.domain_values.add(domain) + if domain and subdomain: + catalog.subdomain_pairs.append((domain, subdomain)) + + blob = " ".join( + _norm_text(x) + for x in (entity_name, title, module, source_path, " ".join(tags)) + if x + ) + if entity_name: + catalog.entity_records.append( + { + "name": _norm_text(entity_name), + 
"domain": domain or None, + "subdomain": subdomain or None, + "module": module or None, + "source_layer": "D3_ENTITY_CATALOG", + "path": path or source_path, + "blob": blob, + } + ) + + +def _endpoint_from_title(title: str) -> str: + t = str(title or "").strip() + if not t: + return "" + upper = t.upper() + for method in ("GET ", "POST ", "PUT ", "PATCH ", "DELETE "): + if method in upper: + idx = upper.index(method) + rest = t[idx:].split() + if len(rest) >= 2 and rest[1].startswith("/"): + return _norm_text(rest[1]) + m = re.search(r"(\/[a-z0-9_./{}-]+)", t, re.IGNORECASE) + return _norm_text(m.group(1)) if m else "" diff --git a/src/app/core/agent/processes/v2/intent_router/modules/scope_resolver.py b/src/app/core/agent/processes/v2/intent_router/modules/scope_resolver.py new file mode 100644 index 0000000..b61f967 --- /dev/null +++ b/src/app/core/agent/processes/v2/intent_router/modules/scope_resolver.py @@ -0,0 +1,443 @@ +"""Deterministic scope resolution from query + derived DOCS catalog (pre-LLM). + +Matches the user query against catalog terms (exact / normalized). Optional embedding-based +retrieval can extend candidates later; final ``scope_type`` never relies on embeddings alone. 
+""" + +from __future__ import annotations + +import re +from dataclasses import dataclass, field + +from app.core.agent.processes.v2.intent_router.modules.scope_catalog import DocsScopeCatalog +from app.core.agent.processes.v2.intent_router.modules.target_terms import TargetTermsAnalysis +from app.core.agent.utils.process_v2.models import ScopeCandidate, V2ScopeType + + +_SCORE_EXACT = 1.0 +_SCORE_NORMALIZED = 0.88 +_SCORE_SOFT = 0.72 +_STRONG_THRESHOLD = 0.85 + +_ENUM_MARKERS_RU = ( + "какие ", + "какие\n", + "какой ", + "какого ", + "список", + "перечисли", + "перечислить", + "все api", + "все методы", + "какие api", + "какие методы", + "каких ", +) +_SINGLE_SEGMENT_ENDPOINT_ALLOWLIST = frozenset( + { + "/health", + "/send", + "/healthz", + "/ready", + "/live", + "/metrics", + } +) + +_PROJECT_WIDE_MARKERS = ( + "в проекте", + "в системе", + "в приложении", + "по проекту", + "во всем проекте", + "overall", + "in the project", +) + + +@dataclass(slots=True) +class ScopeResolution: + scope_type: str = V2ScopeType.UNKNOWN + candidate_domains: list[ScopeCandidate] = field(default_factory=list) + candidate_subdomains: list[ScopeCandidate] = field(default_factory=list) + candidate_entities: list[ScopeCandidate] = field(default_factory=list) + candidate_apis: list[ScopeCandidate] = field(default_factory=list) + strong_domain: str | None = None + strong_subdomain: str | None = None + strong_entity_names: list[str] = field(default_factory=list) + strong_endpoint_paths: list[str] = field(default_factory=list) + catalog_loaded: bool = False + + +def _catalog_has_index_terms(catalog: DocsScopeCatalog) -> bool: + return bool( + catalog.domain_values + or catalog.subdomain_pairs + or catalog.entity_records + or catalog.api_records + ) + + +def plausible_doc_endpoint_paths(paths: list[str]) -> list[str]: + """Drop spurious ``/token`` paths from api-like heuristics (e.g. 
``/billing`` after ``api``).""" + out: list[str] = [] + for raw in paths: + p = str(raw or "").strip().lower() + if not p.startswith("/"): + continue + segments = [s for s in p.split("/") if s] + if len(segments) >= 2: + out.append(p) + continue + if len(segments) == 1 and p in _SINGLE_SEGMENT_ENDPOINT_ALLOWLIST: + out.append(p) + continue + return out + + +def resolve_docs_scope( + normalized_query: str, + terms: TargetTermsAnalysis, + catalog: DocsScopeCatalog | None, +) -> ScopeResolution: + """Lexical scope resolution; embeddings never set final scope alone (not used here).""" + resolution = ScopeResolution() + if catalog is None: + return resolution + if not _catalog_has_index_terms(catalog): + return resolution + + resolution.catalog_loaded = True + query_l = _norm_query(normalized_query) + if not query_l: + resolution.scope_type = V2ScopeType.UNKNOWN + return resolution + + _collect_domain_candidates(query_l, catalog, resolution) + _collect_subdomain_candidates(query_l, catalog, resolution) + _collect_entity_candidates(query_l, catalog, resolution) + _collect_api_candidates(query_l, catalog, resolution) + + _dedupe_candidates(resolution) + + endpoint_paths = plausible_doc_endpoint_paths(list(terms.endpoint_paths)) + strong_api = _pick_strong(resolution.candidate_apis) + strong_entity = _pick_strong(resolution.candidate_entities) + strong_sub = _pick_strong(resolution.candidate_subdomains) + strong_dom = _pick_strong(resolution.candidate_domains) + + resolution.strong_endpoint_paths = list(dict.fromkeys(endpoint_paths)) + + if endpoint_paths: + resolution.scope_type = V2ScopeType.ENTITY + resolution.strong_entity_names = _merge_unique(resolution.strong_entity_names, _entities_for_endpoints(endpoint_paths, catalog)) + return resolution + + if strong_api and strong_api.score >= _STRONG_THRESHOLD: + resolution.scope_type = V2ScopeType.ENTITY + resolution.strong_endpoint_paths = _merge_unique(resolution.strong_endpoint_paths, [strong_api.value]) + return 
def promote_target_terms(
    raw_terms: list[str],
    terms: TargetTermsAnalysis,
    resolution: ScopeResolution,
) -> list[str]:
    """Keep only high-confidence terms in ``target_terms``; weak matches stay in candidate_* only.

    A term is promoted when it is (case-insensitively) one of the extracted
    endpoint paths or matched aliases, when its lower-cased form equals the
    value of a strong or exact-match catalog candidate, or when it names a
    strong entity. Promoted terms are lower-cased, except a non-path endpoint
    term, which keeps its original case. Output is de-duplicated and keeps
    the input order.

    Args:
        raw_terms: Terms extracted from the query before grounding.
        terms: Extraction result; ``endpoint_paths`` and ``matched_aliases``
            are read.
        resolution: Scope resolution; when ``catalog_loaded`` is false the raw
            terms are returned unchanged (as a copy).

    Returns:
        The promoted terms.
    """
    if not resolution.catalog_loaded:
        return list(raw_terms)

    # Build every lookup set once; none of them depends on the current term.
    candidates = [
        *resolution.candidate_domains,
        *resolution.candidate_subdomains,
        *resolution.candidate_entities,
        *resolution.candidate_apis,
    ]
    strong_values = {
        c.value
        for c in candidates
        if c.score >= _STRONG_THRESHOLD or c.match_type == "exact"
    }
    strong_entity = set(resolution.strong_entity_names)
    endpoints = set(terms.endpoint_paths)
    endpoints_l = {e.lower() for e in endpoints}
    aliases = set(terms.matched_aliases)
    aliases_l = {a.lower() for a in aliases}

    promoted: list[str] = []
    seen: set[str] = set()
    for term in raw_terms:
        t = str(term or "").strip()
        if not t:
            continue
        tl = t.lower()
        if t in endpoints or tl in endpoints_l:
            kept = tl if tl.startswith("/") else t
        elif (
            t in aliases
            or tl in aliases_l
            or tl in strong_values
            or t in strong_entity
            # Cheap set lookup first; the identifier regex runs only on a hit.
            or (tl in strong_entity and _is_explicit_identifier(t))
        ):
            kept = tl
        else:
            # Drop weak/ungrounded terms (they remain only in candidates on anchors).
            continue
        if kept not in seen:
            seen.add(kept)
            promoted.append(kept)
    return promoted
def _entities_for_endpoints(endpoint_paths: list[str], catalog: DocsScopeCatalog) -> list[str]:
    """Return names of catalog entities whose blob mentions any given endpoint.

    Matching is a case-insensitive substring test of each endpoint path
    against the record's ``blob``.

    Args:
        endpoint_paths: Endpoint paths to look for; empty entries are ignored.
        catalog: Catalog whose ``entity_records`` are scanned.

    Returns:
        Lower-cased entity names, de-duplicated, in first-seen order.
    """
    eps = {e.lower() for e in endpoint_paths if e}
    if not eps:
        # Nothing to look for, so skip the catalog scan entirely.
        return []
    # A dict keeps first-seen order and gives O(1) duplicate checks.
    found: dict[str, None] = {}
    for rec in catalog.entity_records:
        name = str(rec.get("name") or "").strip().lower()
        if not name or name in found:
            continue
        blob = str(rec.get("blob") or "").lower()
        if any(ep in blob for ep in eps):
            found[name] = None
    return list(found)
+ score = 0.0 + mt = mt_sub + if score <= 0: + continue + resolution.candidate_subdomains.append( + ScopeCandidate( + value=composite, + score=score, + source_layer="D1_DOCUMENT_CATALOG", + match_type=mt, + ) + ) + + +def _collect_entity_candidates(query_l: str, catalog: DocsScopeCatalog, resolution: ScopeResolution) -> None: + for rec in catalog.entity_records: + name = str(rec.get("name") or "").strip().lower() + if not name or len(name) < 2: + continue + blob = str(rec.get("blob") or "").lower() + layer = str(rec.get("source_layer") or "") + score, mtype = _match_entity(query_l, name, blob) + if score <= 0: + continue + resolution.candidate_entities.append( + ScopeCandidate(value=name, score=score, source_layer=layer, match_type=mtype) + ) + + +def _collect_api_candidates(query_l: str, catalog: DocsScopeCatalog, resolution: ScopeResolution) -> None: + for rec in catalog.api_records: + ep = str(rec.get("endpoint") or "").strip().lower() + if not ep: + continue + layer = str(rec.get("source_layer") or "") + score, mtype = _match_score(query_l, ep.replace(" ", "")) + if score <= 0: + continue + resolution.candidate_apis.append( + ScopeCandidate(value=ep, score=score, source_layer=layer, match_type=mtype) + ) + + +def _phrase_score(query_l: str, dom: str, sub: str) -> float: + if _contains_token(query_l, dom) and _contains_token(query_l, sub): + return max(_SCORE_NORMALIZED, 0.9) + joined = re.sub(r"\s+", " ", f"{dom} {sub}".strip()) + if joined in query_l or query_l in joined: + return _SCORE_NORMALIZED + return 0.0 + + +def _match_entity(query_l: str, name: str, blob: str) -> tuple[float, str]: + score, mt = _match_score(query_l, name) + if score > 0: + return score, mt + if name in blob and len(name) >= 4: + # cross-language hints: name appears in catalog blob; small boost if query token overlaps blob + q_tokens = set(query_l.split()) + b_tokens = set(blob.split()) + overlap = q_tokens & b_tokens + if overlap and (q_tokens & {name} or name[:4] in query_l): + 
def _dedupe_list(items: list[ScopeCandidate]) -> list[ScopeCandidate]:
    """Collapse candidates sharing ``(value, source_layer)`` to the best-scoring one.

    Among equal scores the first candidate seen is kept.

    Args:
        items: Candidates, possibly containing repeated value/layer pairs.

    Returns:
        The surviving candidates sorted by descending score, then by value.
    """
    # A tuple key cannot collide the way a "value|layer" string can when
    # either field itself contains "|".
    best: dict[tuple[str, str], ScopeCandidate] = {}
    for cand in items:
        key = (cand.value, cand.source_layer)
        prev = best.get(key)
        if prev is None or cand.score > prev.score:
            best[key] = cand
    return sorted(best.values(), key=lambda c: (-c.score, c.value))
query_l for k in ("метод", "api", "ручк", "эндпоинт")): + return True + return False diff --git a/src/app/core/agent/processes/v2/intent_router/router.py b/src/app/core/agent/processes/v2/intent_router/router.py index c78596d..f987c99 100644 --- a/src/app/core/agent/processes/v2/intent_router/router.py +++ b/src/app/core/agent/processes/v2/intent_router/router.py @@ -2,8 +2,17 @@ from __future__ import annotations +from collections.abc import Callable +from dataclasses import replace + from app.core.agent.processes.v2.intent_router.modules.anchors import V2AnchorExtractor from app.core.agent.processes.v2.intent_router.modules.normalizer import V2QueryNormalizer +from app.core.agent.processes.v2.intent_router.modules.scope_catalog import DocsScopeCatalog, build_docs_scope_catalog +from app.core.agent.processes.v2.intent_router.modules.scope_resolver import ( + plausible_doc_endpoint_paths, + promote_target_terms, + resolve_docs_scope, +) from app.core.agent.processes.v2.intent_router.modules.target_terms import V2TargetTermsExtractor from app.core.agent.processes.v2.intent_router.models import QueryFeatures from app.core.agent.processes.v2.intent_router.routers.confidence import V2ConfidenceAdjuster @@ -11,8 +20,18 @@ from app.core.agent.processes.v2.intent_router.routers.fallback import V2Fallbac from app.core.agent.processes.v2.intent_router.routers.llm import V2LlmRouter from app.core.agent.processes.v2.intent_router.routers.route_catalog import V2RouteCatalog from app.core.agent.processes.v2.intent_router.routers.validator import V2RouteValidator -from app.core.agent.processes.v2.models import V2RouteResult +from app.core.agent.utils.process_v2.models import V2RouteResult, V2ScopeType from app.core.agent.utils.llm import AgentLlmService +from app.core.rag.persistence.query_repository import RagQueryRepository + + +def _scope_candidate_dict(candidate) -> dict[str, object]: + return { + "value": candidate.value, + "score": candidate.score, + "source_layer": 
candidate.source_layer, + "match_type": candidate.match_type, + } class V2IntentRouter: @@ -25,6 +44,7 @@ class V2IntentRouter: enable_llm_disambiguation: bool = True, route_catalog: V2RouteCatalog | None = None, confidence_adjuster: V2ConfidenceAdjuster | None = None, + scope_rows_provider: Callable[[str], list[dict]] | None = None, ) -> None: self._normalizer = normalizer or V2QueryNormalizer() self._target_terms_extractor = target_terms_extractor or V2TargetTermsExtractor() @@ -35,23 +55,48 @@ class V2IntentRouter: self._confidence_adjuster = confidence_adjuster or V2ConfidenceAdjuster() self._enable_llm_disambiguation = enable_llm_disambiguation self._llm_router = V2LlmRouter(llm, catalog=self._catalog) if llm is not None else None + self._scope_rows_provider = scope_rows_provider - def route(self, user_query: str) -> V2RouteResult: + def route(self, user_query: str, *, rag_session_id: str | None = None) -> V2RouteResult: normalized_query = self._normalizer.normalize(user_query) target_terms_analysis = self._target_terms_extractor.extract(normalized_query) - anchor_analysis = self._anchor_extractor.extract(normalized_query, target_terms_analysis) + sanitized_eps = plausible_doc_endpoint_paths(list(target_terms_analysis.endpoint_paths)) + if sanitized_eps != list(target_terms_analysis.endpoint_paths): + target_terms_analysis = replace(target_terms_analysis, endpoint_paths=sanitized_eps) + allowed_paths = set(sanitized_eps) + target_terms_analysis = replace( + target_terms_analysis, + target_terms=[ + t + for t in target_terms_analysis.target_terms + if not str(t).startswith("/") or str(t).lower() in allowed_paths + ], + ) + raw_target_terms = list(target_terms_analysis.target_terms) + scope_rows = self._load_scope_rows(rag_session_id) + scope_catalog: DocsScopeCatalog | None + if not scope_rows: + scope_catalog = None + else: + scope_catalog = build_docs_scope_catalog(scope_rows) + resolution = resolve_docs_scope(normalized_query, target_terms_analysis, 
scope_catalog) + promoted_terms = promote_target_terms(raw_target_terms, target_terms_analysis, resolution) + refined_terms = replace(target_terms_analysis, target_terms=promoted_terms) + anchor_analysis = self._anchor_extractor.extract(normalized_query, refined_terms) + self._apply_scope_to_anchors(anchor_analysis.anchors, resolution) features = QueryFeatures( normalized_query=normalized_query, - target_terms=list(target_terms_analysis.target_terms), - endpoint_paths=list(target_terms_analysis.endpoint_paths), + target_terms=list(refined_terms.target_terms), + endpoint_paths=list(refined_terms.endpoint_paths), file_names=list(anchor_analysis.anchors.file_names), - matched_aliases=list(target_terms_analysis.matched_aliases), + matched_aliases=list(refined_terms.matched_aliases), target_doc_hints=list(anchor_analysis.anchors.target_doc_hints), file_markers=list(anchor_analysis.file_markers), architecture_markers=list(anchor_analysis.architecture_markers), logic_markers=list(anchor_analysis.logic_markers), domain_markers=list(anchor_analysis.domain_markers), endpoint_markers=list(anchor_analysis.endpoint_markers), + scope_type=resolution.scope_type, ) llm_attempted = self._enable_llm_disambiguation and self._llm_router is not None llm_candidate = self._route_with_llm( @@ -59,7 +104,6 @@ class V2IntentRouter: anchors=anchor_analysis.anchors, ) llm_result = self._validator.validate(llm_candidate) - llm_result = self._apply_deterministic_corrections(llm_result, features) if llm_result is not None: confidence = self._confidence_adjuster.adjust(float(llm_result["confidence"]), features) return V2RouteResult( @@ -74,14 +118,53 @@ class V2IntentRouter: routing_mode="llm_default", llm_router_used=True, reason_short=str(llm_result["reason_short"]), + scope_type=resolution.scope_type, + ) + if llm_attempted: + return self._fallback_router.route_without_deterministic_signals( + user_query=user_query, + features=features, + anchors=anchor_analysis.anchors, + 
def _apply_scope_to_anchors(self, anchors, resolution) -> None:
    """Copy scope-resolution results onto the query anchors in place.

    Candidate lists are always copied. The strong endpoint paths, entity
    names, domain and subdomain are merged in only when the catalog was
    loaded; strong values come first and duplicates are dropped.

    Args:
        anchors: Anchor object to mutate.
        resolution: Result of the deterministic scope resolution.
    """
    anchors.candidate_domains = list(resolution.candidate_domains)
    anchors.candidate_subdomains = list(resolution.candidate_subdomains)
    anchors.candidate_entities = list(resolution.candidate_entities)
    anchors.candidate_apis = list(resolution.candidate_apis)
    if not resolution.catalog_loaded:
        return
    anchors.endpoint_paths = list(
        dict.fromkeys([*resolution.strong_endpoint_paths, *anchors.endpoint_paths])
    )
    anchors.entity_names = list(
        dict.fromkeys([*resolution.strong_entity_names, *anchors.entity_names])
    )
    # These two assignments also cover the SUBDOMAIN scope, where both strong
    # values are set, so no scope-type-specific branch is needed.
    if resolution.strong_domain:
        anchors.process_domain = resolution.strong_domain
    if resolution.strong_subdomain:
        anchors.process_subdomain = resolution.strong_subdomain
"file_names": anchors.file_names, @@ -97,22 +181,11 @@ class V2IntentRouter: "matched_aliases": anchors.matched_aliases, "process_domain": anchors.process_domain, "process_subdomain": anchors.process_subdomain, + "candidate_domains": [_scope_candidate_dict(c) for c in anchors.candidate_domains], + "candidate_subdomains": [_scope_candidate_dict(c) for c in anchors.candidate_subdomains], + "candidate_entities": [_scope_candidate_dict(c) for c in anchors.candidate_entities], + "candidate_apis": [_scope_candidate_dict(c) for c in anchors.candidate_apis], }, ) except Exception: return None - - def _apply_deterministic_corrections(self, candidate: dict | None, features: QueryFeatures) -> dict | None: - if candidate is None: - return None - if candidate.get("routing_domain") == "DOCS" and self._should_force_find_files(features): - corrected = dict(candidate) - corrected["subintent"] = "FIND_FILES" - return corrected - return candidate - - def _should_force_find_files(self, features: QueryFeatures) -> bool: - if features.file_markers or features.file_names: - return True - query = features.normalized_query.lower() - return "show doc" in query or "show file" in query or "doc for" in query diff --git a/src/app/core/agent/processes/v2/intent_router/routers/deterministic.py b/src/app/core/agent/processes/v2/intent_router/routers/deterministic.py index 0c5fc3c..a99f306 100644 --- a/src/app/core/agent/processes/v2/intent_router/routers/deterministic.py +++ b/src/app/core/agent/processes/v2/intent_router/routers/deterministic.py @@ -1,7 +1,7 @@ from __future__ import annotations from app.core.agent.processes.v2.intent_router.models import QueryFeatures -from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2RouteResult, V2Subintent +from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2RouteResult, V2Subintent from app.core.agent.processes.v2.intent_router.routers.docs_subintent_resolver import DocsSubintentResolver diff --git 
a/src/app/core/agent/processes/v2/intent_router/routers/docs_subintent_resolver.py b/src/app/core/agent/processes/v2/intent_router/routers/docs_subintent_resolver.py index e1265dc..498e3c4 100644 --- a/src/app/core/agent/processes/v2/intent_router/routers/docs_subintent_resolver.py +++ b/src/app/core/agent/processes/v2/intent_router/routers/docs_subintent_resolver.py @@ -1,13 +1,33 @@ from __future__ import annotations from app.core.agent.processes.v2.intent_router.models import QueryFeatures -from app.core.agent.processes.v2.models import V2Subintent +from app.core.agent.utils.process_v2.models import V2Subintent class DocsSubintentResolver: + _API_ENUM_MARKERS = ( + "какие api", + "какие эндпоинты", + "какие endpoint", + "список api", + "список эндпоинтов", + "список endpoint", + "все api", + "все эндпоинты", + "перечисли api", + "перечисли эндпоинты", + "доступные api", + "available endpoints", + "exposed api", + ) + _API_WORD_MARKERS = ("api", "эндпоинт", "endpoint", "роут", "route", "метод") + _LIST_WORD_MARKERS = ("какие", "список", "перечисли", "все", "доступные", "list", "available", "exposed") + def resolve(self, features: QueryFeatures) -> str | None: if features.file_markers or self._has_file_like_anchor(features): return V2Subintent.FIND_FILES + if self._is_api_exposed_request(features): + return V2Subintent.API_EXPOSED if any( ( features.endpoint_paths, @@ -26,3 +46,13 @@ class DocsSubintentResolver: hint.endswith((".md", ".yaml", ".yml", ".json")) for hint in features.target_doc_hints ) or any(token.endswith((".md", ".yaml", ".yml", ".json")) for token in features.file_names) + + def _is_api_exposed_request(self, features: QueryFeatures) -> bool: + query = features.normalized_query.lower() + if features.endpoint_paths: + return False + if any(marker in query for marker in self._API_ENUM_MARKERS): + return True + has_api_words = any(marker in query for marker in self._API_WORD_MARKERS) + has_list_words = any(marker in query for marker in 
self._LIST_WORD_MARKERS) + return has_api_words and has_list_words diff --git a/src/app/core/agent/processes/v2/intent_router/routers/fallback.py b/src/app/core/agent/processes/v2/intent_router/routers/fallback.py index a88df36..245716a 100644 --- a/src/app/core/agent/processes/v2/intent_router/routers/fallback.py +++ b/src/app/core/agent/processes/v2/intent_router/routers/fallback.py @@ -1,10 +1,33 @@ from __future__ import annotations from app.core.agent.processes.v2.intent_router.models import QueryFeatures -from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2RouteResult, V2Subintent +from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2RouteResult, V2ScopeType, V2Subintent class V2FallbackRouter: + def route_without_deterministic_signals( + self, + *, + user_query: str, + features: QueryFeatures, + anchors, + scope_type: str = V2ScopeType.UNKNOWN, + ) -> V2RouteResult: + return V2RouteResult( + routing_domain=V2Domain.GENERAL, + intent=V2Intent.GENERAL_QA, + subintent=V2Subintent.SUMMARY, + user_query=user_query, + normalized_query=features.normalized_query, + target_terms=features.target_terms, + anchors=anchors, + confidence=0.0, + routing_mode="llm_fallback", + llm_router_used=True, + reason_short="llm route unresolved", + scope_type=scope_type, + ) + def route( self, *, @@ -12,6 +35,7 @@ class V2FallbackRouter: features: QueryFeatures, anchors, llm_attempted: bool, + scope_type: str = V2ScopeType.UNKNOWN, ) -> V2RouteResult: if features.file_markers: return self._build_docs_result( @@ -21,6 +45,32 @@ class V2FallbackRouter: subintent=V2Subintent.FIND_FILES, llm_attempted=llm_attempted, reason="fallback file markers", + scope_type=scope_type, + ) + if self._has_docs_update_signal(features): + return V2RouteResult( + routing_domain=V2Domain.DOCS, + intent=V2Intent.DOC_UPDATE, + subintent=V2Subintent.FROM_FEATURE, + user_query=user_query, + normalized_query=features.normalized_query, + target_terms=features.target_terms, + 
anchors=anchors, + confidence=0.0, + routing_mode=self._routing_mode(llm_attempted), + llm_router_used=llm_attempted, + reason_short="fallback docs update from feature", + scope_type=scope_type, + ) + if self._has_api_exposed_signal(features): + return self._build_docs_result( + user_query=user_query, + features=features, + anchors=anchors, + subintent=V2Subintent.API_EXPOSED, + llm_attempted=llm_attempted, + reason="fallback docs api exposed", + scope_type=scope_type, ) if self._has_docs_signal(features): return self._build_docs_result( @@ -30,6 +80,7 @@ class V2FallbackRouter: subintent=V2Subintent.SUMMARY, llm_attempted=llm_attempted, reason="fallback docs summary", + scope_type=scope_type, ) return V2RouteResult( routing_domain=V2Domain.GENERAL, @@ -43,6 +94,7 @@ class V2FallbackRouter: routing_mode=self._routing_mode(llm_attempted), llm_router_used=llm_attempted, reason_short="fallback general summary", + scope_type=scope_type, ) def _build_docs_result( @@ -54,6 +106,7 @@ class V2FallbackRouter: subintent: str, llm_attempted: bool, reason: str, + scope_type: str = V2ScopeType.UNKNOWN, ) -> V2RouteResult: return V2RouteResult( routing_domain=V2Domain.DOCS, @@ -67,6 +120,7 @@ class V2FallbackRouter: routing_mode=self._routing_mode(llm_attempted), llm_router_used=llm_attempted, reason_short=reason, + scope_type=scope_type, ) def _has_docs_signal(self, features: QueryFeatures) -> bool: @@ -82,5 +136,30 @@ class V2FallbackRouter: ) ) + def _has_api_exposed_signal(self, features: QueryFeatures) -> bool: + query = features.normalized_query.lower() + has_api = any(marker in query for marker in ("api", "эндпоинт", "endpoint", "роут", "route", "метод")) + has_listing = any(marker in query for marker in ("какие", "список", "перечисли", "все", "available", "list")) + return has_api and has_listing and not features.endpoint_paths and not features.file_markers + + def _has_docs_update_signal(self, features: QueryFeatures) -> bool: + query = features.normalized_query.lower() 
+ has_update = any( + marker in query + for marker in ( + "обнов", + "измен", + "внести правк", + "docs update", + "update documentation", + "документац", + ) + ) + has_feature = any( + marker in query + for marker in ("системной аналитик", "feature", ".md", "confluence", "from feature") + ) + return has_update and has_feature + def _routing_mode(self, llm_attempted: bool) -> str: return "llm_fallback" if llm_attempted else "deterministic_fallback" diff --git a/src/app/core/agent/processes/v2/intent_router/routers/llm.py b/src/app/core/agent/processes/v2/intent_router/routers/llm.py index 291dd96..0a04072 100644 --- a/src/app/core/agent/processes/v2/intent_router/routers/llm.py +++ b/src/app/core/agent/processes/v2/intent_router/routers/llm.py @@ -17,10 +17,18 @@ class V2LlmRouter: self._prompt_name = prompt_name self._catalog = catalog or V2RouteCatalog() - def classify(self, *, normalized_query: str, target_terms: list[str], anchors: dict) -> dict | None: + def classify( + self, + *, + normalized_query: str, + target_terms: list[str], + anchors: dict, + scope_type: str = "unknown", + ) -> dict | None: payload = { "normalized_query": normalized_query, "target_terms": target_terms, + "scope_type": scope_type, "anchors": anchors, "allowed_routes": self._catalog.allowed_routes(), } diff --git a/src/app/core/agent/processes/v2/intent_router/routers/prompts.yml b/src/app/core/agent/processes/v2/intent_router/routers/prompts.yml index 7959dc1..e0a4f6d 100644 --- a/src/app/core/agent/processes/v2/intent_router/routers/prompts.yml +++ b/src/app/core/agent/processes/v2/intent_router/routers/prompts.yml @@ -3,9 +3,12 @@ namespace: v2_intent_router prompts: route: | Ты выбираешь маршрут для узкого процесса v2. + Поле `scope_type` и блок `anchors` с `candidate_*` — это предварительная привязка к каталогу документации текущей RAG-сессии (детерминированно извлечённые кандидаты). 
Не выдумывай домены, сущности и API, которых нет в этих полях; используй их для снятия неоднозначности. Основной принцип: - DOCS / DOC_EXPLAIN / FIND_FILES: запрос просит найти файл, документ или путь. + - DOCS / DOC_EXPLAIN / API_EXPOSED: запрос просит перечислить доступные API-методы/эндпоинты. - DOCS / DOC_EXPLAIN / SUMMARY: запрос просит объяснить документацию, endpoint, архитектуру, процесс или сущность. + - DOCS / DOC_UPDATE / FROM_FEATURE: запрос просит обновить документацию по системной аналитике (feature markdown/confluence). - GENERAL / GENERAL_QA / SUMMARY: общий обзорный вопрос без явного запроса к документации. Используй только маршруты из поля `allowed_routes`. @@ -17,8 +20,8 @@ prompts: Ответь только JSON-объектом вида: { "routing_domain": "GENERAL" | "DOCS", - "intent": "GENERAL_QA" | "DOC_EXPLAIN", - "subintent": "SUMMARY" | "FIND_FILES", + "intent": "GENERAL_QA" | "DOC_EXPLAIN" | "DOC_UPDATE", + "subintent": "SUMMARY" | "FIND_FILES" | "API_EXPOSED" | "FROM_FEATURE", "confidence": 0.0-1.0, "reason_short": "короткая причина" } diff --git a/src/app/core/agent/processes/v2/intent_router/routers/route_catalog.py b/src/app/core/agent/processes/v2/intent_router/routers/route_catalog.py index f4110fe..295928c 100644 --- a/src/app/core/agent/processes/v2/intent_router/routers/route_catalog.py +++ b/src/app/core/agent/processes/v2/intent_router/routers/route_catalog.py @@ -1,12 +1,14 @@ from __future__ import annotations -from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2Subintent +from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2Subintent class V2RouteCatalog: _ALLOWED_ROUTES = ( (V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.FIND_FILES), + (V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.API_EXPOSED), (V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.SUMMARY), + (V2Domain.DOCS, V2Intent.DOC_UPDATE, V2Subintent.FROM_FEATURE), (V2Domain.GENERAL, V2Intent.GENERAL_QA, V2Subintent.SUMMARY), ) diff --git 
a/src/app/core/agent/processes/v2/process.py b/src/app/core/agent/processes/v2/process.py deleted file mode 100644 index b862ce5..0000000 --- a/src/app/core/agent/processes/v2/process.py +++ /dev/null @@ -1,304 +0,0 @@ -"""Процесс v2: роутинг, план retrieval, вызов rag API, сборка evidence и workflow.""" - -from __future__ import annotations - -from app.core.agent.processes.v2.anchor_signals import route_anchor_summary -from app.core.agent.processes.v2.evidence.assembler import DocsEvidenceAssembler -from app.core.agent.processes.v2.evidence.gate import DocsEvidenceGate -from app.core.agent.processes.v2.intent_router import V2IntentRouter -from app.core.agent.processes.v2.models import V2Intent, V2Subintent -from app.core.agent.processes.v2.retrieval import DocsMetadataLookupIndex -from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver -from app.core.agent.processes.v2.retrieval.target_doc_seeding import ( - RagRowIndex, - merge_row_lists, - normalize_doc_path, - normalized_path_set, - row_path, - seed_candidates_from_target_hints, -) -from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter -from app.core.agent.processes.v2.workflows.docs_explain_find_files.context import DocsExplainFindFilesContext -from app.core.agent.processes.v2.workflows.docs_explain_find_files.graph import DocsExplainFindFilesGraph -from app.core.agent.processes.v2.workflows.docs_explain_summary.context import DocsExplainSummaryContext -from app.core.agent.processes.v2.workflows.docs_explain_summary.graph import DocsExplainSummaryGraph -from app.core.agent.processes.v2.workflows.general_summary.context import GeneralSummaryContext -from app.core.agent.processes.v2.workflows.general_summary.graph import GeneralSummaryGraph -from app.core.agent.processes.base import AgentProcess, ProcessResult -from app.core.agent.utils.llm import AgentLlmService - - -class V2Process(AgentProcess): - version = "v2" - - def __init__( - self, - 
llm: AgentLlmService, - policy_resolver: V2RetrievalPolicyResolver, - rag_adapter: V2RagRetrievalAdapter, - evidence_assembler: DocsEvidenceAssembler, - evidence_gate: DocsEvidenceGate | None = None, - router: V2IntentRouter | None = None, - docs_summary_prompt_name: str = "v2_docs_explain.summary_answer", - general_summary_prompt_name: str = "v2_general.summary_answer", - workflow_llm_enabled: bool = True, - ) -> None: - self._router = router or V2IntentRouter() - self._policy_resolver = policy_resolver - self._rag_adapter = rag_adapter - self._evidence_assembler = evidence_assembler - self._evidence_gate = evidence_gate or DocsEvidenceGate() - self._docs_summary_prompt_name = docs_summary_prompt_name - self._general_summary_prompt_name = general_summary_prompt_name - self._workflow_llm_enabled = workflow_llm_enabled - self._summary_graph = DocsExplainSummaryGraph(llm) - self._find_files_graph = DocsExplainFindFilesGraph() - self._general_summary_graph = GeneralSummaryGraph(llm) - - async def run(self, context) -> ProcessResult: - route = self._router.route(context.request.message) - rag_session_id = context.session.active_rag_session_id - context.trace.module("process.v2").log( - "intent_routed", - { - "routing_domain": route.routing_domain, - "intent": route.intent, - "subintent": route.subintent, - "normalized_query": route.normalized_query, - "target_terms": route.target_terms, - "anchors": route_anchor_summary(route), - "confidence": route.confidence, - "routing_mode": route.routing_mode, - "llm_router_used": route.llm_router_used, - "reason_short": route.reason_short, - "rag_session_id": rag_session_id, - }, - ) - self._log_step( - context, - "router_resolved", - { - "domain": route.routing_domain, - "intent": route.intent, - "subintent": route.subintent, - "confidence": route.confidence, - }, - ) - self._log_step( - context, - "anchors_extracted", - { - "signal_types": route_anchor_summary(route)["signal_types"], - "endpoint_paths": 
route.anchors.endpoint_paths, - "target_doc_hints": route.anchors.target_doc_hints, - "matched_aliases": route.anchors.matched_aliases, - "target_terms": route.target_terms, - }, - ) - self._log_step( - context, - "alias_resolution", - { - "resolved_aliases": route.anchors.matched_aliases, - "target_doc_hints": route.anchors.target_doc_hints, - }, - ) - if not rag_session_id: - if route.intent == V2Intent.GENERAL_QA: - answer = "Не могу собрать grounded summary без активной RAG-сессии с проиндексированной документацией." - self._log_step(context, "evidence_gate_checked", {"passed": False, "reason": "missing_rag_session"}) - self._log_step(context, "answer_generated", {"answer_mode": "insufficient_evidence"}) - return ProcessResult(answer=answer) - return ProcessResult(answer="Для процесса v2 нужна активная RAG-сессия проекта с проиндексированной документацией.") - plan = self._policy_resolver.resolve(route) - context.trace.module("process.v2.retrieval_policy").log( - "retrieval_plan_resolved", - {"profile": plan.profile, "layers": plan.layers, "limit": plan.limit, "filters": plan.filters}, - ) - self._log_step( - context, - "retrieval_profile_selected", - {"profile": plan.profile, "layers": plan.layers, "filters": plan.filters}, - ) - retrieved_rows = await self._rag_adapter.fetch_rows(rag_session_id, route.normalized_query, plan) - metadata_rows = self._metadata_lookup_candidates(retrieved_rows, route) - rows = self._merge_candidate_rows(retrieved_rows, metadata_rows) - rows = seed_candidates_from_target_hints(rows, route.anchors.target_doc_hints, RagRowIndex(rows)) - self._print_missing_target_hints(route, rows) - context.trace.module("process.v2.rag_retrieval").log( - "rag_rows_fetched", - { - "profile": plan.profile, - "row_count": len(rows), - "rows": [self._trace_row(row) for row in rows], - }, - ) - self._log_step( - context, - "candidate_generation", - { - "query": route.user_query, - "profile": plan.profile, - "details": { - "target_doc_hints": 
list(route.anchors.target_doc_hints), - "candidates_before_ranking": [row_path(row) for row in rows if row_path(row)], - }, - "resolved_aliases": route.anchors.matched_aliases, - "target_doc_hints": route.anchors.target_doc_hints, - "candidate_docs_before_ranking": [self._trace_row(row) for row in rows[:8]], - "sources": { - "seeded": [self._trace_row(row) for row in retrieved_rows[:5] if row_path(row) in {normalize_doc_path(h) for h in route.anchors.target_doc_hints}], - "metadata_lookup": [self._trace_row(row) for row in metadata_rows[:5]], - "semantic": [self._trace_row(row) for row in retrieved_rows[:5]], - }, - }, - ) - self._log_step( - context, - "retrieval_executed", - { - "query": route.user_query, - "profile": plan.profile, - "row_count": len(rows), - "target_doc_hints": route.anchors.target_doc_hints, - "top_results": [self._trace_row(row) for row in rows[:5]], - }, - ) - if route.subintent == V2Subintent.FIND_FILES: - files = self._evidence_assembler.assemble_files(rows, route) - gate = self._evidence_gate.check_files(route, files) - context.trace.module("process.v2.evidence").log( - "evidence_assembled", - {"mode": "find_files", "file_count": len(files), "files": [file.path for file in files]}, - ) - self._log_step( - context, - "evidence_assembled", - {"mode": "find_files", "primary_file": files[0].path if files else None, "file_count": len(files)}, - ) - self._log_ranking(context, files) - self._log_step( - context, - "evidence_gate_checked", - {"passed": gate.passed, "reason": gate.reason, "answer_mode": gate.answer_mode}, - ) - flow_context = DocsExplainFindFilesContext( - runtime=context, - route=route, - rag_session_id=rag_session_id, - files=files, - gate_decision=gate, - ) - flow_context = await self._find_files_graph.run(flow_context) - self._log_step(context, "answer_generated", {"answer_mode": gate.answer_mode, "answer_length": len(flow_context.answer)}) - return ProcessResult(answer=flow_context.answer) - documents = 
self._evidence_assembler.assemble_summaries(rows, route) - gate = self._evidence_gate.check_summaries(route, documents) - context.trace.module("process.v2.evidence").log( - "evidence_assembled", - {"mode": "summary", "document_count": len(documents), "documents": [item.path for item in documents]}, - ) - self._log_step( - context, - "evidence_assembled", - {"mode": "summary", "primary_doc": documents[0].path if documents else None, "document_count": len(documents)}, - ) - self._log_ranking(context, documents) - self._log_step( - context, - "evidence_gate_checked", - {"passed": gate.passed, "reason": gate.reason, "answer_mode": gate.answer_mode}, - ) - if route.intent == V2Intent.GENERAL_QA: - flow_context = GeneralSummaryContext( - runtime=context, - route=route, - prompt_name=self._general_summary_prompt_name, - workflow_llm_enabled=self._workflow_llm_enabled, - documents=documents, - gate_decision=gate, - ) - flow_context = await self._general_summary_graph.run(flow_context) - self._log_step(context, "answer_generated", {"answer_mode": gate.answer_mode, "answer_length": len(flow_context.answer)}) - return ProcessResult(answer=flow_context.answer) - flow_context = DocsExplainSummaryContext( - runtime=context, - route=route, - rag_session_id=rag_session_id, - prompt_name=self._docs_summary_prompt_name, - workflow_llm_enabled=self._workflow_llm_enabled, - documents=documents, - gate_decision=gate, - ) - flow_context = await self._summary_graph.run(flow_context) - self._log_step(context, "answer_generated", {"answer_mode": gate.answer_mode, "answer_length": len(flow_context.answer)}) - return ProcessResult(answer=flow_context.answer) - - def _trace_row(self, row: dict) -> dict[str, object]: - metadata = row.get("metadata") or {} - content = str(row.get("content") or "").strip() - return { - "layer": str(row.get("layer") or ""), - "path": str(row.get("path") or ""), - "title": str(row.get("title") or ""), - "document_id": str(metadata.get("document_id") or 
metadata.get("doc_id") or ""), - "entity_name": str(metadata.get("entity_name") or ""), - "summary_text": str(metadata.get("summary_text") or "")[:400], - "section_path": str(metadata.get("section_path") or ""), - "content_preview": content[:400], - } - - def _log_step(self, context, step: str, payload: dict[str, object]) -> None: - context.trace.module("process.v2.pipeline").log(step, payload) - - def _print_missing_target_hints(self, route, rows: list[dict]) -> None: - if not route.anchors.target_doc_hints: - return - candidate_paths = normalized_path_set(rows) - for hint in route.anchors.target_doc_hints: - if not str(hint or "").strip(): - continue - normalized = normalize_doc_path(hint) - if not normalized.startswith("docs/") or "." not in normalized.rsplit("/", 1)[-1]: - continue - if normalized not in candidate_paths: - print("ERROR: target doc missing from candidates:", normalized) - - def _metadata_lookup_candidates(self, rows: list[dict], route) -> list[dict]: - return DocsMetadataLookupIndex(rows).lookup(route) - - def _merge_candidate_rows(self, *groups: list[dict]) -> list[dict]: - return merge_row_lists(*groups) - - def _log_ranking(self, context, items: list) -> None: - top_docs: list[dict[str, object]] = [] - for item in items[:4]: - top_docs.append( - { - "doc": getattr(item, "path", ""), - "score": getattr(item, "score", 0), - "match_reason": getattr(item, "match_reason", ""), - } - ) - context.trace.module("process.v2.pipeline").log( - "ranking_explained", - { - "doc": getattr(item, "path", ""), - "score_breakdown": getattr(item, "score_breakdown", {}), - "score": getattr(item, "score", 0), - "match_reason": getattr(item, "match_reason", ""), - }, - ) - context.trace.module("process.v2.pipeline").log( - "ranking_explained", - { - "top_docs_after_ranking": top_docs, - "ranking_score_breakdown": [ - { - "doc": getattr(item, "path", ""), - "score_breakdown": getattr(item, "score_breakdown", {}), - } - for item in items[:4] - ], - }, - ) diff --git 
a/src/app/core/agent/processes/v2/retrieval/__init__.py b/src/app/core/agent/processes/v2/retrieval/__init__.py deleted file mode 100644 index 11efcb3..0000000 --- a/src/app/core/agent/processes/v2/retrieval/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -from app.core.agent.processes.v2.retrieval.metadata_lookup import DocsMetadataLookupIndex -from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver -from app.core.agent.processes.v2.retrieval.target_doc_seeding import ( - RagRowIndex, - normalize_doc_path, - seed_candidates_from_target_hints, -) -from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter - -__all__ = [ - "V2RetrievalPolicyResolver", - "V2RagRetrievalAdapter", - "DocsMetadataLookupIndex", - "normalize_doc_path", - "RagRowIndex", - "seed_candidates_from_target_hints", -] diff --git a/src/app/core/agent/processes/v2/retrieval/policy_resolver.py b/src/app/core/agent/processes/v2/retrieval/policy_resolver.py deleted file mode 100644 index 3184a73..0000000 --- a/src/app/core/agent/processes/v2/retrieval/policy_resolver.py +++ /dev/null @@ -1,270 +0,0 @@ -"""Intent-aware retrieval policy resolver for process v2.""" - -from __future__ import annotations - -from app.core.agent.processes.v2.anchor_signals import anchor_signal_types -from app.core.agent.processes.v2.models import V2AnchorType, V2Intent, V2RouteResult, V2Subintent -from app.core.rag.contracts.enums import RagLayer -from app.core.rag.retrieval.session_retriever import RetrievalPlan - - -class _AnchorTermCollector: - def prefer_like_patterns(self, route: V2RouteResult) -> list[str]: - terms = self._hint_basenames(route) - terms.extend(route.anchors.endpoint_paths) - terms.extend(route.target_terms) - terms.extend(route.anchors.file_names) - terms.extend(route.anchors.entity_names) - terms.extend(route.anchors.matched_aliases) - terms.extend(self._process_terms(route)) - return [f"%{term.lower()}%" for term in _unique_terms(terms)] - - 
def find_files_patterns(self, route: V2RouteResult) -> list[str]: - if route.anchors.target_doc_hints: - return [f"%{name.lower()}%" for name in self._hint_basenames(route)] - return self.prefer_like_patterns(route) - - def api_method_patterns(self, route: V2RouteResult) -> list[str]: - terms = self._hint_basenames(route) - terms.extend(route.anchors.target_doc_hints) - terms.extend(route.anchors.endpoint_paths) - terms.extend(route.target_terms) - patterns: list[str] = [] - for term in _unique_terms(terms): - lowered = term.lower() - stripped = lowered.strip("/") - if stripped: - patterns.append(f"%{stripped}%") - if lowered: - patterns.append(f"%{lowered}%") - return _unique_terms(patterns) - - def _hint_basenames(self, route: V2RouteResult) -> list[str]: - return [hint.rsplit("/", 1)[-1] for hint in route.anchors.target_doc_hints if str(hint).strip()] - - def _process_terms(self, route: V2RouteResult) -> list[str]: - terms: list[str] = [] - if route.anchors.process_domain: - terms.append(route.anchors.process_domain) - if route.anchors.process_subdomain: - terms.append(route.anchors.process_subdomain) - return terms - - -class _RouteFilterBuilder: - _API_DOC_PREFIXES = [ - "docs/api/", - "docs/endpoints/", - "docs/methods/", - "api/", - "endpoints/", - "methods/", - ] - - def __init__(self) -> None: - self._terms = _AnchorTermCollector() - - def general_filters(self, route: V2RouteResult) -> dict[str, object]: - return { - "prefer_path_prefixes": ["docs/architecture/", "docs/"], - "prefer_like_patterns": ["%readme.md%", "%overview%"], - "target_doc_hints": list(route.anchors.target_doc_hints), - } - - def summary_filters(self, route: V2RouteResult) -> dict[str, object]: - if _is_api_method_explain(route): - return self.api_method_filters(route) - filters = self._base_filters(route) - filters["prefer_path_prefixes"] = self._summary_prefixes(route) - filters["prefer_like_patterns"] = self._terms.prefer_like_patterns(route) - if V2AnchorType.API_ENDPOINT in 
anchor_signal_types(route): - filters["path_prefixes"] = ["docs/api/", "docs/"] - return filters - - def api_method_filters(self, route: V2RouteResult) -> dict[str, object]: - filters = self._base_filters(route) - filters["path_prefixes"] = list(self._API_DOC_PREFIXES) - filters["prefer_path_prefixes"] = list(self._API_DOC_PREFIXES) - filters["prefer_like_patterns"] = self._terms.api_method_patterns(route) - return filters - - def find_files_filters(self, route: V2RouteResult) -> dict[str, object]: - filters = self._base_filters(route) - prefixes = self._find_files_prefixes(route) - if prefixes: - filters["path_prefixes"] = prefixes - filters["prefer_path_prefixes"] = self._find_files_prefer_prefixes(route, prefixes) - filters["prefer_like_patterns"] = self._terms.find_files_patterns(route) - return filters - - def _base_filters(self, route: V2RouteResult) -> dict[str, object]: - filters: dict[str, object] = { - "target_doc_hints": list(route.anchors.target_doc_hints), - } - if route.anchors.process_domain: - filters["metadata.domain"] = route.anchors.process_domain - if route.anchors.process_subdomain: - filters["metadata.subdomain"] = route.anchors.process_subdomain - return filters - - def _find_files_prefixes(self, route: V2RouteResult) -> list[str]: - hint_prefixes = _prefixes_from_paths(route.anchors.target_doc_hints) - if hint_prefixes: - return hint_prefixes - file_prefixes = [name for name in route.anchors.file_names if str(name).strip().startswith("docs/")] - derived = _prefixes_from_paths(file_prefixes) - if derived: - return derived - signals = anchor_signal_types(route) - if V2AnchorType.API_ENDPOINT in signals: - return ["docs/api/", "docs/"] - if V2AnchorType.ARCHITECTURE in signals: - return ["docs/architecture/", "docs/"] - if V2AnchorType.LOGIC_FLOW in signals: - return ["docs/logic/", "docs/"] - if V2AnchorType.DOMAIN_ENTITY in signals: - return ["docs/domains/", "docs/"] - return ["docs/"] - - def _find_files_prefer_prefixes(self, route: 
V2RouteResult, prefixes: list[str]) -> list[str]: - preferred = list(prefixes) - if route.anchors.process_domain or route.anchors.process_subdomain: - preferred.extend(["docs/domains/", "docs/logic/"]) - return _unique_terms(preferred or ["docs/"]) - - def _summary_prefixes(self, route: V2RouteResult) -> list[str]: - signals = anchor_signal_types(route) - prefixes: list[str] = [] - if V2AnchorType.API_ENDPOINT in signals: - prefixes.extend(["docs/api/", "docs/"]) - if V2AnchorType.ARCHITECTURE in signals: - prefixes.extend(["docs/architecture/", "docs/"]) - if V2AnchorType.LOGIC_FLOW in signals: - prefixes.extend(["docs/logic/", "docs/architecture/", "docs/"]) - if V2AnchorType.DOMAIN_ENTITY in signals: - prefixes.extend(["docs/domains/", "docs/", "docs/api/"]) - return _unique_terms(prefixes or ["docs/"]) - - -class V2RetrievalPolicyResolver: - _GENERAL_LAYERS = [RagLayer.DOCS_DOCUMENT_CATALOG, RagLayer.DOCS_DOC_CHUNKS] - _FIND_FILES_LAYERS = [RagLayer.DOCS_DOCUMENT_CATALOG, RagLayer.DOCS_ENTITY_CATALOG] - _SUMMARY_LAYERS = { - "docs_api_method_explain": [ - RagLayer.DOCS_DOCUMENT_CATALOG, - RagLayer.DOCS_FACT_INDEX, - RagLayer.DOCS_DOC_CHUNKS, - ], - "docs_summary_api_endpoint": [ - RagLayer.DOCS_DOCUMENT_CATALOG, - RagLayer.DOCS_FACT_INDEX, - RagLayer.DOCS_DOC_CHUNKS, - ], - "docs_summary_logic_flow": [ - RagLayer.DOCS_WORKFLOW_INDEX, - RagLayer.DOCS_DOCUMENT_CATALOG, - RagLayer.DOCS_DOC_CHUNKS, - ], - "docs_summary_domain_entity": [ - RagLayer.DOCS_ENTITY_CATALOG, - RagLayer.DOCS_DOCUMENT_CATALOG, - RagLayer.DOCS_DOC_CHUNKS, - ], - "docs_summary_architecture": [ - RagLayer.DOCS_DOCUMENT_CATALOG, - RagLayer.DOCS_RELATION_GRAPH, - RagLayer.DOCS_DOC_CHUNKS, - ], - "docs_summary_generic": [ - RagLayer.DOCS_DOCUMENT_CATALOG, - RagLayer.DOCS_DOC_CHUNKS, - ], - } - - def __init__(self) -> None: - self._filters = _RouteFilterBuilder() - - def resolve(self, route: V2RouteResult) -> RetrievalPlan: - if route.intent == V2Intent.GENERAL_QA: - return RetrievalPlan( - 
profile="general_qa_grounded_summary", - layers=list(self._GENERAL_LAYERS), - limit=8, - filters=self._filters.general_filters(route), - ) - if route.subintent == V2Subintent.FIND_FILES: - return RetrievalPlan( - profile="file_lookup", - layers=list(self._FIND_FILES_LAYERS), - limit=12, - filters=self._filters.find_files_filters(route), - ) - profile = self._summary_profile(route) - return RetrievalPlan( - profile=profile, - layers=list(self._SUMMARY_LAYERS[profile]), - limit=10 if profile == "docs_api_method_explain" else 8, - filters=self._filters.summary_filters(route), - ) - - def _summary_profile(self, route: V2RouteResult) -> str: - if _is_api_method_explain(route): - return "docs_api_method_explain" - meaningful = anchor_signal_types(route) - {V2AnchorType.FIND_FILES} - if len(meaningful) != 1: - return "docs_summary_generic" - mapping = { - V2AnchorType.API_ENDPOINT: "docs_summary_api_endpoint", - V2AnchorType.ARCHITECTURE: "docs_summary_architecture", - V2AnchorType.LOGIC_FLOW: "docs_summary_logic_flow", - V2AnchorType.DOMAIN_ENTITY: "docs_summary_domain_entity", - } - return mapping.get(next(iter(meaningful)), "docs_summary_generic") - - -def _prefixes_from_paths(paths: list[str]) -> list[str]: - prefixes = [] - for path in paths: - value = str(path).strip().strip("/") - if "/" not in value: - continue - prefix = value.rsplit("/", 1)[0] + "/" - if prefix: - prefixes.append(prefix) - return _unique_terms(prefixes) - - -def _unique_terms(items: list[str]) -> list[str]: - seen: set[str] = set() - unique: list[str] = [] - for raw in items: - value = str(raw or "").strip() - if not value or value in seen: - continue - seen.add(value) - unique.append(value) - return unique - - -def _is_api_method_explain(route: V2RouteResult) -> bool: - if route.subintent != V2Subintent.SUMMARY: - return False - if route.anchors.endpoint_paths: - return True - if _has_api_like_hints(route.anchors.target_doc_hints): - return True - return V2AnchorType.API_ENDPOINT in 
anchor_signal_types(route) - - -def _has_api_like_hints(hints: list[str]) -> bool: - for hint in hints: - value = str(hint or "").strip().lower() - if not value: - continue - if value.startswith("/"): - return True - if value.startswith(("docs/api/", "docs/endpoints/", "docs/methods/")): - return True - if "endpoint" in value or "method" in value: - return True - return False diff --git a/src/app/core/agent/processes/v2/v2_process.py b/src/app/core/agent/processes/v2/v2_process.py new file mode 100644 index 0000000..ddf6ef3 --- /dev/null +++ b/src/app/core/agent/processes/v2/v2_process.py @@ -0,0 +1,194 @@ +"""Процесс v2: роутинг запроса и dispatch в workflow.""" + +from __future__ import annotations + +from typing import Any + +from app.core.agent.processes.base import AgentProcess, ProcessResult +from app.core.agent.processes.v2.intent_router import V2IntentRouter +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context import ( + DocExplainApiExposedContext, +) +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.graph import DocExplainApiExposedGraph +from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context import DocExplainFindFilesContext +from app.core.agent.processes.v2.workflows.doc_explain_find_files.graph import DocExplainFindFilesGraph +from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context import DocExplainSummaryContext +from app.core.agent.processes.v2.workflows.doc_explain_summary.graph import DocExplainSummaryGraph +from app.core.agent.processes.v2.workflows.doc_update_from_feature.graph import DocUpdateFromFeatureGraph +from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.context import ( + DocUpdateFromFeatureContext, +) +from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.context import GeneralQaSummaryContext +from 
app.core.agent.processes.v2.workflows.general_qa_summary.graph import GeneralQaSummaryGraph +from app.core.agent.utils.llm import AgentLlmService +from app.core.agent.utils.process_v2.anchor_signals import route_anchor_summary +from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler +from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate +from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2Subintent +from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver +from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter + + +class V2Process(AgentProcess): + version = "v2" + + def __init__( + self, + llm: AgentLlmService, + policy_resolver: RetrievalPlanResolver, + rag_adapter: V2RagRetrievalAdapter, + evidence_assembler: DocsEvidenceAssembler, + evidence_gate: DocsEvidenceGate | None = None, + router: V2IntentRouter | None = None, + docs_summary_prompt_name: str = "v2_docs_explain.summary_answer", + general_summary_prompt_name: str = "v2_general.summary_answer", + workflow_llm_enabled: bool = True, + doc_rules_enabled: bool = True, + ) -> None: + self._router = router or V2IntentRouter() + gate = evidence_gate or DocsEvidenceGate() + self._docs_summary_prompt_name = docs_summary_prompt_name + self._general_summary_prompt_name = general_summary_prompt_name + self._workflow_llm_enabled = workflow_llm_enabled + self._doc_rules_enabled = doc_rules_enabled + self._workflows: dict[tuple[str, str, str], Any] = { + (V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.SUMMARY): DocExplainSummaryGraph( + llm, + policy_resolver=policy_resolver, + rag_adapter=rag_adapter, + evidence_assembler=evidence_assembler, + evidence_gate=gate, + ), + (V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.FIND_FILES): DocExplainFindFilesGraph( + policy_resolver=policy_resolver, + rag_adapter=rag_adapter, + evidence_assembler=evidence_assembler, + evidence_gate=gate, + ), + (V2Domain.DOCS, 
V2Intent.DOC_EXPLAIN, V2Subintent.API_EXPOSED): DocExplainApiExposedGraph( + policy_resolver=policy_resolver, + rag_adapter=rag_adapter, + ), + (V2Domain.DOCS, V2Intent.DOC_UPDATE, V2Subintent.FROM_FEATURE): DocUpdateFromFeatureGraph( + llm=llm, + doc_rules_enabled=doc_rules_enabled, + ), + (V2Domain.GENERAL, V2Intent.GENERAL_QA, V2Subintent.SUMMARY): GeneralQaSummaryGraph( + llm, + policy_resolver=policy_resolver, + rag_adapter=rag_adapter, + evidence_assembler=evidence_assembler, + evidence_gate=gate, + ), + } + + async def run(self, context) -> ProcessResult: + rag_session_id = context.session.active_rag_session_id or "" + route = self._router.route(context.request.message, rag_session_id=rag_session_id or None) + context.trace.module("process.v2").log( + "intent_routed", + { + "routing_domain": route.routing_domain, + "intent": route.intent, + "subintent": route.subintent, + "normalized_query": route.normalized_query, + "target_terms": route.target_terms, + "anchors": route_anchor_summary(route), + "confidence": route.confidence, + "routing_mode": route.routing_mode, + "llm_router_used": route.llm_router_used, + "reason_short": route.reason_short, + "rag_session_id": rag_session_id, + }, + ) + self._log_step( + context, + "router_resolved", + { + "domain": route.routing_domain, + "intent": route.intent, + "subintent": route.subintent, + "confidence": route.confidence, + }, + ) + self._log_step( + context, + "anchors_extracted", + { + "signal_types": route_anchor_summary(route)["signal_types"], + "endpoint_paths": route.anchors.endpoint_paths, + "target_doc_hints": route.anchors.target_doc_hints, + "matched_aliases": route.anchors.matched_aliases, + "target_terms": route.target_terms, + }, + ) + self._log_step( + context, + "alias_resolution", + { + "resolved_aliases": route.anchors.matched_aliases, + "target_doc_hints": route.anchors.target_doc_hints, + }, + ) + flow_context = await self._run_workflow(context, route, rag_session_id) + if 
flow_context.answer_generated_payload is not None: + self._log_step(context, "answer_generated", dict(flow_context.answer_generated_payload)) + changeset = list(getattr(flow_context, "changeset", []) or []) + apply_changeset = bool(getattr(flow_context, "apply_changeset", False)) + return ProcessResult( + answer=flow_context.answer, + changeset=changeset, + apply_changeset=apply_changeset, + ) + + def _log_step(self, context, step: str, payload: dict[str, object]) -> None: + context.trace.module("process.v2.pipeline").log(step, payload) + + async def _run_workflow(self, runtime_context, route, rag_session_id: str): + workflow = self._workflows.get((route.routing_domain, route.intent, route.subintent)) + if workflow is None: + raise ValueError(f"Unsupported v2 workflow route: {(route.routing_domain, route.intent, route.subintent)!r}") + if route.intent == V2Intent.GENERAL_QA: + return await workflow.run( + GeneralQaSummaryContext( + runtime=runtime_context, + route=route, + rag_session_id=rag_session_id, + prompt_name=self._general_summary_prompt_name, + workflow_llm_enabled=self._workflow_llm_enabled, + ) + ) + if route.subintent == V2Subintent.FIND_FILES: + return await workflow.run( + DocExplainFindFilesContext( + runtime=runtime_context, + route=route, + rag_session_id=rag_session_id, + ) + ) + if route.subintent == V2Subintent.API_EXPOSED: + return await workflow.run( + DocExplainApiExposedContext( + runtime=runtime_context, + route=route, + rag_session_id=rag_session_id, + ) + ) + if route.intent == V2Intent.DOC_UPDATE and route.subintent == V2Subintent.FROM_FEATURE: + return await workflow.run( + DocUpdateFromFeatureContext( + runtime=runtime_context, + route=route, + rag_session_id=rag_session_id, + doc_rules_enabled=self._doc_rules_enabled, + ) + ) + return await workflow.run( + DocExplainSummaryContext( + runtime=runtime_context, + route=route, + rag_session_id=rag_session_id, + prompt_name=self._docs_summary_prompt_name, + 
workflow_llm_enabled=self._workflow_llm_enabled, + ) + ) diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/README.md b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/README.md new file mode 100644 index 0000000..3dcf97b --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/README.md @@ -0,0 +1,17 @@ +# DOC_EXPLAIN / API_EXPOSED Workflow + +## Контракт сабинтента + +| Поле | Значение | +|---|---| +| `domain` | `DOCS` | +| `intent` | `DOC_EXPLAIN` | +| `subintent` | `API_EXPOSED` | +| `workflow_id` | `v2.docs_explain.api_exposed` | +| `source` | `workflow.v2.api_exposed` | + +## Выходной формат + +Ответ формируется детерминированно как список endpoint-путей (`/path`) по одному на строку. +Scope учитывается через retrieval-policy фильтры `metadata.domain`/`metadata.subdomain` и path-префиксы API-документации. + diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/__init__.py new file mode 100644 index 0000000..48f7400 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/__init__.py @@ -0,0 +1,4 @@ +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.graph import DocExplainApiExposedGraph + +__all__ = ["DocExplainApiExposedGraph"] + diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/graph.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/graph.py new file mode 100644 index 0000000..2d62f50 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/graph.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.build_api_exposed_evidence_step import ( + BuildApiExposedEvidenceStep, +) +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.fetch_rag_rows_step import 
FetchRagRowsStep +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.finalize_api_exposed_answer_step import ( + FinalizeApiExposedAnswerStep, +) +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.require_rag_session_step import ( + RequireRagSessionStep, +) +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.resolve_retrieval_plan_step import ( + ResolveRetrievalPlanStep, +) +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.retrieval.api_endpoint_collector import ( + ApiEndpointCollector, +) +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.buffered_graph import ( + DocExplainApiExposedWorkflowGraph, +) +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context import ( + DocExplainApiExposedContext, +) +from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver +from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter + + +class DocExplainApiExposedGraph(DocExplainApiExposedWorkflowGraph[DocExplainApiExposedContext]): + def __init__( + self, + policy_resolver: RetrievalPlanResolver, + rag_adapter: V2RagRetrievalAdapter, + ) -> None: + super().__init__( + workflow_id="v2.docs_explain.api_exposed", + source="workflow.v2.api_exposed", + steps=[ + RequireRagSessionStep( + missing_message="Для процесса v2 нужна активная RAG-сессия проекта с проиндексированной документацией." 
+ ), + ResolveRetrievalPlanStep(policy_resolver), + FetchRagRowsStep(rag_adapter), + BuildApiExposedEvidenceStep(ApiEndpointCollector()), + FinalizeApiExposedAnswerStep(), + ], + ) + diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/__init__.py new file mode 100644 index 0000000..bc0742b --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/__init__.py @@ -0,0 +1,2 @@ +"""Steps for DOC_EXPLAIN/API_EXPOSED workflow.""" + diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/build_api_exposed_evidence_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/build_api_exposed_evidence_step.py new file mode 100644 index 0000000..c385a79 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/build_api_exposed_evidence_step.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from typing import Generic, TypeVar + +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.retrieval.api_endpoint_collector import ( + ApiEndpointCollector, +) +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context_protocols import ApiWorkflowContext +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.pipeline_logging import log_pipeline_step +from app.core.agent.utils.workflow import WorkflowStep + +TContext = TypeVar("TContext", bound=ApiWorkflowContext) + + +class BuildApiExposedEvidenceStep(WorkflowStep[TContext], Generic[TContext]): + step_id = "build_api_exposed_evidence" + title = "Сборка списка API" + + def __init__(self, collector: ApiEndpointCollector) -> None: + self._collector = collector + + async def run(self, context: TContext) -> TContext: + if context.answer: + return context + context.endpoints = 
self._collector.collect(context.retrieved_rows) + context.runtime.trace.module("process.v2.evidence").log( + "evidence_assembled", + {"mode": "api_exposed", "endpoint_count": len(context.endpoints), "endpoints": context.endpoints}, + ) + log_pipeline_step( + context.runtime, + "evidence_assembled", + {"mode": "api_exposed", "endpoint_count": len(context.endpoints)}, + ) + return context + + def trace_output(self, context: TContext) -> dict[str, object]: + return {"endpoint_count": len(context.endpoints)} + diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/fetch_rag_rows_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/fetch_rag_rows_step.py new file mode 100644 index 0000000..2eefcdc --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/fetch_rag_rows_step.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +from typing import Generic, TypeVar + +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context_protocols import RetrievalWorkflowContext +from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter +from app.core.agent.utils.workflow import WorkflowStep + +TContext = TypeVar("TContext", bound=RetrievalWorkflowContext) + + +class FetchRagRowsStep(WorkflowStep[TContext], Generic[TContext]): + step_id = "fetch_rag_rows" + title = "Получение строк из RAG" + + def __init__(self, rag_adapter: V2RagRetrievalAdapter) -> None: + self._rag_adapter = rag_adapter + + async def run(self, context: TContext) -> TContext: + if context.answer or context.retrieval_plan is None: + return context + context.retrieved_rows = await self._rag_adapter.fetch_rows( + context.rag_session_id, + context.route.normalized_query, + context.retrieval_plan, + ) + return context + + def trace_output(self, context: TContext) -> dict[str, object]: + return {"retrieved_row_count": len(context.retrieved_rows)} + diff --git 
a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/finalize_api_exposed_answer_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/finalize_api_exposed_answer_step.py new file mode 100644 index 0000000..3f8cde6 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/finalize_api_exposed_answer_step.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context import DocExplainApiExposedContext +from app.core.agent.utils.workflow import WorkflowStep + + +class FinalizeApiExposedAnswerStep(WorkflowStep[DocExplainApiExposedContext]): + step_id = "finalize_api_exposed_answer" + title = "Формирование ответа со списком API" + + async def run(self, context: DocExplainApiExposedContext) -> DocExplainApiExposedContext: + if context.answer: + return context + if not context.endpoints: + context.answer = "Не нашёл задокументированных API-эндпоинтов в выбранном scope." 
+ context.answer_generated_payload = { + "answer_mode": "insufficient_evidence", + "answer_length": len(context.answer), + } + return context + context.answer = "\n".join(context.endpoints) + context.answer_generated_payload = { + "answer_mode": "deterministic", + "answer_length": len(context.answer), + } + return context + + def trace_output(self, context: DocExplainApiExposedContext) -> dict[str, object]: + return {"answer_length": len(context.answer)} + diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/require_rag_session_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/require_rag_session_step.py new file mode 100644 index 0000000..55bdc65 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/require_rag_session_step.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from typing import Generic, TypeVar + +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context_protocols import RetrievalWorkflowContext +from app.core.agent.utils.workflow import WorkflowStep + +TContext = TypeVar("TContext", bound=RetrievalWorkflowContext) + + +class RequireRagSessionStep(WorkflowStep[TContext], Generic[TContext]): + step_id = "require_rag_session" + title = "Проверка RAG-сессии" + + def __init__(self, *, missing_message: str) -> None: + self._missing_message = missing_message + + async def run(self, context: TContext) -> TContext: + if context.rag_session_id: + return context + context.answer = self._missing_message + context.answer_generated_payload = { + "answer_mode": "insufficient_evidence", + "answer_length": len(context.answer), + } + return context + + def trace_output(self, context: TContext) -> dict[str, object]: + return {"has_rag_session": bool(context.rag_session_id)} + diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/resolve_retrieval_plan_step.py 
b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/resolve_retrieval_plan_step.py new file mode 100644 index 0000000..6d782d6 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/resolve_retrieval_plan_step.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import Generic, TypeVar + +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context_protocols import RetrievalWorkflowContext +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.pipeline_logging import log_pipeline_step +from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver +from app.core.agent.utils.workflow import WorkflowStep + +TContext = TypeVar("TContext", bound=RetrievalWorkflowContext) + + +class ResolveRetrievalPlanStep(WorkflowStep[TContext], Generic[TContext]): + step_id = "resolve_retrieval_plan" + title = "Выбор retrieval-плана" + + def __init__(self, resolver: RetrievalPlanResolver) -> None: + self._resolver = resolver + + async def run(self, context: TContext) -> TContext: + if context.answer: + return context + plan = self._resolver.resolve(context.route) + context.retrieval_plan = plan + context.runtime.trace.module("process.v2.retrieval_policy").log( + "retrieval_plan_resolved", + {"profile": plan.profile, "layers": plan.layers, "limit": plan.limit, "filters": plan.filters}, + ) + log_pipeline_step( + context.runtime, + "retrieval_profile_selected", + {"profile": plan.profile, "layers": plan.layers, "filters": plan.filters}, + ) + return context + + def trace_output(self, context: TContext) -> dict[str, object]: + return {"profile": getattr(context.retrieval_plan, "profile", "")} + diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/retrieval/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/retrieval/__init__.py new file mode 100644 index 
0000000..9df89fb --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/retrieval/__init__.py @@ -0,0 +1,2 @@ +"""Retrieval helpers for DOC_EXPLAIN/API_EXPOSED workflow.""" + diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/retrieval/api_endpoint_collector.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/retrieval/api_endpoint_collector.py new file mode 100644 index 0000000..4f55a6c --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/retrieval/api_endpoint_collector.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +import re + + +class ApiEndpointCollector: + _METHODS = ("GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS") + _ENDPOINT_VALUE_RE = re.compile( + r"\b((?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)(?:\s*\|\s*(?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS))*)\s+(/[-a-zA-Z0-9_./{}]+)" + ) + _METHOD_PATH_RE = re.compile(r"\b(GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)\s+(/[-a-zA-Z0-9_./{}]+)") + _PATH_RE = re.compile(r"(/[-a-zA-Z0-9_./{}]+)") + _DOC_EXTS = (".md", ".yaml", ".yml", ".json") + + def collect(self, rows: list[dict]) -> list[str]: + endpoints: list[str] = [] + for row in rows: + self._append_from_endpoint_metadata(endpoints, row) + self._append_from_title_fallback(endpoints, row) + for raw in self._row_candidates(row): + self._append_from_text(endpoints, raw) + return sorted(set(endpoints)) + + def _append_from_title_fallback(self, out: list[str], row: dict) -> None: + title = str(row.get("title") or "").strip() + if not title: + return + for match in self._PATH_RE.findall(title): + self._append_default(out, match) + + def _append_from_endpoint_metadata(self, out: list[str], row: dict) -> None: + metadata = dict(row.get("metadata") or {}) + endpoint_value = str(metadata.get("endpoint") or "").strip() + if not endpoint_value: + return + for methods, path in 
self._ENDPOINT_VALUE_RE.findall(endpoint_value): + self._append_methods_with_path(out, methods, path) + + def _row_candidates(self, row: dict) -> list[str]: + metadata = dict(row.get("metadata") or {}) + values = [ + metadata.get("name"), + metadata.get("summary_text"), + row.get("title"), + ] + return [str(value or "") for value in values if str(value or "").strip()] + + def _append_from_text(self, out: list[str], text: str) -> None: + for method, path in self._METHOD_PATH_RE.findall(text): + self._append_with_method(out, method, path) + + def _append_methods_with_path(self, out: list[str], methods_raw: str, path_raw: str) -> None: + methods = [ + part.strip().upper() + for part in str(methods_raw or "").split("|") + if part.strip().upper() in self._METHODS + ] + if not methods: + self._append_default(out, path_raw) + return + for method in methods: + self._append_with_method(out, method, path_raw) + + def _append_default(self, out: list[str], raw: str) -> None: + self._append_with_method(out, "GET", raw) + + def _append_with_method(self, out: list[str], method: str, raw: str) -> None: + value = str(raw or "").strip().strip("`'\"()[].,:;!?").lower() + if not value.startswith("/"): + return + if value.endswith(self._DOC_EXTS): + return + if len(value.split("/")) < 2: + return + endpoint = f"{method.upper()} {value}" + if endpoint not in out: + out.append(endpoint) diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/retrieval/retrieval_policy.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/retrieval/retrieval_policy.py new file mode 100644 index 0000000..af1972e --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/steps/retrieval/retrieval_policy.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +from app.core.agent.utils.process_v2.models import V2Intent, V2RouteResult, V2Subintent +from app.core.rag.contracts.enums import RagLayer +from 
app.core.rag.retrieval.session_retriever import RetrievalPlan + + +class DocExplainApiExposedRetrievalPolicy: + _LAYERS = [RagLayer.DOCS_DOCUMENT_CATALOG] + _API_PREFIXES = ["docs/api/", "docs/endpoints/", "docs/methods/", "api/", "endpoints/", "methods/"] + + def supports(self, route: V2RouteResult) -> bool: + return route.intent == V2Intent.DOC_EXPLAIN and route.subintent == V2Subintent.API_EXPOSED + + def resolve(self, route: V2RouteResult) -> RetrievalPlan: + return RetrievalPlan( + profile="api_exposed", + layers=list(self._LAYERS), + limit=400, + filters=self._filters(route), + ) + + def _filters(self, route: V2RouteResult) -> dict[str, object]: + query_signals = self._query_signals(route) + filters: dict[str, object] = { + "metadata.type": "api_method", + "prefer_path_prefixes": list(self._API_PREFIXES), + "target_doc_hints": list(route.anchors.target_doc_hints), + "prefer_like_patterns": self._like_patterns(route), + } + if query_signals: + filters["query_signals"] = query_signals + if route.anchors.process_domain: + filters["metadata.domain"] = route.anchors.process_domain + if route.anchors.process_subdomain: + filters["metadata.subdomain"] = route.anchors.process_subdomain + return filters + + def _like_patterns(self, route: V2RouteResult) -> list[str]: + raw: list[str] = ["api", "endpoint", "method", "эндпоинт", "метод"] + raw.extend(route.target_terms) + raw.extend(route.anchors.endpoint_paths) + raw.extend(route.anchors.target_doc_hints) + raw.extend(candidate.value for candidate in route.anchors.candidate_apis) + return [f"%{item.lower()}%" for item in _unique(raw)] + + def _query_signals(self, route: V2RouteResult) -> list[str]: + raw: list[str] = [] + raw.extend(route.target_terms) + raw.extend(route.anchors.endpoint_paths) + blocked = {"api", "endpoint", "method", "эндпоинт", "метод"} + return [item for item in _unique(raw) if item.lower() not in blocked] + + +def _unique(items: list[str]) -> list[str]: + out: list[str] = [] + seen: set[str] = 
set() + for item in items: + value = str(item or "").strip() + if not value or value in seen: + continue + seen.add(value) + out.append(value) + return out diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/__init__.py new file mode 100644 index 0000000..359a31b --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/__init__.py @@ -0,0 +1,2 @@ +"""Runtime helpers for the DOC_EXPLAIN/API_EXPOSED workflow.""" + diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/buffered_graph.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/buffered_graph.py new file mode 100644 index 0000000..6e17aa8 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/buffered_graph.py @@ -0,0 +1,42 @@ +"""Buffered graph for DOC_EXPLAIN/API_EXPOSED workflow.""" + +from __future__ import annotations + +from typing import TypeVar + +from app.core.agent.utils.workflow.context import WorkflowContext +from app.core.agent.utils.workflow.graph import WorkflowGraph + +TContext = TypeVar("TContext", bound=WorkflowContext) + + +class DocExplainApiExposedWorkflowGraph(WorkflowGraph[TContext]): + async def run(self, context: TContext) -> TContext: + trace = context.runtime.trace.module(self._source) + trace.log("workflow_started", {"workflow_id": self._workflow_id}) + steps_buffer: list[dict[str, object]] = [] + for step in self._steps: + inp = step.trace_input(context) + request_id = context.runtime.request.request_id + await context.runtime.publisher.publish_status( + request_id, + self._source, + f"Шаг workflow: {step.title}.", + {"workflow_id": self._workflow_id, "step_id": step.step_id}, + ) + context = await step.run(context) + out = step.trace_output(context) + trace.log( + 
"workflow_step_traced", + { + "workflow_id": self._workflow_id, + "step": {"id": step.step_id, "title": step.title}, + "input": inp, + "output": out, + }, + ) + steps_buffer.append({"step_id": step.step_id, "title": step.title, "input": inp, "output": out}) + trace.log("workflow_trace_flushed", {"workflow_id": self._workflow_id, "steps": steps_buffer}) + trace.log("workflow_completed", {"workflow_id": self._workflow_id}) + return context + diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/context.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/context.py new file mode 100644 index 0000000..d34ad88 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/context.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from dataclasses import dataclass, field + +from app.core.agent.runtime.execution_context import RuntimeExecutionContext +from app.core.agent.utils.process_v2.models import V2RouteResult +from app.core.rag.retrieval.session_retriever import RetrievalPlan + + +@dataclass(slots=True) +class DocExplainApiExposedContext: + runtime: RuntimeExecutionContext + route: V2RouteResult + rag_session_id: str + retrieval_plan: RetrievalPlan | None = None + retrieved_rows: list[dict] = field(default_factory=list) + endpoints: list[str] = field(default_factory=list) + answer: str = "" + answer_generated_payload: dict[str, object] | None = None + diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/context_protocols.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/context_protocols.py new file mode 100644 index 0000000..163660b --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/context_protocols.py @@ -0,0 +1,24 @@ +"""Context protocols for the DOC_EXPLAIN/API_EXPOSED workflow.""" + +from __future__ import 
annotations + +from typing import Protocol + +from app.core.agent.runtime.execution_context import RuntimeExecutionContext +from app.core.agent.utils.process_v2.models import V2RouteResult +from app.core.rag.retrieval.session_retriever import RetrievalPlan + + +class RetrievalWorkflowContext(Protocol): + runtime: RuntimeExecutionContext + route: V2RouteResult + rag_session_id: str + retrieval_plan: RetrievalPlan | None + retrieved_rows: list[dict] + answer: str + answer_generated_payload: dict[str, object] | None + + +class ApiWorkflowContext(RetrievalWorkflowContext, Protocol): + endpoints: list[str] + diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/pipeline_logging.py b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/pipeline_logging.py new file mode 100644 index 0000000..dc511be --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_api_exposed/workflow_runtime/pipeline_logging.py @@ -0,0 +1,8 @@ +"""Pipeline logging helpers for DOC_EXPLAIN/API_EXPOSED.""" + +from __future__ import annotations + + +def log_pipeline_step(runtime, step: str, payload: dict[str, object]) -> None: + runtime.trace.module("process.v2.pipeline").log(step, payload) + diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/README.md b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/README.md new file mode 100644 index 0000000..c562d21 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/README.md @@ -0,0 +1,159 @@ +# DOC_EXPLAIN / FIND_FILES Workflow + +## Контракт сабинтента + +| Поле | Значение | +|---|---| +| `domain` | `DOCS` | +| `intent` | `DOC_EXPLAIN` | +| `subintent` | `FIND_FILES` | +| `workflow_id` | `v2.docs_explain.find_files` | +| `source` | `workflow.v2.find_files` | + +## Диаграмма флоу + +```mermaid +flowchart TD + A["RequireRagSessionStep"] --> B["ResolveRetrievalPlanStep"] + B --> 
C["FetchRagRowsStep"] + C --> D["PrepareCandidateRowsStep"] + D --> E["BuildFilesEvidenceStep"] + E --> F["ApplyFilesEvidenceGateStep"] + F --> G["FinalizeFindFilesAnswerStep"] +``` + +## Шаги процесса + +### 1) `RequireRagSessionStep` + +Шаг проверяет, есть ли активная RAG-сессия. Если `rag_session_id` пустой, workflow останавливает дальнейший retrieval и пишет пользовательское сообщение в `answer`. Для `find_files` gate-решение на этом шаге обычно не ставится, но механизм поддержан. + +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.rag_session_id` | `V2Process` -> `DocExplainFindFilesContext` | Идентификатор активной RAG-сессии | +| `self._missing_message` | Конфигурация в `graph.py` | Текст ответа, если сессии нет | +| `self._missing_gate` | Конфигурация шага | Опциональный gate для раннего выхода | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.answer` | Заполняется `missing_message`, если `rag_session_id` пустой | +| `context.gate_decision` | Заполняется `missing_gate`, если он передан и сессии нет | +| `context.answer_generated_payload` | Формируется как `{"answer_mode", "answer_length"}` при раннем ответе | + +### 2) `ResolveRetrievalPlanStep` + +Шаг превращает route в retrieval-план через `RetrievalPlanResolver`. Профиль для этого сабинтента — `file_lookup`, с подходящими слоями и фильтрами. Параллельно пишет trace-событие `retrieval_plan_resolved`. 
+ +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.route` | Результат `intent_router` | Route с `anchors`, `target_terms`, `scope_type` | +| `self._resolver` | DI из `graph.py` | Реализация policy-резолвера | +| `context.answer` | Предыдущие шаги | Если уже есть ответ, шаг пропускается | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.retrieval_plan` | `self._resolver.resolve(context.route)` | +| `process.v2.retrieval_policy.retrieval_plan_resolved` | Лог с `profile`, `layers`, `limit`, `filters` | + +### 3) `FetchRagRowsStep` + +Шаг выполняет retrieval через `V2RagRetrievalAdapter`. Внутри адаптера объединяются seed-строки по `target_doc_hints` и основной retrieval по эмбеддингам/фильтрам плана. Если план не сформирован или уже есть готовый ответ, шаг ничего не делает. + +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.rag_session_id` | Контекст workflow | Сессия для поиска в `rag_chunks` | +| `context.route.normalized_query` | Route | Нормализованный текст запроса | +| `context.retrieval_plan` | Предыдущий шаг | План retrieval | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.retrieved_rows` | `await rag_adapter.fetch_rows(rag_session_id, normalized_query, retrieval_plan)` | + +### 4) `PrepareCandidateRowsStep` + +Шаг собирает итоговые candidate rows для ранжирования файлов. Он добавляет metadata-lookup кандидаты и подмешивает seed по `target_doc_hints`, затем сохраняет merged-список в `context.rows`. Дополнительно пишет детальный retrieval-trace. 
+ +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.retrieved_rows` | `FetchRagRowsStep` | Строки после retrieval | +| `context.route` | Route | Нужен для hints/aliases/terms | +| `self._builder` | `CandidateRowsBuilder()` | Логика merge и metadata lookup | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.rows` | `prepared.rows` из `CandidateRowsBuilder.build(...)` | +| `process.v2.rag_retrieval.rag_rows_fetched` | Лог деталей rows, источников и top результатов | + +### 5) `BuildFilesEvidenceStep` + +Шаг ранжирует candidate rows в список файлов через `DocsEvidenceAssembler.assemble_files`. На выходе формируется shortlist `RetrievedFile` с оценками и причинами совпадения. Этот shortlist становится опорой для gate и финального ответа. + +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.rows` | `PrepareCandidateRowsStep` | Подготовленные кандидаты | +| `context.route` | Route | Сигналы маршрута для ranking | +| `self._assembler` | DI из `graph.py` | Сборщик evidence | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.files` | `assemble_files(context.rows, context.route)` | +| `process.v2.evidence.evidence_assembled` | Лог file-count и путей | + +### 6) `ApplyFilesEvidenceGateStep` + +Шаг проверяет качество shortlist через `DocsEvidenceGate.check_files`. Решение gate определяет, можно ли отвечать детерминированно или нужно более осторожное поведение. Для прозрачности пишет pipeline-лог с полями `passed/reason/answer_mode`. 
+ +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.route` | Route | Сигналы запроса для gate | +| `context.files` | `BuildFilesEvidenceStep` | Ранжированные файлы | +| `self._gate` | DI из `graph.py` | Правила оценки evidence | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.gate_decision` | `self._gate.check_files(context.route, context.files)` | +| `process.v2.pipeline.evidence_gate_checked` | Лог результата gate | + +### 7) `FinalizeFindFilesAnswerStep` + +Шаг собирает финальный текстовый ответ без LLM. Если файлов нет, возвращается `insufficient_evidence`; если файл один — отдаётся один путь; если несколько — до 4 путей. Если gate вернул `low_confidence_shortlist`, также возвращается ограниченный список путей. + +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.files` | `BuildFilesEvidenceStep` | Список найденных файлов | +| `context.gate_decision` | `ApplyFilesEvidenceGateStep` | Режим и причина ответа | +| `context.answer` | Предыдущие шаги | Если уже заполнен, шаг пропускается | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.answer` | Детерминированно: пусто/1 путь/до 4 путей | +| `context.answer_generated_payload` | `{"answer_mode", "answer_length"}` по ветке формирования | + diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/__init__.py new file mode 100644 index 0000000..02a46a6 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/__init__.py @@ -0,0 +1,3 @@ +from app.core.agent.processes.v2.workflows.doc_explain_find_files.graph import DocExplainFindFilesGraph + +__all__ = ["DocExplainFindFilesGraph"] diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/graph.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/graph.py 
new file mode 100644 index 0000000..4424910 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/graph.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.buffered_graph import DocExplainFindFilesWorkflowGraph +from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.retrieval.candidate_rows import CandidateRowsBuilder +from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context import DocExplainFindFilesContext +from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.apply_files_evidence_gate_step import ( + ApplyFilesEvidenceGateStep, +) +from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.build_files_evidence_step import BuildFilesEvidenceStep +from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.fetch_rag_rows_step import FetchRagRowsStep +from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.finalize_find_files_answer_step import ( + FinalizeFindFilesAnswerStep, +) +from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.prepare_candidate_rows_step import ( + PrepareCandidateRowsStep, +) +from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.require_rag_session_step import RequireRagSessionStep +from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.resolve_retrieval_plan_step import ( + ResolveRetrievalPlanStep, +) +from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler +from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate +from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver +from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter + + +class DocExplainFindFilesGraph(DocExplainFindFilesWorkflowGraph[DocExplainFindFilesContext]): + def __init__( + self, + policy_resolver: 
RetrievalPlanResolver, + rag_adapter: V2RagRetrievalAdapter, + evidence_assembler: DocsEvidenceAssembler, + evidence_gate: DocsEvidenceGate, + ) -> None: + super().__init__( + workflow_id="v2.docs_explain.find_files", + source="workflow.v2.find_files", + steps=[ + RequireRagSessionStep( + missing_message="Для процесса v2 нужна активная RAG-сессия проекта с проиндексированной документацией." + ), + ResolveRetrievalPlanStep(policy_resolver), + FetchRagRowsStep(rag_adapter), + PrepareCandidateRowsStep(CandidateRowsBuilder()), + BuildFilesEvidenceStep(evidence_assembler), + ApplyFilesEvidenceGateStep(evidence_gate), + FinalizeFindFilesAnswerStep(), + ], + ) diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/apply_files_evidence_gate_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/apply_files_evidence_gate_step.py new file mode 100644 index 0000000..bbd1b71 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/apply_files_evidence_gate_step.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from typing import Generic, TypeVar + +from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import FindFilesWorkflowContext +from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.pipeline_logging import log_pipeline_step +from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate +from app.core.agent.utils.workflow import WorkflowStep + +TContext = TypeVar("TContext", bound=FindFilesWorkflowContext) + + +class ApplyFilesEvidenceGateStep(WorkflowStep[TContext], Generic[TContext]): + step_id = "apply_files_evidence_gate" + title = "Проверка file evidence" + + def __init__(self, gate: DocsEvidenceGate) -> None: + self._gate = gate + + async def run(self, context: TContext) -> TContext: + if context.answer: + return context + context.gate_decision = self._gate.check_files(context.route, 
context.files) + log_pipeline_step( + context.runtime, + "evidence_gate_checked", + { + "passed": context.gate_decision.passed, + "reason": context.gate_decision.reason, + "answer_mode": context.gate_decision.answer_mode, + }, + ) + return context + + def trace_output(self, context: TContext) -> dict[str, object]: + return {"passed": bool(context.gate_decision and context.gate_decision.passed)} diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/build_files_evidence_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/build_files_evidence_step.py new file mode 100644 index 0000000..55f8056 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/build_files_evidence_step.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from typing import Generic, TypeVar + +from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import FindFilesWorkflowContext +from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.pipeline_logging import log_pipeline_step, log_ranking +from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler +from app.core.agent.utils.workflow import WorkflowStep + +TContext = TypeVar("TContext", bound=FindFilesWorkflowContext) + + +class BuildFilesEvidenceStep(WorkflowStep[TContext], Generic[TContext]): + step_id = "build_files_evidence" + title = "Сборка file evidence" + + def __init__(self, assembler: DocsEvidenceAssembler) -> None: + self._assembler = assembler + + async def run(self, context: TContext) -> TContext: + if context.answer: + return context + context.files = self._assembler.assemble_files(context.rows, context.route) + context.runtime.trace.module("process.v2.evidence").log( + "evidence_assembled", + { + "mode": "find_files", + "file_count": len(context.files), + "files": [item.path for item in context.files], + }, + ) + log_pipeline_step( + 
context.runtime, + "evidence_assembled", + { + "mode": "find_files", + "primary_file": context.files[0].path if context.files else None, + "file_count": len(context.files), + }, + ) + log_ranking(context.runtime, context.files) + return context + + def trace_output(self, context: TContext) -> dict[str, object]: + return {"file_count": len(context.files)} diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/fetch_rag_rows_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/fetch_rag_rows_step.py new file mode 100644 index 0000000..bf24b3a --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/fetch_rag_rows_step.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from typing import Generic, TypeVar + +from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import RetrievalWorkflowContext +from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter +from app.core.agent.utils.workflow import WorkflowStep + +TContext = TypeVar("TContext", bound=RetrievalWorkflowContext) + + +class FetchRagRowsStep(WorkflowStep[TContext], Generic[TContext]): + step_id = "fetch_rag_rows" + title = "Получение строк из RAG" + + def __init__(self, rag_adapter: V2RagRetrievalAdapter) -> None: + self._rag_adapter = rag_adapter + + async def run(self, context: TContext) -> TContext: + if context.answer or context.retrieval_plan is None: + return context + context.retrieved_rows = await self._rag_adapter.fetch_rows( + context.rag_session_id, + context.route.normalized_query, + context.retrieval_plan, + ) + return context + + def trace_output(self, context: TContext) -> dict[str, object]: + return {"retrieved_row_count": len(context.retrieved_rows)} diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/finalize_find_files_answer_step.py 
b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/finalize_find_files_answer_step.py
new file mode 100644
index 0000000..4f770a2
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/finalize_find_files_answer_step.py
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context import DocExplainFindFilesContext
+from app.core.agent.utils.workflow import WorkflowStep
+
+
+class FinalizeFindFilesAnswerStep(WorkflowStep[DocExplainFindFilesContext]):
+    """Final workflow step: turn the collected file list into the text answer."""
+
+    step_id = "finalize_find_files_answer"
+    title = "Сборка списка файлов"
+
+    async def run(self, context: DocExplainFindFilesContext) -> DocExplainFindFilesContext:
+        """Fill ``context.answer`` and ``context.answer_generated_payload``.
+
+        An answer produced by an earlier step is left untouched. Otherwise the
+        answer is a fixed "nothing found" message when there are no files, the
+        single path when exactly one file was found, or up to four paths
+        joined by newlines.
+        """
+        if context.answer:
+            return context
+
+        files = context.files
+        gate = context.gate_decision
+        if not files:
+            answer = "Не нашёл файлов документации, которые уверенно соответствуют запросу."
+            answer_mode = "insufficient_evidence"
+        elif gate is not None and gate.reason == "low_confidence_shortlist":
+            # Low-confidence shortlist: same listing, but report the gate's own answer mode.
+            answer = "\n".join(entry.path for entry in files[:4])
+            answer_mode = gate.answer_mode
+        elif len(files) == 1:
+            answer = files[0].path
+            answer_mode = "deterministic"
+        else:
+            answer = "\n".join(entry.path for entry in files[:4])
+            answer_mode = "deterministic"
+
+        context.answer = answer
+        context.answer_generated_payload = {"answer_mode": answer_mode, "answer_length": len(answer)}
+        return context
+
+    def trace_output(self, context: DocExplainFindFilesContext) -> dict[str, object]:
+        """Report the length of the final answer for the workflow trace."""
+        return {"answer_length": len(context.answer)}
diff --git
a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/prepare_candidate_rows_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/prepare_candidate_rows_step.py
new file mode 100644
index 0000000..46cecc3
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/prepare_candidate_rows_step.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.retrieval.candidate_rows import CandidateRowsBuilder
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import RetrievalWorkflowContext
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.pipeline_logging import log_retrieval_trace
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
+
+
+class PrepareCandidateRowsStep(WorkflowStep[TContext], Generic[TContext]):
+    """Workflow step that turns the retrieved rows into candidate rows."""
+
+    step_id = "prepare_candidate_rows"
+    title = "Подготовка candidate rows"
+
+    def __init__(self, builder: CandidateRowsBuilder) -> None:
+        self._builder = builder
+
+    async def run(self, context: TContext) -> TContext:
+        """Build ``context.rows`` from ``context.retrieved_rows`` and log the retrieval trace.
+
+        Returns the context unchanged when an answer is already set or no retrieval plan exists.
+        """
+        if context.answer or context.retrieval_plan is None:
+            return context
+        source_rows = context.retrieved_rows
+        built = self._builder.build(source_rows, context.route)
+        context.rows = built.rows
+        log_retrieval_trace(
+            runtime=context.runtime,
+            route=context.route,
+            plan=context.retrieval_plan,
+            retrieved_rows=source_rows,
+            metadata_rows=built.metadata_rows,
+            rows=built.rows,
+        )
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        """Report how many candidate rows the context holds."""
+        return {"row_count": len(context.rows)}
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/require_rag_session_step.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/require_rag_session_step.py
new file mode 100644
index 0000000..17a03b6
---
/dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/require_rag_session_step.py
@@ -0,0 +1,49 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import RetrievalWorkflowContext
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.pipeline_logging import log_pipeline_step
+from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
+
+
+class RequireRagSessionStep(WorkflowStep[TContext], Generic[TContext]):
+    """Workflow step that answers early when the context has no RAG session id."""
+
+    step_id = "require_rag_session"
+    title = "Проверка RAG-сессии"
+
+    def __init__(self, *, missing_message: str, missing_gate: EvidenceGateDecision | None = None) -> None:
+        self._missing_message = missing_message
+        self._missing_gate = missing_gate
+
+    async def run(self, context: TContext) -> TContext:
+        """Answer with the configured message when ``context.rag_session_id`` is empty.
+
+        If a ``missing_gate`` decision was supplied, it is also stored on the
+        context, reflected in ``answer_generated_payload`` and logged as
+        ``evidence_gate_checked``.
+        """
+        if context.rag_session_id:
+            return context
+
+        context.answer = self._missing_message
+        gate = self._missing_gate
+        if gate is None:
+            return context
+
+        context.gate_decision = gate
+        context.answer_generated_payload = {
+            "answer_mode": gate.answer_mode,
+            "answer_length": len(context.answer),
+        }
+        gate_summary = {"passed": gate.passed, "reason": gate.reason, "answer_mode": gate.answer_mode}
+        log_pipeline_step(context.runtime, "evidence_gate_checked", gate_summary)
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        """Report whether the context carries a RAG session id."""
+        return {"has_rag_session": bool(context.rag_session_id)}
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/resolve_retrieval_plan_step.py
b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/resolve_retrieval_plan_step.py
new file mode 100644
index 0000000..d87d5e5
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/resolve_retrieval_plan_step.py
@@ -0,0 +1,48 @@
+from __future__ import annotations
+
+from typing import Generic, TypeVar
+
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import RetrievalWorkflowContext
+from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.pipeline_logging import log_pipeline_step
+from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
+from app.core.agent.utils.workflow import WorkflowStep
+
+TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
+
+
+class ResolveRetrievalPlanStep(WorkflowStep[TContext], Generic[TContext]):
+    """Workflow step that asks the resolver for a retrieval plan."""
+
+    step_id = "resolve_retrieval_plan"
+    title = "Выбор retrieval-плана"
+
+    def __init__(self, resolver: RetrievalPlanResolver) -> None:
+        self._resolver = resolver
+
+    async def run(self, context: TContext) -> TContext:
+        """Resolve a retrieval plan for ``context.route`` and store it on the context.
+
+        Skipped when an answer is already set. The chosen plan is logged to the
+        retrieval-policy trace module and to the pipeline log.
+        """
+        if context.answer:
+            return context
+
+        chosen = self._resolver.resolve(context.route)
+        context.retrieval_plan = chosen
+        policy_trace = context.runtime.trace.module("process.v2.retrieval_policy")
+        policy_trace.log(
+            "retrieval_plan_resolved",
+            {"profile": chosen.profile, "layers": chosen.layers, "limit": chosen.limit, "filters": chosen.filters},
+        )
+        log_pipeline_step(
+            context.runtime,
+            "retrieval_profile_selected",
+            {"profile": chosen.profile, "layers": chosen.layers, "filters": chosen.filters},
+        )
+        return context
+
+    def trace_output(self, context: TContext) -> dict[str, object]:
+        """Report the resolved plan's profile, or an empty string when no plan is set."""
+        current_plan = context.retrieval_plan
+        return {"profile": getattr(current_plan, "profile", "")}
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/retrieval/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/retrieval/__init__.py
new file mode 100644
index 0000000..373695e
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/retrieval/__init__.py
@@ -0,0 +1,2 @@
+"""Retrieval-related step helpers for the
doc-explain find-files workflow.""" + diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/retrieval/candidate_rows.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/retrieval/candidate_rows.py new file mode 100644 index 0000000..e92f7c0 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/retrieval/candidate_rows.py @@ -0,0 +1,43 @@ +"""Сборка candidate rows для doc-explain find-files (метаданные + сиды по hints).""" + +from __future__ import annotations + +from dataclasses import dataclass + +from app.core.agent.utils.process_v2.models import V2RouteResult +from app.core.agent.utils.process_v2.rag_retrieval import DocsMetadataLookupIndex +from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import ( + RagRowIndex, + merge_row_lists, + normalize_doc_path, + normalized_path_set, + seed_candidates_from_target_hints, +) + + +@dataclass(slots=True) +class CandidateRowsResult: + metadata_rows: list[dict] + rows: list[dict] + + +class CandidateRowsBuilder: + def build(self, retrieved_rows: list[dict], route: V2RouteResult) -> CandidateRowsResult: + metadata_rows = DocsMetadataLookupIndex(retrieved_rows).lookup(route) + rows = merge_row_lists(retrieved_rows, metadata_rows) + rows = seed_candidates_from_target_hints(rows, route.anchors.target_doc_hints, RagRowIndex(rows)) + self._print_missing_target_hints(route, rows) + return CandidateRowsResult(metadata_rows=metadata_rows, rows=rows) + + def _print_missing_target_hints(self, route: V2RouteResult, rows: list[dict]) -> None: + if not route.anchors.target_doc_hints: + return + candidate_paths = normalized_path_set(rows) + for hint in route.anchors.target_doc_hints: + if not str(hint or "").strip(): + continue + normalized = normalize_doc_path(hint) + if not normalized.startswith("docs/") or "." 
not in normalized.rsplit("/", 1)[-1]: + continue + if normalized not in candidate_paths: + print("ERROR: target doc missing from candidates:", normalized) diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/retrieval/retrieval_policy.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/retrieval/retrieval_policy.py new file mode 100644 index 0000000..f74df57 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/steps/retrieval/retrieval_policy.py @@ -0,0 +1,99 @@ +from __future__ import annotations + +from app.core.agent.utils.process_v2.anchor_signals import anchor_signal_types +from app.core.agent.utils.process_v2.models import V2AnchorType, V2RouteResult, V2Subintent +from app.core.rag.contracts.enums import RagLayer +from app.core.rag.retrieval.session_retriever import RetrievalPlan + + +class DocExplainFindFilesRetrievalPolicy: + _LAYERS = [RagLayer.DOCS_DOCUMENT_CATALOG, RagLayer.DOCS_ENTITY_CATALOG] + + def supports(self, route: V2RouteResult) -> bool: + return route.subintent == V2Subintent.FIND_FILES + + def resolve(self, route: V2RouteResult) -> RetrievalPlan: + return RetrievalPlan( + profile="file_lookup", + layers=list(self._LAYERS), + limit=12, + filters=self._build_filters(route), + ) + + def _build_filters(self, route: V2RouteResult) -> dict[str, object]: + filters: dict[str, object] = {"target_doc_hints": list(route.anchors.target_doc_hints)} + if route.anchors.process_domain: + filters["metadata.domain"] = route.anchors.process_domain + if route.anchors.process_subdomain: + filters["metadata.subdomain"] = route.anchors.process_subdomain + prefixes = self._path_prefixes(route) + if prefixes: + filters["path_prefixes"] = prefixes + filters["prefer_path_prefixes"] = self._prefer_prefixes(route, prefixes) + filters["prefer_like_patterns"] = self._like_patterns(route) + return filters + + def _path_prefixes(self, route: V2RouteResult) -> list[str]: + hint_prefixes = 
_prefixes_from_paths(route.anchors.target_doc_hints) + if hint_prefixes: + return hint_prefixes + file_prefixes = [item for item in route.anchors.file_names if str(item).strip().startswith("docs/")] + derived = _prefixes_from_paths(file_prefixes) + if derived: + return derived + signals = anchor_signal_types(route) + if V2AnchorType.API_ENDPOINT in signals: + return ["docs/api/", "docs/"] + if V2AnchorType.ARCHITECTURE in signals: + return ["docs/architecture/", "docs/"] + if V2AnchorType.LOGIC_FLOW in signals: + return ["docs/logic/", "docs/"] + if V2AnchorType.DOMAIN_ENTITY in signals: + return ["docs/domains/", "docs/"] + return ["docs/"] + + def _prefer_prefixes(self, route: V2RouteResult, prefixes: list[str]) -> list[str]: + preferred = list(prefixes) + if route.anchors.process_domain or route.anchors.process_subdomain: + preferred.extend(["docs/domains/", "docs/logic/"]) + return _unique_terms(preferred or ["docs/"]) + + def _like_patterns(self, route: V2RouteResult) -> list[str]: + if route.anchors.target_doc_hints: + names = [hint.rsplit("/", 1)[-1] for hint in route.anchors.target_doc_hints if str(hint).strip()] + return [f"%{name.lower()}%" for name in names] + terms = list(route.target_terms) + terms.extend(route.anchors.endpoint_paths) + terms.extend(route.anchors.file_names) + terms.extend(route.anchors.entity_names) + terms.extend(route.anchors.matched_aliases) + if route.anchors.process_domain: + terms.append(route.anchors.process_domain) + if route.anchors.process_subdomain: + terms.append(route.anchors.process_subdomain) + return [f"%{term.lower()}%" for term in _unique_terms(terms)] + + +def _prefixes_from_paths(paths: list[str]) -> list[str]: + prefixes: list[str] = [] + for path in paths: + value = str(path).strip().strip("/") + if "/" not in value: + continue + prefix = value.rsplit("/", 1)[0] + "/" + if prefix: + prefixes.append(prefix) + return _unique_terms(prefixes) + + +def _unique_terms(items: list[str]) -> list[str]: + seen: set[str] = 
set() + unique: list[str] = [] + for raw in items: + value = str(raw or "").strip() + if not value or value in seen: + continue + seen.add(value) + unique.append(value) + return unique + diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/__init__.py new file mode 100644 index 0000000..17da4e5 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/__init__.py @@ -0,0 +1,2 @@ +"""Runtime helpers for the doc-explain find-files workflow.""" + diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/buffered_graph.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/buffered_graph.py new file mode 100644 index 0000000..3fbd3a3 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/buffered_graph.py @@ -0,0 +1,46 @@ +"""Граф workflow doc-explain find-files: буфер шагов и один сброс в trace (на базе utils.workflow).""" + +from __future__ import annotations + +from typing import TypeVar + +from app.core.agent.utils.workflow.context import WorkflowContext +from app.core.agent.utils.workflow.graph import WorkflowGraph + +TContext = TypeVar("TContext", bound=WorkflowContext) + + +class DocExplainFindFilesWorkflowGraph(WorkflowGraph[TContext]): + """Не логирует step_started/step_completed по отдельности; сбрасывает буфер в ``workflow_trace_flushed``.""" + + async def run(self, context: TContext) -> TContext: + trace = context.runtime.trace.module(self._source) + trace.log("workflow_started", {"workflow_id": self._workflow_id}) + steps_buffer: list[dict[str, object]] = [] + for step in self._steps: + inp = step.trace_input(context) + request_id = context.runtime.request.request_id + await context.runtime.publisher.publish_status( + request_id, + self._source, + f"Шаг workflow: 
{step.title}.", + {"workflow_id": self._workflow_id, "step_id": step.step_id}, + ) + context = await step.run(context) + out = step.trace_output(context) + trace.log( + "workflow_step_traced", + { + "workflow_id": self._workflow_id, + "step": {"id": step.step_id, "title": step.title}, + "input": inp, + "output": out, + }, + ) + steps_buffer.append({"step_id": step.step_id, "title": step.title, "input": inp, "output": out}) + trace.log( + "workflow_trace_flushed", + {"workflow_id": self._workflow_id, "steps": steps_buffer}, + ) + trace.log("workflow_completed", {"workflow_id": self._workflow_id}) + return context diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/context.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/context.py new file mode 100644 index 0000000..3d9dacd --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/context.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from dataclasses import dataclass, field + +from app.core.agent.runtime.execution_context import RuntimeExecutionContext +from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision +from app.core.agent.utils.process_v2.models import RetrievedFile, V2RouteResult +from app.core.rag.retrieval.session_retriever import RetrievalPlan + + +@dataclass(slots=True) +class DocExplainFindFilesContext: + runtime: RuntimeExecutionContext + route: V2RouteResult + rag_session_id: str + retrieval_plan: RetrievalPlan | None = None + retrieved_rows: list[dict] = field(default_factory=list) + rows: list[dict] = field(default_factory=list) + files: list[RetrievedFile] = field(default_factory=list) + gate_decision: EvidenceGateDecision | None = None + answer: str = "" + answer_generated_payload: dict[str, object] | None = None diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/context_protocols.py 
b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/context_protocols.py
new file mode 100644
index 0000000..c9ee1ab
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/context_protocols.py
@@ -0,0 +1,35 @@
+"""Context protocols for the doc-explain find-files workflow."""
+
+from __future__ import annotations
+
+from typing import Protocol
+
+from app.core.agent.runtime.execution_context import RuntimeExecutionContext
+from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
+from app.core.agent.utils.process_v2.models import RetrievedFile, V2RouteResult
+from app.core.rag.retrieval.session_retriever import RetrievalPlan
+
+
+class RetrievalWorkflowContext(Protocol):
+    """Structural type of the context object the retrieval steps read and write."""
+
+    runtime: RuntimeExecutionContext
+    route: V2RouteResult
+    rag_session_id: str
+    # Assigned by ResolveRetrievalPlanStep; FetchRagRowsStep and PrepareCandidateRowsStep skip while it is None.
+    retrieval_plan: RetrievalPlan | None
+    # Assigned by FetchRagRowsStep from the RAG adapter.
+    retrieved_rows: list[dict]
+    # Assigned by PrepareCandidateRowsStep from the candidate-rows builder result.
+    rows: list[dict]
+    gate_decision: EvidenceGateDecision | None
+    # Once non-empty, the steps that check it return the context unchanged.
+    answer: str
+    answer_generated_payload: dict[str, object] | None
+
+
+class FindFilesWorkflowContext(RetrievalWorkflowContext, Protocol):
+    """Retrieval context extended with the file evidence list."""
+
+    # Assigned by BuildFilesEvidenceStep from the evidence assembler.
+    files: list[RetrievedFile]
diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/pipeline_logging.py b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/pipeline_logging.py
new file mode 100644
index 0000000..7440006
--- /dev/null
+++ b/src/app/core/agent/processes/v2/workflows/doc_explain_find_files/workflow_runtime/pipeline_logging.py
@@ -0,0 +1,107 @@
+"""Retrieval/pipeline/ranking logging for doc-explain find-files."""
+
+from __future__ import annotations
+
+from app.core.agent.utils.process_v2.models import V2RouteResult
+from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import normalize_doc_path, row_path
+
+
+def log_pipeline_step(runtime, step: str, payload: dict[str, object]) -> None:
+    """Log ``payload`` under event name ``step`` in the ``process.v2.pipeline`` trace module."""
+    runtime.trace.module("process.v2.pipeline").log(step, payload)
+
+ +def log_retrieval_trace(runtime, route: V2RouteResult, plan, retrieved_rows: list[dict], metadata_rows: list[dict], rows: list[dict]) -> None: + runtime.trace.module("process.v2.rag_retrieval").log( + "rag_rows_fetched", + { + "profile": plan.profile, + "row_count": len(rows), + "rows": [trace_row(row) for row in rows], + }, + ) + hinted_paths = {normalize_doc_path(hint) for hint in route.anchors.target_doc_hints if str(hint or "").strip()} + log_pipeline_step( + runtime, + "candidate_generation", + { + "query": route.user_query, + "profile": plan.profile, + "details": { + "target_doc_hints": list(route.anchors.target_doc_hints), + "candidates_before_ranking": [row_path(row) for row in rows if row_path(row)], + }, + "resolved_aliases": route.anchors.matched_aliases, + "target_doc_hints": route.anchors.target_doc_hints, + "candidate_docs_before_ranking": [trace_row(row) for row in rows[:8]], + "sources": { + "seeded": [trace_row(row) for row in retrieved_rows[:5] if row_path(row) in hinted_paths], + "metadata_lookup": [trace_row(row) for row in metadata_rows[:5]], + "semantic": [trace_row(row) for row in retrieved_rows[:5]], + }, + }, + ) + log_pipeline_step( + runtime, + "retrieval_executed", + { + "query": route.user_query, + "profile": plan.profile, + "row_count": len(rows), + "target_doc_hints": route.anchors.target_doc_hints, + "top_results": [trace_row(row) for row in rows[:5]], + }, + ) + + +def log_ranking(runtime, items: list) -> None: + top_docs: list[dict[str, object]] = [] + for item in items[:4]: + top_docs.append( + { + "doc": getattr(item, "path", ""), + "score": getattr(item, "score", 0), + "match_reason": getattr(item, "match_reason", ""), + } + ) + log_pipeline_step( + runtime, + "ranking_explained", + { + "doc": getattr(item, "path", ""), + "score_breakdown": getattr(item, "score_breakdown", {}), + "score": getattr(item, "score", 0), + "match_reason": getattr(item, "match_reason", ""), + }, + ) + log_pipeline_step( + runtime, + 
"ranking_explained", + { + "top_docs_after_ranking": top_docs, + "ranking_score_breakdown": [ + { + "doc": getattr(item, "path", ""), + "score_breakdown": getattr(item, "score_breakdown", {}), + } + for item in items[:4] + ], + }, + ) + + +def trace_row(row: dict) -> dict[str, object]: + metadata = row.get("metadata") or {} + content = str(row.get("content") or "").strip() + return { + "layer": str(row.get("layer") or ""), + "path": str(row.get("path") or ""), + "title": str(row.get("title") or ""), + "document_id": str(metadata.get("document_id") or metadata.get("doc_id") or row.get("document_id") or ""), + "entity_name": str(metadata.get("entity_name") or ""), + "summary_text": str(metadata.get("summary_text") or "")[:400], + "section_path": str(metadata.get("section_path") or ""), + "metadata_domain": str(metadata.get("domain") or ""), + "metadata_subdomain": str(metadata.get("subdomain") or ""), + "content_preview": content[:400], + } diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/README.md b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/README.md new file mode 100644 index 0000000..8ed14ee --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/README.md @@ -0,0 +1,162 @@ +# DOC_EXPLAIN / SUMMARY Workflow + +## Контракт сабинтента + +| Поле | Значение | +|---|---| +| `domain` | `DOCS` | +| `intent` | `DOC_EXPLAIN` | +| `subintent` | `SUMMARY` | +| `workflow_id` | `v2.docs_explain.summary` | +| `source` | `workflow.v2.summary` | + +## Диаграмма флоу + +```mermaid +flowchart TD + A["RequireRagSessionStep"] --> B["ResolveRetrievalPlanStep"] + B --> C["FetchRagRowsStep"] + C --> D["PrepareCandidateRowsStep"] + D --> E["BuildSummaryEvidenceStep"] + E --> F["ApplySummaryEvidenceGateStep"] + F --> G["GenerateSummaryAnswerStep"] +``` + +## Шаги процесса + +### 1) `RequireRagSessionStep` + +Шаг валидирует наличие активной RAG-сессии до retrieval. 
Если `rag_session_id` отсутствует, workflow завершает обработку и записывает сообщение об отсутствии опоры. Это предотвращает запуск последующих шагов без индекса проекта. + +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.rag_session_id` | `V2Process` -> `DocExplainSummaryContext` | Идентификатор RAG-сессии | +| `self._missing_message` | Конфигурация в `graph.py` | Ответ при отсутствии сессии | +| `self._missing_gate` | Конфиг шага | Опциональный gate для раннего выхода | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.answer` | Заполняется `missing_message` при пустом `rag_session_id` | +| `context.gate_decision` | Заполняется, если передан `missing_gate` | +| `context.answer_generated_payload` | Формируется как `{"answer_mode", "answer_length"}` при раннем ответе | + +### 2) `ResolveRetrievalPlanStep` + +Шаг вызывает policy-резолвер и строит `RetrievalPlan` для doc-summary сценария. Профиль и фильтры зависят от сигналов route: endpoint/architecture/logic/domain. Результат сохраняется в контекст и логируется в trace. + +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.route` | `intent_router` | Route с `target_terms` и `anchors` | +| `self._resolver` | DI из `graph.py` | Реализация `RetrievalPlanResolver` | +| `context.answer` | Предыдущие шаги | При наличии ответа шаг пропускается | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.retrieval_plan` | `self._resolver.resolve(context.route)` | +| `process.v2.retrieval_policy.retrieval_plan_resolved` | Лог профиля, слоёв, лимита и фильтров | + +### 3) `FetchRagRowsStep` + +Шаг выполняет retrieval по сформированному плану. Внутри адаптера объединяются seed-результаты по `target_doc_hints` и основной retrieval. Это даёт более устойчивую выдачу как по явным hints, так и по семантическому совпадению. 
+ +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.rag_session_id` | Контекст workflow | Сессия для поиска | +| `context.route.normalized_query` | Route | Нормализованный запрос | +| `context.retrieval_plan` | `ResolveRetrievalPlanStep` | План retrieval | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.retrieved_rows` | `await rag_adapter.fetch_rows(...)` | + +### 4) `PrepareCandidateRowsStep` + +Шаг обогащает найденные строки через metadata lookup и подмешивает кандидатов из `target_doc_hints`. На выходе формируется `context.rows`, который уже используется для построения summary-evidence. Шаг также пишет подробные retrieval-логи для анализа ранжирования. + +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.retrieved_rows` | `FetchRagRowsStep` | Строки retrieval | +| `context.route` | Route | Сигналы hints/aliases/terms | +| `self._builder` | `CandidateRowsBuilder()` | Логика merge и metadata поиска | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.rows` | `prepared.rows` из `CandidateRowsBuilder.build(...)` | +| `process.v2.rag_retrieval.rag_rows_fetched` | Лог выборки и источников кандидатов | + +### 5) `BuildSummaryEvidenceStep` + +Шаг преобразует candidate rows в список `RetrievedSummary` через `DocsEvidenceAssembler.assemble_summaries`. Здесь формируется shortlist документов с оценками и причинами совпадения, который затем проверяется gate. Дополнительно логируются ranking-разборы. 
+ +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.rows` | `PrepareCandidateRowsStep` | Подготовленные кандидаты | +| `context.route` | Route | Сигналы для ranking | +| `self._assembler` | DI из `graph.py` | Сборщик summary evidence | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.documents` | `assemble_summaries(context.rows, context.route)` | +| `process.v2.evidence.evidence_assembled` | Лог `document_count` и списка путей | + +### 6) `ApplySummaryEvidenceGateStep` + +Шаг оценивает достаточность и надёжность собранного summary-evidence. Решение gate влияет на дальнейшее формирование ответа: либо прямой отказ/ограничение, либо генерация summary. Результат логируется в pipeline. + +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.route` | Route | Контекст запроса | +| `context.documents` | `BuildSummaryEvidenceStep` | Кандидаты summary | +| `self._gate` | DI из `graph.py` | Правила валидации evidence | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.gate_decision` | `self._gate.check_summaries(context.route, context.documents)` | +| `process.v2.pipeline.evidence_gate_checked` | Лог `passed/reason/answer_mode` | + +### 7) `GenerateSummaryAnswerStep` + +Шаг формирует финальный ответ: gate-message, детерминированный ответ или LLM-генерация. При LLM-ветке собирается `prompt_input` из пользовательского запроса, route-сигналов и найденных summary-блоков. Итог всегда записывается в `context.answer` и сопровождается `answer_generated_payload`. 
+ +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.documents` | `BuildSummaryEvidenceStep` | Опорные summary-документы | +| `context.gate_decision` | `ApplySummaryEvidenceGateStep` | Режим допуска к генерации | +| `context.workflow_llm_enabled` | `V2Process` | Флаг LLM/детерминизм | +| `context.prompt_name` | `V2Process` | Имя prompt-шаблона | +| `self._llm` | DI из `graph.py` | LLM-сервис | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.prompt_input` | Склеивается из route + shortlist документов | +| `context.answer` | Gate-message / deterministic summary / результат `llm.generate(...)` | +| `context.answer_generated_payload` | `{"answer_mode", "answer_length"}` по выбранной ветке | + diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/__init__.py new file mode 100644 index 0000000..5dd059b --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/__init__.py @@ -0,0 +1,3 @@ +from app.core.agent.processes.v2.workflows.doc_explain_summary.graph import DocExplainSummaryGraph + +__all__ = ["DocExplainSummaryGraph"] diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/graph.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/graph.py new file mode 100644 index 0000000..dd43d50 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/graph.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.buffered_graph import DocExplainSummaryWorkflowGraph +from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.retrieval.candidate_rows import CandidateRowsBuilder +from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context import DocExplainSummaryContext +from 
app.core.agent.processes.v2.workflows.doc_explain_summary.steps.apply_summary_evidence_gate_step import ( + ApplySummaryEvidenceGateStep, +) +from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.build_summary_evidence_step import BuildSummaryEvidenceStep +from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.fetch_rag_rows_step import FetchRagRowsStep +from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.generate_summary_answer_step import ( + GenerateSummaryAnswerStep, +) +from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.prepare_candidate_rows_step import ( + PrepareCandidateRowsStep, +) +from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.require_rag_session_step import RequireRagSessionStep +from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.resolve_retrieval_plan_step import ( + ResolveRetrievalPlanStep, +) +from app.core.agent.utils.llm import AgentLlmService +from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler +from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate +from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver +from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter + + +class DocExplainSummaryGraph(DocExplainSummaryWorkflowGraph[DocExplainSummaryContext]): + def __init__( + self, + llm: AgentLlmService, + policy_resolver: RetrievalPlanResolver, + rag_adapter: V2RagRetrievalAdapter, + evidence_assembler: DocsEvidenceAssembler, + evidence_gate: DocsEvidenceGate, + ) -> None: + super().__init__( + workflow_id="v2.docs_explain.summary", + source="workflow.v2.summary", + steps=[ + RequireRagSessionStep( + missing_message="Для процесса v2 нужна активная RAG-сессия проекта с проиндексированной документацией." 
class ApplySummaryEvidenceGateStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that runs the evidence gate over the assembled summaries.

    The gate verdict is stored on ``context.gate_decision`` and mirrored into
    the pipeline log. The step is a no-op when an answer has already been
    produced by an earlier step.
    """

    step_id = "apply_summary_evidence_gate"
    title = "Проверка summary evidence"

    def __init__(self, gate: DocsEvidenceGate) -> None:
        self._gate = gate

    async def run(self, context: TContext) -> TContext:
        """Evaluate the gate unless ``context.answer`` is already set."""
        if not context.answer:
            decision = self._gate.check_summaries(context.route, context.documents)
            context.gate_decision = decision
            gate_payload = {
                "passed": decision.passed,
                "reason": decision.reason,
                "answer_mode": decision.answer_mode,
            }
            log_pipeline_step(context.runtime, "evidence_gate_checked", gate_payload)
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report whether a gate decision exists and passed."""
        decision = context.gate_decision
        return {"passed": bool(decision and decision.passed)}
class FetchRagRowsStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that loads raw rows from the RAG adapter.

    Rows are fetched for the current RAG session, the normalized query of the
    route and the previously resolved retrieval plan, then stored on
    ``context.retrieved_rows``.
    """

    step_id = "fetch_rag_rows"
    title = "Получение строк из RAG"

    def __init__(self, rag_adapter: V2RagRetrievalAdapter) -> None:
        self._rag_adapter = rag_adapter

    async def run(self, context: TContext) -> TContext:
        """Fetch rows only when no answer exists yet and a plan is available."""
        if not context.answer and context.retrieval_plan is not None:
            fetched = await self._rag_adapter.fetch_rows(
                context.rag_session_id,
                context.route.normalized_query,
                context.retrieval_plan,
            )
            context.retrieved_rows = fetched
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report how many rows are currently stored on the context."""
        row_total = len(context.retrieved_rows)
        return {"retrieved_row_count": row_total}
a/src/app/core/agent/processes/v2/workflows/docs_explain_summary/steps/generate_summary_answer_step.py +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/generate_summary_answer_step.py @@ -3,28 +3,36 @@ from __future__ import annotations import asyncio import json -from app.core.agent.processes.v2.anchor_signals import route_anchor_summary +from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context import DocExplainSummaryContext from app.core.agent.utils.llm import AgentLlmService -from app.core.agent.processes.v2.workflows.docs_explain_summary.context import DocsExplainSummaryContext +from app.core.agent.utils.process_v2.anchor_signals import route_anchor_summary from app.core.agent.utils.workflow import WorkflowStep -class GenerateSummaryAnswerStep(WorkflowStep[DocsExplainSummaryContext]): +class GenerateSummaryAnswerStep(WorkflowStep[DocExplainSummaryContext]): step_id = "generate_summary_answer" title = "Сборка ответа по summary" def __init__(self, llm: AgentLlmService) -> None: self._llm = llm - async def run(self, context: DocsExplainSummaryContext) -> DocsExplainSummaryContext: + async def run(self, context: DocExplainSummaryContext) -> DocExplainSummaryContext: + if context.answer: + return context if context.gate_decision is not None and not context.gate_decision.passed: context.answer = context.gate_decision.message + context.answer_generated_payload = { + "answer_mode": context.gate_decision.answer_mode, + "answer_length": len(context.answer), + } return context if not context.workflow_llm_enabled: context.answer = self._build_deterministic_answer(context) + context.answer_generated_payload = {"answer_mode": "grounded_summary", "answer_length": len(context.answer)} return context if not context.documents: context.answer = "Не нашёл подходящих SUMMARY-блоков в документации по этому запросу." 
+ context.answer_generated_payload = {"answer_mode": "insufficient_evidence", "answer_length": len(context.answer)} return context context.prompt_input = self._build_prompt_input(context) request_id = context.runtime.request.request_id @@ -35,9 +43,10 @@ class GenerateSummaryAnswerStep(WorkflowStep[DocsExplainSummaryContext]): log_context=f"agent:{request_id}", trace=context.runtime.trace.module("workflow.v2.summary.llm"), ) + context.answer_generated_payload = {"answer_mode": "grounded_summary", "answer_length": len(context.answer)} return context - def _build_prompt_input(self, context: DocsExplainSummaryContext) -> str: + def _build_prompt_input(self, context: DocExplainSummaryContext) -> str: blocks = [ f"Запрос пользователя:\n{context.route.user_query}", "Сигналы запроса:\n" + json.dumps(route_anchor_summary(context.route), ensure_ascii=False, indent=2), @@ -52,17 +61,13 @@ class GenerateSummaryAnswerStep(WorkflowStep[DocsExplainSummaryContext]): ) return "\n\n".join(blocks) - def _build_deterministic_answer(self, context: DocsExplainSummaryContext) -> str: + def _build_deterministic_answer(self, context: DocExplainSummaryContext) -> str: if not context.documents: return "Не нашёл подходящих SUMMARY-блоков в документации по этому запросу." 
class PrepareCandidateRowsStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that turns retrieved rows into the candidate row list.

    Delegates to the injected ``CandidateRowsBuilder``, stores the resulting
    rows on ``context.rows`` and writes the retrieval trace.
    """

    step_id = "prepare_candidate_rows"
    title = "Подготовка candidate rows"

    def __init__(self, builder: CandidateRowsBuilder) -> None:
        self._builder = builder

    async def run(self, context: TContext) -> TContext:
        """Build candidates unless an answer exists or no plan was resolved."""
        if not context.answer and context.retrieval_plan is not None:
            result = self._builder.build(context.retrieved_rows, context.route)
            context.rows = result.rows
            log_retrieval_trace(
                context.runtime,
                context.route,
                context.retrieval_plan,
                context.retrieved_rows,
                result.metadata_rows,
                result.rows,
            )
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report how many candidate rows are stored on the context."""
        candidate_total = len(context.rows)
        return {"row_count": candidate_total}
class ResolveRetrievalPlanStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that picks the retrieval plan for the current route.

    The plan returned by the injected resolver is stored on
    ``context.retrieval_plan`` and logged to both the retrieval-policy trace
    module and the pipeline log.
    """

    step_id = "resolve_retrieval_plan"
    title = "Выбор retrieval-плана"

    def __init__(self, resolver: RetrievalPlanResolver) -> None:
        self._resolver = resolver

    async def run(self, context: TContext) -> TContext:
        """Resolve and record the plan unless ``context.answer`` is already set."""
        if not context.answer:
            resolved = self._resolver.resolve(context.route)
            context.retrieval_plan = resolved
            policy_trace = context.runtime.trace.module("process.v2.retrieval_policy")
            policy_trace.log(
                "retrieval_plan_resolved",
                {
                    "profile": resolved.profile,
                    "layers": resolved.layers,
                    "limit": resolved.limit,
                    "filters": resolved.filters,
                },
            )
            # The pipeline log entry deliberately omits the limit.
            log_pipeline_step(
                context.runtime,
                "retrieval_profile_selected",
                {
                    "profile": resolved.profile,
                    "layers": resolved.layers,
                    "filters": resolved.filters,
                },
            )
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report the profile of the stored plan, or ``""`` when it has none."""
        current_plan = context.retrieval_plan
        return {"profile": getattr(current_plan, "profile", "")}
= merge_row_lists(retrieved_rows, metadata_rows) + rows = seed_candidates_from_target_hints(rows, route.anchors.target_doc_hints, RagRowIndex(rows)) + self._print_missing_target_hints(route, rows) + return CandidateRowsResult(metadata_rows=metadata_rows, rows=rows) + + def _print_missing_target_hints(self, route: V2RouteResult, rows: list[dict]) -> None: + if not route.anchors.target_doc_hints: + return + candidate_paths = normalized_path_set(rows) + for hint in route.anchors.target_doc_hints: + if not str(hint or "").strip(): + continue + normalized = normalize_doc_path(hint) + if not normalized.startswith("docs/") or "." not in normalized.rsplit("/", 1)[-1]: + continue + if normalized not in candidate_paths: + print("ERROR: target doc missing from candidates:", normalized) diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/retrieval/retrieval_policy.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/retrieval/retrieval_policy.py new file mode 100644 index 0000000..331c087 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/retrieval/retrieval_policy.py @@ -0,0 +1,168 @@ +from __future__ import annotations + +from app.core.agent.utils.process_v2.anchor_signals import anchor_signal_types +from app.core.agent.utils.process_v2.models import V2AnchorType, V2Intent, V2RouteResult, V2Subintent +from app.core.rag.contracts.enums import RagLayer +from app.core.rag.retrieval.session_retriever import RetrievalPlan + + +class DocExplainSummaryRetrievalPolicy: + _SUMMARY_LAYERS = { + "docs_api_method_explain": [ + RagLayer.DOCS_DOCUMENT_CATALOG, + RagLayer.DOCS_FACT_INDEX, + RagLayer.DOCS_DOC_CHUNKS, + ], + "docs_summary_api_endpoint": [ + RagLayer.DOCS_DOCUMENT_CATALOG, + RagLayer.DOCS_FACT_INDEX, + RagLayer.DOCS_DOC_CHUNKS, + ], + "docs_summary_logic_flow": [ + RagLayer.DOCS_WORKFLOW_INDEX, + RagLayer.DOCS_DOCUMENT_CATALOG, + RagLayer.DOCS_DOC_CHUNKS, + ], + "docs_summary_domain_entity": 
[ + RagLayer.DOCS_ENTITY_CATALOG, + RagLayer.DOCS_DOCUMENT_CATALOG, + RagLayer.DOCS_DOC_CHUNKS, + ], + "docs_summary_architecture": [ + RagLayer.DOCS_DOCUMENT_CATALOG, + RagLayer.DOCS_RELATION_GRAPH, + RagLayer.DOCS_DOC_CHUNKS, + ], + "docs_summary_generic": [ + RagLayer.DOCS_DOCUMENT_CATALOG, + RagLayer.DOCS_DOC_CHUNKS, + ], + } + _API_DOC_PREFIXES = ["docs/api/", "docs/endpoints/", "docs/methods/", "api/", "endpoints/", "methods/"] + + def supports(self, route: V2RouteResult) -> bool: + return route.intent == V2Intent.DOC_EXPLAIN and route.subintent == V2Subintent.SUMMARY + + def resolve(self, route: V2RouteResult) -> RetrievalPlan: + profile = self._profile(route) + return RetrievalPlan( + profile=profile, + layers=list(self._SUMMARY_LAYERS[profile]), + limit=10 if profile == "docs_api_method_explain" else 8, + filters=self._filters(route), + ) + + def _filters(self, route: V2RouteResult) -> dict[str, object]: + if self._is_api_method_explain(route): + return self._api_method_filters(route) + filters = self._base_filters(route) + filters["prefer_path_prefixes"] = self._summary_prefixes(route) + filters["prefer_like_patterns"] = self._prefer_like_patterns(route) + if V2AnchorType.API_ENDPOINT in anchor_signal_types(route): + filters["path_prefixes"] = ["docs/api/", "docs/"] + return filters + + def _base_filters(self, route: V2RouteResult) -> dict[str, object]: + filters: dict[str, object] = {"target_doc_hints": list(route.anchors.target_doc_hints)} + if route.anchors.process_domain: + filters["metadata.domain"] = route.anchors.process_domain + if route.anchors.process_subdomain: + filters["metadata.subdomain"] = route.anchors.process_subdomain + return filters + + def _api_method_filters(self, route: V2RouteResult) -> dict[str, object]: + filters = self._base_filters(route) + filters["path_prefixes"] = list(self._API_DOC_PREFIXES) + filters["prefer_path_prefixes"] = list(self._API_DOC_PREFIXES) + filters["prefer_like_patterns"] = self._api_method_patterns(route) 
+ return filters + + def _profile(self, route: V2RouteResult) -> str: + if self._is_api_method_explain(route): + return "docs_api_method_explain" + meaningful = anchor_signal_types(route) - {V2AnchorType.FIND_FILES} + if len(meaningful) != 1: + return "docs_summary_generic" + mapping = { + V2AnchorType.API_ENDPOINT: "docs_summary_api_endpoint", + V2AnchorType.ARCHITECTURE: "docs_summary_architecture", + V2AnchorType.LOGIC_FLOW: "docs_summary_logic_flow", + V2AnchorType.DOMAIN_ENTITY: "docs_summary_domain_entity", + } + return mapping.get(next(iter(meaningful)), "docs_summary_generic") + + def _summary_prefixes(self, route: V2RouteResult) -> list[str]: + signals = anchor_signal_types(route) + prefixes: list[str] = [] + if V2AnchorType.API_ENDPOINT in signals: + prefixes.extend(["docs/api/", "docs/"]) + if V2AnchorType.ARCHITECTURE in signals: + prefixes.extend(["docs/architecture/", "docs/"]) + if V2AnchorType.LOGIC_FLOW in signals: + prefixes.extend(["docs/logic/", "docs/architecture/", "docs/"]) + if V2AnchorType.DOMAIN_ENTITY in signals: + prefixes.extend(["docs/domains/", "docs/", "docs/api/"]) + return _unique_terms(prefixes or ["docs/"]) + + def _prefer_like_patterns(self, route: V2RouteResult) -> list[str]: + terms = [hint.rsplit("/", 1)[-1] for hint in route.anchors.target_doc_hints if str(hint).strip()] + terms.extend(route.anchors.endpoint_paths) + terms.extend(route.target_terms) + terms.extend(route.anchors.file_names) + terms.extend(route.anchors.entity_names) + terms.extend(route.anchors.matched_aliases) + if route.anchors.process_domain: + terms.append(route.anchors.process_domain) + if route.anchors.process_subdomain: + terms.append(route.anchors.process_subdomain) + return [f"%{term.lower()}%" for term in _unique_terms(terms)] + + def _api_method_patterns(self, route: V2RouteResult) -> list[str]: + terms = [hint.rsplit("/", 1)[-1] for hint in route.anchors.target_doc_hints if str(hint).strip()] + terms.extend(route.anchors.target_doc_hints) + 
terms.extend(route.anchors.endpoint_paths) + terms.extend(route.target_terms) + patterns: list[str] = [] + for term in _unique_terms(terms): + lowered = term.lower() + stripped = lowered.strip("/") + if stripped: + patterns.append(f"%{stripped}%") + if lowered: + patterns.append(f"%{lowered}%") + return _unique_terms(patterns) + + def _is_api_method_explain(self, route: V2RouteResult) -> bool: + if route.subintent != V2Subintent.SUMMARY: + return False + if route.anchors.endpoint_paths: + return True + if _has_api_like_hints(route.anchors.target_doc_hints): + return True + return V2AnchorType.API_ENDPOINT in anchor_signal_types(route) + + +def _unique_terms(items: list[str]) -> list[str]: + seen: set[str] = set() + unique: list[str] = [] + for raw in items: + value = str(raw or "").strip() + if not value or value in seen: + continue + seen.add(value) + unique.append(value) + return unique + + +def _has_api_like_hints(hints: list[str]) -> bool: + for hint in hints: + value = str(hint or "").strip().lower() + if not value: + continue + if value.startswith("/"): + return True + if value.startswith(("docs/api/", "docs/endpoints/", "docs/methods/")): + return True + if "endpoint" in value or "method" in value: + return True + return False diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/workflow_runtime/__init__.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/workflow_runtime/__init__.py new file mode 100644 index 0000000..b8a6f58 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/workflow_runtime/__init__.py @@ -0,0 +1,2 @@ +"""Runtime helpers for the doc-explain summary workflow.""" + diff --git a/src/app/core/agent/processes/v2/workflows/doc_explain_summary/workflow_runtime/buffered_graph.py b/src/app/core/agent/processes/v2/workflows/doc_explain_summary/workflow_runtime/buffered_graph.py new file mode 100644 index 0000000..ba0c1ed --- /dev/null +++ 
class DocExplainSummaryWorkflowGraph(WorkflowGraph[TContext]):
    """Workflow graph that buffers per-step traces and flushes them once.

    Does not log separate ``step_started``/``step_completed`` events; every
    step is recorded as ``workflow_step_traced`` and the collected buffer is
    emitted in a single ``workflow_trace_flushed`` event.
    """

    async def run(self, context: TContext) -> TContext:
        """Run all steps in order, publishing a status before each one."""
        workflow_id = self._workflow_id
        source = self._source
        trace = context.runtime.trace.module(source)
        trace.log("workflow_started", {"workflow_id": workflow_id})

        traced_steps: list[dict[str, object]] = []
        for step in self._steps:
            # Capture the input before the step gets a chance to change the context.
            step_input = step.trace_input(context)
            request_id = context.runtime.request.request_id
            status_details = {"workflow_id": workflow_id, "step_id": step.step_id}
            await context.runtime.publisher.publish_status(
                request_id,
                source,
                f"Шаг workflow: {step.title}.",
                status_details,
            )
            context = await step.run(context)
            step_output = step.trace_output(context)
            trace.log(
                "workflow_step_traced",
                {
                    "workflow_id": workflow_id,
                    "step": {"id": step.step_id, "title": step.title},
                    "input": step_input,
                    "output": step_output,
                },
            )
            traced_steps.append(
                {
                    "step_id": step.step_id,
                    "title": step.title,
                    "input": step_input,
                    "output": step_output,
                }
            )

        trace.log("workflow_trace_flushed", {"workflow_id": workflow_id, "steps": traced_steps})
        trace.log("workflow_completed", {"workflow_id": workflow_id})
        return context
@dataclass(slots=True)
class DocExplainSummaryContext:
    """Mutable state passed through the steps of the doc-explain summary workflow.

    The first four fields have no defaults and must be supplied by the caller;
    the remaining fields start empty and are filled in by the workflow steps.
    """

    # Execution runtime; steps use ``runtime.trace`` for logging and
    # ``runtime.request.request_id`` to identify the request.
    runtime: RuntimeExecutionContext
    # Routing result: steps read the user/normalized query and anchors from it.
    route: V2RouteResult
    # RAG session identifier; RequireRagSessionStep answers early when it is falsy.
    rag_session_id: str
    # NOTE(review): presumably the prompt template name used for LLM generation — confirm against the caller.
    prompt_name: str
    # When False, GenerateSummaryAnswerStep builds a deterministic answer instead of calling the LLM.
    workflow_llm_enabled: bool = True
    # Set by ResolveRetrievalPlanStep; retrieval steps do nothing while it is None.
    retrieval_plan: RetrievalPlan | None = None
    # Rows returned by the RAG adapter (FetchRagRowsStep).
    retrieved_rows: list[dict] = field(default_factory=list)
    # Candidate rows produced by PrepareCandidateRowsStep.
    rows: list[dict] = field(default_factory=list)
    # Summary documents assembled by BuildSummaryEvidenceStep.
    documents: list[RetrievedSummary] = field(default_factory=list)
    # Evidence gate verdict; None until a gate decision has been recorded.
    gate_decision: EvidenceGateDecision | None = None
    # Prompt text assembled by GenerateSummaryAnswerStep before the LLM call.
    prompt_input: str = ""
    # Final answer; once non-empty, the guarded steps return without doing work.
    answer: str = ""
    # Payload with "answer_mode" and "answer_length" describing how the answer was produced.
    answer_generated_payload: dict[str, object] | None = None
def log_pipeline_step(runtime, step: str, payload: dict[str, object]) -> None:
    """Log ``payload`` under event name ``step`` on the ``process.v2.pipeline`` trace module."""
    pipeline_trace = runtime.trace.module("process.v2.pipeline")
    pipeline_trace.log(step, payload)
def trace_row(row: dict) -> dict[str, object]:
    """Build a compact, string-only snapshot of a retrieved row for trace logs.

    Args:
        row: Retrieved row; may carry ``layer``, ``path``, ``title``,
            ``content``, ``document_id`` and a ``metadata`` mapping.

    Returns:
        A dict of strings. ``summary_text`` and ``content_preview`` are
        truncated to 400 characters; missing values become ``""``.
    """
    from collections.abc import Mapping

    metadata = row.get("metadata")
    # Metadata that is missing or not a mapping (e.g. an unparsed JSON string)
    # is treated as empty, so trace logging never fails on a malformed row.
    if not isinstance(metadata, Mapping):
        metadata = {}
    content = str(row.get("content") or "").strip()
    # Metadata ids take precedence over the row-level ``document_id``.
    document_id = metadata.get("document_id") or metadata.get("doc_id") or row.get("document_id") or ""
    return {
        "layer": str(row.get("layer") or ""),
        "path": str(row.get("path") or ""),
        "title": str(row.get("title") or ""),
        "document_id": str(document_id),
        "entity_name": str(metadata.get("entity_name") or ""),
        "summary_text": str(metadata.get("summary_text") or "")[:400],
        "section_path": str(metadata.get("section_path") or ""),
        "metadata_domain": str(metadata.get("domain") or ""),
        "metadata_subdomain": str(metadata.get("subdomain") or ""),
        "content_preview": content[:400],
    }
class DocRulesChangesetGenerator:
    """Generate one changeset item for a planned change via an LLM call driven by doc rules."""

    def __init__(
        self,
        llm: AgentLlmService,
        selector: DocRulesSelector | None = None,
        prompt_builder: DocChangePromptBuilder | None = None,
    ) -> None:
        """Store the LLM service; default collaborators are created when not supplied."""
        self._llm = llm
        self._selector = selector or DocRulesSelector()
        self._prompt_builder = prompt_builder or DocChangePromptBuilder()

    def generate(
        self,
        *,
        bundle: DocRulesBundle,
        item: PlannedChange,
        domain: str,
        subdomain: str,
        project_root: str,
        trace: ModuleTrace | None = None,
    ) -> tuple[ChangeItem | None, str | None]:
        """Produce a validated ``ChangeItem`` for ``item``.

        Returns:
            ``(change_item, None)`` on success, or ``(None, issue_text)`` when
            rules are missing, the LLM reply is not a JSON object, the base
            hash cannot be computed, or validation fails.
        """
        selected = self._selector.select(bundle, item.doc_type)
        if selected is None:
            return None, f"Для doc_type '{item.doc_type}' не найдено полных doc_rules (artifact/template)."
        prompt_input = self._prompt_builder.build(item=item, rules=selected, domain=domain, subdomain=subdomain)
        if trace is not None:
            trace.log(
                "changeset_prompt_built",
                {
                    "doc_type": item.doc_type,
                    "path": item.path,
                    "prompt_chars": len(prompt_input),
                    "rules_chars": len(selected.render_for_prompt()),
                },
            )
        raw = self._llm.generate(
            "v2_docs_update.build_doc_changeset",
            prompt_input,
            log_context="workflow.v2.docs_update.from_feature.changeset",
            trace=trace,
        )
        payload = self._parse_json(raw)
        if payload is None:
            return None, f"LLM вернул невалидный JSON changeset для {item.path}."
        # The plan is authoritative for op and path; the LLM may not override them.
        payload["op"] = item.op
        payload["path"] = item.path
        payload["reason"] = str(payload.get("reason") or item.reason)[:500]
        if item.op in {"update", "delete"}:
            base_hash = self._resolve_base_hash(project_root, item.path)
            if not base_hash:
                return None, f"{item.op.upper()} пропущен: не удалось вычислить base_hash для {item.path}."
            payload["base_hash"] = base_hash
        if item.op == "delete":
            payload.pop("proposed_content", None)
        try:
            return ChangeItem.model_validate(payload), None
        except Exception as exc:  # boundary: any validation failure becomes an issue string
            return None, f"Невалидный changeset item для {item.path}: {exc}"

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Remove a surrounding Markdown code fence (``` or ```json) if present."""
        if not text.startswith("```"):
            return text
        lines = text.splitlines()[1:]  # drop the opening fence line, including any language tag
        if lines and lines[-1].strip() == "```":
            lines = lines[:-1]
        return "\n".join(lines).strip()

    def _parse_json(self, raw: str) -> dict[str, object] | None:
        """Parse the LLM reply; return the JSON object, or ``None`` if it is not one.

        A reply wrapped in a Markdown code fence is unwrapped first, since valid
        JSON never starts with a backtick.
        """
        text = self._strip_code_fence(str(raw or "").strip())
        if not text:
            return None
        try:
            value = json.loads(text)
        except json.JSONDecodeError:
            return None
        return value if isinstance(value, dict) else None

    def _resolve_base_hash(self, project_root: str, rel_path: str) -> str:
        """Return the SHA-256 hex digest of the UTF-8 text of ``rel_path`` under ``project_root``.

        Returns ``""`` when the root is not absolute, the target resolves
        outside the root (absolute path or ``..`` segments), the target is not
        a regular file, or it cannot be read as UTF-8.
        """
        root = Path(project_root or "").expanduser()
        if not root.is_absolute():
            return ""
        try:
            resolved_root = root.resolve()
            target = (root / rel_path).resolve()
        except (OSError, RuntimeError, ValueError):
            return ""
        # Never hash files outside the project root.
        if not target.is_relative_to(resolved_root):
            return ""
        if not target.is_file():
            return ""
        try:
            content = target.read_text(encoding="utf-8")
        except (OSError, ValueError):
            return ""
        return hashlib.sha256(content.encode("utf-8")).hexdigest()
class DocRulesLoader:
    """Read documentation rules, templates and section rules from a rules directory."""

    def __init__(self, root: Path | None = None) -> None:
        """Use ``root`` when given; otherwise fall back to the packaged ``doc_rules`` folder."""
        if root:
            self._root = root
        else:
            self._root = Path(__file__).resolve().parents[3] / "doc_rules"

    @property
    def root(self) -> Path:
        """Directory the rules are loaded from."""
        return self._root

    def load(self) -> DocRulesBundle:
        """Collect every rule file under the root into a ``DocRulesBundle``."""
        base = self._root
        return DocRulesBundle(
            documentation_rules=self._read_file(base / "documentation-rules.md"),
            global_rules=self._read_folder(base / "global", suffix=".md"),
            artifact_rules=self._read_folder(base / "artifact-types", suffix=".md"),
            templates=self._read_templates(base / "templates"),
            sections=self._read_folder(base / "sections", suffix=".md"),
        )

    def _read_templates(self, folder: Path) -> dict[str, str]:
        """Map template name (file name minus ``.template.md``) to its text, in sorted file order."""
        if not folder.is_dir():
            return {}
        marker_len = len(".template.md")
        return {
            template.name[:-marker_len]: self._safe_read(template)
            for template in sorted(folder.glob("*.template.md"))
        }

    def _read_folder(self, folder: Path, suffix: str) -> dict[str, str]:
        """Map file stem to text for every ``*{suffix}`` file in ``folder``, in sorted file order."""
        if not folder.is_dir():
            return {}
        return {entry.stem: self._safe_read(entry) for entry in sorted(folder.glob(f"*{suffix}"))}

    def _read_file(self, path: Path) -> str:
        """Return the text of ``path``, or ``""`` when it is not an existing regular file."""
        if path.is_file():
            return self._safe_read(path)
        return ""

    def _safe_read(self, path: Path) -> str:
        """Read ``path`` as UTF-8; any failure yields ``""`` (best-effort loading)."""
        try:
            text = path.read_text(encoding="utf-8")
        except Exception:
            return ""
        return text
a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/models.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/models.py new file mode 100644 index 0000000..b673c79 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/doc_rules_pipeline/models.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +from dataclasses import dataclass +from dataclasses import field + + +@dataclass(slots=True) +class DocRulesBundle: + documentation_rules: str = "" + global_rules: dict[str, str] = field(default_factory=dict) + artifact_rules: dict[str, str] = field(default_factory=dict) + templates: dict[str, str] = field(default_factory=dict) + sections: dict[str, str] = field(default_factory=dict) + + @property + def supported_doc_types(self) -> set[str]: + return set(self.artifact_rules.keys()) & set(self.templates.keys()) + + def has_doc_type(self, doc_type: str) -> bool: + return doc_type in self.supported_doc_types + + def render_global(self) -> str: + parts: list[str] = [] + if self.documentation_rules.strip(): + parts.append("### documentation-rules.md") + parts.append(self.documentation_rules.strip()) + for name in sorted(self.global_rules.keys()): + value = self.global_rules.get(name, "").strip() + if not value: + continue + parts.append(f"### global/{name}.md") + parts.append(value) + return "\n\n".join(parts).strip() + + +@dataclass(slots=True) +class SelectedDocRules: + doc_type: str + global_text: str + artifact_text: str + template_text: str + section_texts: dict[str, str] = field(default_factory=dict) + + def render_for_prompt(self) -> str: + parts: list[str] = [] + if self.global_text.strip(): + parts.append("## Global rules") + parts.append(self.global_text.strip()) + if self.artifact_text.strip(): + parts.append(f"## Artifact rules ({self.doc_type})") + parts.append(self.artifact_text.strip()) + if self.template_text.strip(): + parts.append(f"## Template 
class DocChangePromptBuilder:
    """Serialize a planned change plus its rendered doc rules into the LLM prompt input."""

    def build(self, item: PlannedChange, rules: SelectedDocRules, domain: str, subdomain: str) -> str:
        """Return pretty-printed JSON holding ``change_request`` and ``doc_rules_context``.

        Non-ASCII characters are kept as-is (``ensure_ascii=False``).
        """
        change_request = {
            "op": item.op,
            "path": item.path,
            "doc_type": item.doc_type,
            "doc_id": item.doc_id,
            "title": item.title,
            "domain": domain,
            "sub_domain": subdomain,
            "reason": item.reason,
            "source_refs": item.source_refs,
            "related_docs": item.related_docs,
            "requirement_body": item.requirement_body,
        }
        document = {
            "change_request": change_request,
            "doc_rules_context": rules.render_for_prompt(),
        }
        return json.dumps(document, ensure_ascii=False, indent=2)
class DocRulesSelector:
    """Pick the rule texts that apply to a single doc type."""

    _DEFAULT_SECTIONS: tuple[str, ...] = ("summary", "details", "requirements-format")
    _SECTIONS_BY_TYPE: dict[str, tuple[str, ...]] = {
        "api_method": ("summary", "details", "api-scenario", "api-contract", "requirements-format"),
        "integration_doc": ("summary", "details", "api-contract", "requirements-format"),
        "ui_page": ("summary", "details", "requirements-format"),
        "logic_block": ("summary", "details", "requirements-format"),
        "architecture_overview": ("summary", "details", "requirements-format"),
        "domain_entity": ("summary", "details", "requirements-format"),
    }

    def select(self, bundle: DocRulesBundle, doc_type: str) -> SelectedDocRules | None:
        """Return the rules for ``doc_type``, or ``None`` when its artifact rule or template is blank."""
        artifact_text = bundle.artifact_rules.get(doc_type, "").strip()
        template_text = bundle.templates.get(doc_type, "").strip()
        if not (artifact_text and template_text):
            return None
        wanted = self._SECTIONS_BY_TYPE.get(doc_type, self._DEFAULT_SECTIONS)
        picked: dict[str, str] = {}
        for section_name in wanted:
            section_text = bundle.sections.get(section_name, "")
            # Blank sections are left out; kept sections retain their original text.
            if section_text.strip():
                picked[section_name] = section_text
        return SelectedDocRules(
            doc_type=doc_type,
            global_text=bundle.render_global(),
            artifact_text=artifact_text,
            template_text=template_text,
            section_texts=picked,
        )
class DocUpdateFromFeatureGraph(DocUpdateFromFeatureWorkflowGraph[DocUpdateFromFeatureContext]):
    """Workflow graph wiring the DOC_UPDATE/FROM_FEATURE steps in their fixed order."""

    def __init__(self, llm: AgentLlmService, doc_rules_enabled: bool = True) -> None:
        """Build the step pipeline; only the plan and changeset steps receive the LLM."""
        pipeline = [
            ResolveSourceStep(),
            LoadSourceContentStep(),
            ParseFeatureRequirementsStep(),
            LoadDocRulesStep(),
            BuildChangePlanStep(llm=llm),
            BuildChangeInstructionsStep(llm=llm, doc_rules_enabled=doc_rules_enabled),
            FinalizeAnswerStep(),
        ]
        super().__init__(
            workflow_id="v2.docs_update.from_feature",
            source="workflow.v2.docs_update.from_feature",
            steps=pipeline,
        )
class BuildChangeInstructionsStep(WorkflowStep[DocUpdateFromFeatureContext]):
    """Workflow step that turns planned changes into changeset items.

    When doc rules are enabled and a rules bundle is loaded, each item is
    produced by ``DocRulesChangesetGenerator``; otherwise a fixed legacy draft
    is built locally.
    """

    step_id = "build_changeset"
    title = "Формирование changeset"

    def __init__(self, llm: AgentLlmService, doc_rules_enabled: bool = True) -> None:
        """Remember the doc-rules switch and create the LLM-backed generator."""
        self._doc_rules_enabled = doc_rules_enabled
        self._generator = DocRulesChangesetGenerator(llm=llm)

    async def run(self, context: DocUpdateFromFeatureContext) -> DocUpdateFromFeatureContext:
        """Append one changeset item (or one issue) per planned change.

        Does nothing when an answer is already set or there are no planned changes.
        """
        if context.answer or not context.planned_changes:
            return context
        llm_trace = context.runtime.trace.module("workflow.v2.docs_update.from_feature.llm")
        domain = (context.analytics_meta.domains or ["unknown"])[0]
        subdomain = (context.analytics_meta.subdomains or ["unknown"])[0]
        # ``rules_bundle`` is non-None only when the doc-rules path is active;
        # this replaces an ``assert`` that was used purely for type narrowing.
        rules_bundle = None
        if self._doc_rules_enabled and context.doc_rules_enabled:
            rules_bundle = context.doc_rules_bundle
        for item in context.planned_changes:
            if rules_bundle is not None:
                payload, issue = self._generator.generate(
                    bundle=rules_bundle,
                    item=item,
                    domain=domain,
                    subdomain=subdomain,
                    project_root=context.project_root,
                    trace=llm_trace,
                )
                if issue:
                    context.issues.append(issue)
                elif payload is not None:
                    context.changeset.append(payload)
                continue
            payload = self._build_legacy_change(context, item, domain, subdomain)
            if payload is not None:
                context.changeset.append(payload)
        return context

    def trace_output(self, context: DocUpdateFromFeatureContext) -> dict[str, object]:
        """Report how many changeset items and issues the context holds."""
        return {"changeset_items": len(context.changeset), "issues": len(context.issues)}

    def _build_legacy_change(self, context: DocUpdateFromFeatureContext, item, domain: str, subdomain: str) -> ChangeItem | None:
        """Build a changeset item without the LLM.

        Returns ``None`` and records an issue when an update/delete target
        cannot be hashed.
        """
        op = ChangeOp(item.op)
        if op == ChangeOp.DELETE:
            base_hash = self._resolve_base_hash(context, item.path)
            if not base_hash:
                context.issues.append(f"DELETE пропущен: не удалось вычислить base_hash для {item.path}.")
                return None
            return ChangeItem(
                op=op,
                path=item.path,
                base_hash=base_hash,
                reason=item.reason,
            )
        content = self._build_doc_content(item.doc_id, item.title, item.doc_type, domain, subdomain, item)
        if op == ChangeOp.CREATE:
            return ChangeItem(op=op, path=item.path, proposed_content=content, reason=item.reason)
        base_hash = self._resolve_base_hash(context, item.path)
        if not base_hash:
            context.issues.append(f"UPDATE пропущен: не удалось вычислить base_hash для {item.path}.")
            return None
        return ChangeItem(
            op=op,
            path=item.path,
            base_hash=base_hash,
            proposed_content=content,
            reason=item.reason,
            hunks=[PatchHunk(type="append_end", new_text="\n\n## Updated From System Analysis\n\nТребуется обновить содержимое по changeset.\n")],
        )

    def _build_doc_content(self, doc_id: str, title: str, doc_type: str, domain: str, subdomain: str, item) -> str:
        """Render the legacy draft: front matter plus Context and Functional Requirements sections."""
        # NOTE(review): the list-item prefix below is reproduced as seen in the reviewed
        # diff, where runs of spaces may have been collapsed — confirm the indentation.
        related = "\n".join(f" - {ref}" for ref in item.related_docs) if item.related_docs else " - TBD"
        source_refs = "\n".join(f" - {ref}" for ref in item.source_refs) if item.source_refs else " - TBD"
        functional_requirements = item.requirement_body.strip() or "Требования не были явно извлечены из unit."
        return (
            "---\n"
            f"id: {doc_id}\n"
            f"title: {title}\n"
            f"doc_type: {doc_type}\n"
            f"domain: {domain}\n"
            f"sub_domain: {subdomain}\n"
            "status: generated\n"
            "related_docs:\n"
            f"{related}\n"
            "source_of_truth: system_analysis\n"
            "system_analytics_refs:\n"
            f"{source_refs}\n"
            "---\n\n"
            "## Context\n\n"
            "Черновик сгенерирован workflow DOC_UPDATE/FROM_FEATURE на основе системной аналитики.\n\n"
            "## Functional Requirements\n\n"
            f"{functional_requirements}\n"
        )

    def _resolve_base_hash(self, context: DocUpdateFromFeatureContext, rel_path: str) -> str:
        """Return the SHA-256 hex digest of the UTF-8 text of ``rel_path`` under the project root.

        Returns ``""`` when the root is not absolute, the target resolves
        outside the root (absolute path or ``..`` segments), the target is not
        a regular file, or it cannot be read as UTF-8.
        """
        root = Path(context.project_root or "").expanduser()
        if not root.is_absolute():
            return ""
        try:
            resolved_root = root.resolve()
            target = (root / rel_path).resolve()
        except (OSError, RuntimeError, ValueError):
            return ""
        # Never hash files outside the project root.
        if not target.is_relative_to(resolved_root):
            return ""
        if not target.is_file():
            return ""
        try:
            content = target.read_text(encoding="utf-8")
        except (OSError, ValueError):
            return ""
        return hashlib.sha256(content.encode("utf-8")).hexdigest()
class BuildChangePlanStep(WorkflowStep[DocUpdateFromFeatureContext]):
    """Workflow step that maps each feature requirement unit to a planned doc change."""

    step_id = "build_change_plan"
    title = "Построение плана изменений"

    def __init__(self, llm: AgentLlmService, query_repository: RagQueryRepository | None = None) -> None:
        """Store the LLM service; a default ``RagQueryRepository`` is created when none is given."""
        self._llm = llm
        self._query_repository = query_repository or RagQueryRepository()

    async def run(self, context: DocUpdateFromFeatureContext) -> DocUpdateFromFeatureContext:
        """Fill ``context.planned_changes`` from ``context.units``.

        Does nothing when an answer is already set or there are no units.
        Records an issue when no change could be planned.
        """
        if context.answer or not context.units:
            return context
        self._load_docs_state(context)
        inferred_types = self._infer_missing_types(context)
        state = DocsState.from_rows(context.docs_catalog_rows)
        for index, unit in enumerate(context.units):
            planned = self._build_unit_plan(context, unit, state, inferred_types.get(index, ""))
            if planned is not None:
                context.planned_changes.append(planned)
        if not context.planned_changes:
            context.issues.append("Не удалось построить план изменений по функциональным требованиям.")
        return context

    def trace_output(self, context: DocUpdateFromFeatureContext) -> dict[str, object]:
        """Report counts of catalog rows, planned changes and issues."""
        return {
            "docs_rows": len(context.docs_catalog_rows),
            "planned_changes": len(context.planned_changes),
            "issues": len(context.issues),
        }

    def _load_docs_state(self, context: DocUpdateFromFeatureContext) -> None:
        """Load the docs catalog rows for the RAG session; failures become issues."""
        if not context.rag_session_id:
            context.issues.append("Отсутствует active RAG session: невозможно сопоставить изменения с текущей документацией.")
            return
        try:
            context.docs_catalog_rows = self._query_repository.list_docs_scope_index_rows(context.rag_session_id)
        except Exception as exc:  # boundary: repository failures are reported, not raised
            context.issues.append(f"Не удалось загрузить состояние документации из RAG: {exc}")

    def _infer_missing_types(self, context: DocUpdateFromFeatureContext) -> dict[int, str]:
        """Ask the LLM for a doc type for every unit lacking ``type`` metadata.

        Returns a mapping from unit index to inferred doc type; empty when
        every unit already has a type.
        """
        missing: list[tuple[int, str, str]] = []
        for idx, unit in enumerate(context.units):
            value = str(unit.metadata.get("type") or "").strip()
            if not value:
                missing.append((idx, unit.heading, unit.body[:400]))
        if not missing:
            return {}
        payload = {
            "system_rules": SYSTEM_RULES_TEXT,
            "allowed_doc_types": list(ALLOWED_DOC_TYPES),
            "items": [{"index": idx, "heading": h, "snippet": snippet} for idx, h, snippet in missing],
        }
        raw = self._llm.generate(
            "v2_docs_update.plan_change_units",
            json.dumps(payload, ensure_ascii=False, indent=2),
            log_context="workflow.v2.docs_update.from_feature.plan",
            trace=context.runtime.trace.module("workflow.v2.docs_update.from_feature.llm"),
        )
        return self._parse_type_inference(raw)

    def _parse_type_inference(self, raw: str) -> dict[int, str]:
        """Parse the LLM reply into ``{unit_index: doc_type}``; malformed rows are skipped."""
        try:
            data = json.loads(str(raw or "").strip())
        except json.JSONDecodeError:
            return {}
        rows = data.get("items") if isinstance(data, dict) else []
        if not isinstance(rows, list):
            return {}
        result: dict[int, str] = {}
        for row in rows:
            if not isinstance(row, dict):
                continue
            index = row.get("index")
            doc_type = str(row.get("doc_type") or "").strip()
            # ``bool`` is a subclass of ``int``: a JSON ``true``/``false`` must not
            # be accepted as unit index 1/0.
            if isinstance(index, bool) or not isinstance(index, int):
                continue
            if doc_type not in ALLOWED_DOC_TYPES:
                continue
            result[index] = doc_type
        return result

    def _build_unit_plan(
        self,
        context: DocUpdateFromFeatureContext,
        unit,
        state: DocsState,
        inferred_doc_type: str,
    ) -> PlannedChange | None:
        """Build the planned change for one unit, or record an issue and return ``None``."""
        doc_type = str(unit.metadata.get("type") or inferred_doc_type).strip()
        if doc_type not in ALLOWED_DOC_TYPES:
            context.issues.append(f"Unit '{unit.heading}': неизвестный или отсутствующий type '{doc_type}'.")
            return None
        unit_id = str(unit.metadata.get("id") or self._make_doc_id(doc_type, unit.heading)).strip()
        op_hint = str(unit.metadata.get("op") or "create_or_update").strip().lower()
        target_hint = str(unit.metadata.get("target_path_hint") or "").strip()
        path = self._resolve_path(doc_type, unit_id, unit.heading, target_hint, state)
        op = self._resolve_op(op_hint, unit_id, path, state)
        source_refs = self._as_list(unit.metadata.get("source_refs")) or ["section: 5. Функциональные требования"]
        related_docs = self._as_list(unit.metadata.get("related_docs"))
        reason = f"Из unit '{unit.heading}' системной аналитики ({context.analytics_meta.analysis_id or 'analysis'})."
        return PlannedChange(
            op=op,
            path=path,
            doc_type=doc_type,
            reason=reason,
            title=unit.heading,
            doc_id=unit_id,
            requirement_body=str(unit.body or "").strip(),
            source_refs=source_refs,
            related_docs=related_docs,
        )

    def _resolve_path(self, doc_type: str, unit_id: str, heading: str, hint: str, state: DocsState) -> str:
        """Pick the target path: known doc id first, then the hint, then a generated slug path."""
        if unit_id in state.by_doc_id:
            return state.by_doc_id[unit_id]
        if hint:
            return hint
        folder = DOC_TYPE_TO_FOLDER.get(doc_type, "docs")
        slug = self._slugify(unit_id or heading)
        return f"{folder}/{slug}.md"

    def _resolve_op(self, op_hint: str, unit_id: str, path: str, state: DocsState) -> str:
        """Honour an explicit op hint; otherwise update known documents and create unknown ones."""
        if op_hint in {"delete", "create", "update"}:
            return op_hint
        if path in state.by_path or unit_id in state.by_doc_id:
            return "update"
        return "create"

    def _make_doc_id(self, doc_type: str, heading: str) -> str:
        """Derive ``<doc_type>.<slug_with_underscores>`` from the heading."""
        slug = self._slugify(heading).replace("-", "_")
        return f"{doc_type}.{slug}".strip(".")

    def _slugify(self, value: str) -> str:
        """Lower-case ``value`` and collapse every run of other characters into one ``-``.

        Latin letters, digits, ``_``, ``-`` and Cyrillic letters are kept.
        ``ё``/``Ё`` are listed explicitly because they lie outside the ``а-я``
        code-point range and would otherwise be dropped. Returns ``"doc"``
        when nothing remains.
        """
        cleaned = re.sub(r"[^a-zA-Z0-9а-яА-ЯёЁ_-]+", "-", value.lower()).strip("-")
        return re.sub(r"-+", "-", cleaned) or "doc"

    def _as_list(self, value: object) -> list[str]:
        """Normalize a list or a single string into a list of non-empty, stripped strings."""
        if isinstance(value, list):
            return [str(item).strip() for item in value if str(item).strip()]
        if isinstance(value, str) and value.strip():
            return [value.strip()]
        return []
a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/docs_state_loader.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/docs_state_loader.py new file mode 100644 index 0000000..9f2709a --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/docs_state_loader.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from dataclasses import dataclass, field + + +@dataclass(slots=True) +class DocEntry: + path: str + doc_id: str + + +@dataclass(slots=True) +class DocsState: + by_path: set[str] = field(default_factory=set) + by_doc_id: dict[str, str] = field(default_factory=dict) + + @classmethod + def from_rows(cls, rows: list[dict]) -> "DocsState": + state = cls() + for row in rows: + path = str(row.get("path") or "").strip() + if not path: + continue + state.by_path.add(path) + metadata = row.get("metadata") if isinstance(row.get("metadata"), dict) else {} + raw_doc_id = metadata.get("id") or metadata.get("doc_id") or metadata.get("document_id") + doc_id = str(raw_doc_id or "").strip() + if doc_id and doc_id not in state.by_doc_id: + state.by_doc_id[doc_id] = path + return state diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/feature_markdown_parser.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/feature_markdown_parser.py new file mode 100644 index 0000000..aebadfd --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/feature_markdown_parser.py @@ -0,0 +1,137 @@ +from __future__ import annotations + +from dataclasses import dataclass + +from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.models import ( + AnalyticsMeta, + FeatureRequirementUnit, +) + + +@dataclass(slots=True) +class ParsedFeatureSpec: + meta: AnalyticsMeta + units: list[FeatureRequirementUnit] + + +class FeatureMarkdownParser: + _META_KEYS = {"analysis_id", "domains", "subdomains"} 
+ + def parse(self, content: str) -> ParsedFeatureSpec: + lines = content.splitlines() + meta = self._parse_meta(lines) + section_lines = self._extract_functional_section(lines) + units = self._parse_units(section_lines) + return ParsedFeatureSpec(meta=meta, units=units) + + def _parse_meta(self, lines: list[str]) -> AnalyticsMeta: + values: dict[str, object] = {} + i = 0 + while i < len(lines): + line = lines[i].strip() + if line.startswith("## "): + break + if ":" not in line: + i += 1 + continue + key, raw = [part.strip() for part in line.split(":", 1)] + if key not in self._META_KEYS: + i += 1 + continue + if raw: + values[key] = raw.strip("\"'") + i += 1 + continue + items: list[str] = [] + j = i + 1 + while j < len(lines): + item_line = lines[j] + if not item_line.startswith(" - ") and not item_line.startswith("- "): + break + items.append(item_line.split("-", 1)[1].strip().strip("\"'")) + j += 1 + values[key] = items + i = j + return AnalyticsMeta( + analysis_id=str(values.get("analysis_id") or "").strip(), + domains=self._as_list(values.get("domains")), + subdomains=self._as_list(values.get("subdomains")), + ) + + def _extract_functional_section(self, lines: list[str]) -> list[str]: + started = False + section: list[str] = [] + for line in lines: + stripped = line.strip() + if stripped.startswith("## "): + if self._is_functional_heading(stripped): + started = True + continue + if started: + break + if started: + section.append(line) + return section + + def _is_functional_heading(self, value: str) -> bool: + lowered = value.lower() + return "функциональные требования" in lowered and lowered.startswith("## 5") + + def _parse_units(self, section_lines: list[str]) -> list[FeatureRequirementUnit]: + units: list[FeatureRequirementUnit] = [] + current_title = "" + current_lines: list[str] = [] + for line in section_lines: + if line.strip().startswith("### "): + if current_title: + units.append(self._build_unit(current_title, current_lines)) + current_title = 
line.strip()[4:].strip() + current_lines = [] + continue + if current_title: + current_lines.append(line) + if current_title: + units.append(self._build_unit(current_title, current_lines)) + return units + + def _build_unit(self, title: str, lines: list[str]) -> FeatureRequirementUnit: + metadata, body = self._split_metadata_and_body(lines) + return FeatureRequirementUnit(heading=title, body="\n".join(body).strip(), metadata=metadata) + + def _split_metadata_and_body(self, lines: list[str]) -> tuple[dict[str, object], list[str]]: + meta: dict[str, object] = {} + body_start = 0 + i = 0 + while i < len(lines): + raw = lines[i] + stripped = raw.strip() + if not stripped: + body_start = i + 1 + i += 1 + continue + if ":" not in stripped: + break + key, value = [part.strip() for part in stripped.split(":", 1)] + if not key.isidentifier(): + break + if value: + meta[key] = value.strip("\"'") + body_start = i + 1 + i += 1 + continue + items: list[str] = [] + j = i + 1 + while j < len(lines) and lines[j].lstrip().startswith("- "): + items.append(lines[j].split("-", 1)[1].strip().strip("\"'")) + j += 1 + meta[key] = items + body_start = j + i = j + return meta, lines[body_start:] + + def _as_list(self, value: object) -> list[str]: + if isinstance(value, list): + return [str(item).strip() for item in value if str(item).strip()] + if isinstance(value, str) and value.strip(): + return [value.strip()] + return [] diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/finalize_answer_step.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/finalize_answer_step.py new file mode 100644 index 0000000..5224c60 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/steps/finalize_answer_step.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +import json + +from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.context import DocUpdateFromFeatureContext +from 
class FinalizeAnswerStep(WorkflowStep[DocUpdateFromFeatureContext]):
    """Render the final textual answer describing the built changeset."""

    step_id = "finalize"
    title = "Подготовка ответа"

    async def run(self, context: DocUpdateFromFeatureContext) -> DocUpdateFromFeatureContext:
        """Compose ``context.answer`` unless an earlier step already set it.

        Also sets ``context.apply_changeset`` (true when the changeset is
        non-empty) and ``context.answer_generated_payload``.
        """
        if context.answer:
            return context

        report: list[str] = ["DOC_UPDATE/FROM_FEATURE: результат построения changeset."]

        if context.issues:
            report.append("\nОбнаружены несоответствия/нехватка данных:")
            report.extend(f"- {issue}" for issue in context.issues)

        report.append("\nПлан изменений:")
        if context.planned_changes:
            report.extend(
                f"- {change.op}: {change.path} ({change.doc_type})"
                for change in context.planned_changes
            )
        else:
            report.append("- Не сформирован.")

        serialized = json.dumps(
            [entry.model_dump() for entry in context.changeset],
            ensure_ascii=False,
            indent=2,
        )
        report.extend(["\nChangeset (для плагина):", "```json", serialized, "```"])

        context.apply_changeset = bool(context.changeset)
        report.append(f"\napply_changeset: {str(context.apply_changeset).lower()}")

        context.answer = "\n".join(report)
        context.answer_generated_payload = {
            "answer_mode": "docs_update_changeset",
            "answer_length": len(context.answer),
            "changeset_items": len(context.changeset),
            "apply_changeset": context.apply_changeset,
        }
        return context

    def trace_output(self, context: DocUpdateFromFeatureContext) -> dict[str, object]:
        """Summarize the step result for tracing."""
        return {
            "answer_length": len(context.answer),
            "issues": len(context.issues),
            "changeset_items": len(context.changeset),
        }
class LoadDocRulesStep(WorkflowStep[DocUpdateFromFeatureContext]):
    """Load the doc_rules bundle into the workflow context."""

    step_id = "load_doc_rules"
    title = "Загрузка doc_rules"

    def __init__(self, loader: DocRulesLoader | None = None) -> None:
        """Use the supplied loader, or a default ``DocRulesLoader``."""
        self._loader = loader or DocRulesLoader()

    async def run(self, context: DocUpdateFromFeatureContext) -> DocUpdateFromFeatureContext:
        """Load rules when enabled and no answer has been produced yet.

        Records an issue when the loaded bundle supports no document types.
        """
        skip = bool(context.answer) or not context.doc_rules_enabled
        if skip:
            return context

        loaded = self._loader.load()
        context.doc_rules_bundle = loaded
        if loaded.supported_doc_types:
            return context

        rules_root = self._loader.root.as_posix()
        context.issues.append(
            f"doc_rules не загружены: отсутствуют пары artifact/template в {rules_root}."
        )
        return context

    def trace_output(self, context: DocUpdateFromFeatureContext) -> dict[str, object]:
        """Summarize the loading outcome for tracing."""
        bundle = context.doc_rules_bundle
        doc_types = bundle.supported_doc_types if bundle else set()
        return {
            "enabled": context.doc_rules_enabled,
            "loaded": context.doc_rules_bundle is not None,
            "supported_doc_types": sorted(doc_types),
            "issues": len(context.issues),
        }
class LoadSourceContentStep(WorkflowStep[DocUpdateFromFeatureContext]):
    """Read the feature-analysis markdown file referenced by the context."""

    step_id = "load_source"
    title = "Загрузка системной аналитики"

    async def run(self, context: DocUpdateFromFeatureContext) -> DocUpdateFromFeatureContext:
        """Load the file content, or record an issue explaining why not.

        Does nothing when an answer already exists or no source reference was
        resolved. Only the ``markdown_file`` source kind is read.
        """
        if context.answer or not context.source_ref:
            return context

        kind = context.source_kind
        if kind == "confluence_url":
            context.issues.append("Пока не поддерживается скачивание системной аналитики по Confluence URL.")
        elif kind != "markdown_file":
            context.issues.append("Поддерживается только источник markdown-файла (.md).")
        else:
            self._read_markdown(context)
        return context

    def _read_markdown(self, context: DocUpdateFromFeatureContext) -> None:
        """Fill ``feature_content`` and ``project_root`` from the source file."""
        source_path = Path(context.source_ref)
        if not (source_path.exists() and source_path.is_file()):
            context.issues.append(f"Файл системной аналитики не найден: {context.source_ref}")
            return
        try:
            context.feature_content = source_path.read_text(encoding="utf-8")
            context.project_root = self._resolve_project_root(source_path).as_posix()
        except Exception as exc:
            # Best effort: any read failure is reported as an issue, not raised.
            context.issues.append(f"Не удалось прочитать файл аналитики: {exc}")

    def trace_output(self, context: DocUpdateFromFeatureContext) -> dict[str, object]:
        """Summarize the loading outcome for tracing."""
        return {
            "source_kind": context.source_kind,
            "content_loaded": bool(context.feature_content),
            "project_root": context.project_root,
            "issues": len(context.issues),
        }

    def _resolve_project_root(self, source_path: Path) -> Path:
        """Return the directory above ``_incoming``, else the file's parent."""
        segments = source_path.parts
        try:
            marker = segments.index("_incoming")
        except ValueError:
            return source_path.parent
        if marker > 0:
            return Path(*segments[:marker])
        return source_path.parent
class ParseFeatureRequirementsStep(WorkflowStep[DocUpdateFromFeatureContext]):
    """Parse the loaded feature markdown into metadata and requirement units."""

    step_id = "parse_feature"
    title = "Парсинг функциональных требований"

    def __init__(self, parser: FeatureMarkdownParser | None = None) -> None:
        """Use the supplied parser, or a default ``FeatureMarkdownParser``."""
        self._parser = parser or FeatureMarkdownParser()

    async def run(self, context: DocUpdateFromFeatureContext) -> DocUpdateFromFeatureContext:
        """Store parsed metadata and units; record an issue per missing part."""
        if context.answer or not context.feature_content:
            return context

        parsed = self._parser.parse(context.feature_content)
        context.analytics_meta = parsed.meta
        context.units = parsed.units

        meta = context.analytics_meta
        # (value, message) pairs, checked in reporting order.
        checks = (
            (meta.analysis_id, "Отсутствует analysis_id в metadata аналитики."),
            (meta.domains, "Отсутствует domains в metadata аналитики."),
            (meta.subdomains, "Отсутствует subdomains в metadata аналитики."),
            (
                context.units,
                "Не найдены units в разделе '## 5. Функциональные требования' с заголовками уровня '###'.",
            ),
        )
        for value, message in checks:
            if not value:
                context.issues.append(message)
        return context

    def trace_output(self, context: DocUpdateFromFeatureContext) -> dict[str, object]:
        """Summarize the parsing outcome for tracing."""
        meta = context.analytics_meta
        return {
            "analysis_id": meta.analysis_id,
            "domains": meta.domains,
            "subdomains": meta.subdomains,
            "units": len(context.units),
            "issues": len(context.issues),
        }
class ResolveSourceStep(WorkflowStep[DocUpdateFromFeatureContext]):
    """Detect where the feature analysis comes from: a ``.md`` path or a URL."""

    step_id = "resolve_source"
    title = "Определение источника аналитики"

    _PATH_PATTERN = re.compile(r"(/[^\n`]+?\.md)")
    _URL_PATTERN = re.compile(r"https?://[^\s)]+")

    async def run(self, context: DocUpdateFromFeatureContext) -> DocUpdateFromFeatureContext:
        """Set ``source_ref`` and ``source_kind`` from the user query.

        A markdown path takes precedence over a URL. When neither is found an
        issue is recorded and the source fields stay untouched.
        """
        if context.answer:
            return context
        query = context.route.user_query
        path = self._extract_path(query)
        if path:
            context.source_ref = path
            context.source_kind = "markdown_file"
            return context
        url = self._extract_url(query)
        if url:
            context.source_ref = url
            context.source_kind = "confluence_url" if "confluence" in url.lower() else "url"
            return context
        context.issues.append("Не удалось определить источник системной аналитики (ожидался путь .md или URL).")
        return context

    def trace_output(self, context: DocUpdateFromFeatureContext) -> dict[str, object]:
        """Summarize the resolved source for tracing."""
        return {"source_kind": context.source_kind, "source_ref": context.source_ref, "issues": len(context.issues)}

    def _extract_path(self, query: str) -> str:
        """Return the first absolute ``.md`` path in ``query``, or ``""``.

        Backtick-delimited chunks are checked first. The regex fallback runs
        on the query with URLs blanked out: the path pattern starts at any
        ``/``, so without this the slashes of ``https://host/...`` would be
        taken as the start of a file path and the URL never reported.
        """
        if "`" in query:
            for chunk in query.split("`"):
                value = chunk.strip().strip('"').strip("'")
                if value.endswith(".md") and value.startswith("/"):
                    return value
        # Replace each URL with a space so the surrounding tokens stay apart.
        without_urls = self._URL_PATTERN.sub(" ", query)
        match = self._PATH_PATTERN.search(without_urls)
        if not match:
            return ""
        return match.group(1).strip().strip('"').strip("'")

    def _extract_url(self, query: str) -> str:
        """Return the first http(s) URL in ``query``, or ``""``."""
        match = self._URL_PATTERN.search(query)
        return match.group(0).strip() if match else ""
class DocUpdateFromFeatureWorkflowGraph(WorkflowGraph[TContext]):
    """Workflow graph that traces every step with context snapshots."""

    async def run(self, context: TContext) -> TContext:
        """Run all steps in order, logging input/output traces for each.

        Per-step traces are logged as they happen and once more, together, in
        a single ``workflow_trace_flushed`` event after the last step.
        """
        workflow_ref = {"workflow_id": self._workflow_id}
        trace = context.runtime.trace.module(self._source)
        trace.log("workflow_started", {"workflow_id": self._workflow_id})

        collected: list[dict[str, object]] = []
        for step in self._steps:
            step_input = self._merge_trace_payload(
                step.trace_input(context),
                self._snapshot(context),
            ) if False else self._traced(step.trace_input, context)

            await context.runtime.publisher.publish_status(
                context.runtime.request.request_id,
                self._source,
                f"Шаг workflow: {step.title}.",
                {"workflow_id": self._workflow_id, "step_id": step.step_id},
            )
            context = await step.run(context)

            step_output = self._traced(step.trace_output, context)
            trace.log(
                "workflow_step_traced",
                {
                    "workflow_id": self._workflow_id,
                    "step": {"id": step.step_id, "title": step.title},
                    "input": step_input,
                    "output": step_output,
                },
            )
            collected.append(
                {"step_id": step.step_id, "title": step.title, "input": step_input, "output": step_output}
            )

        trace.log("workflow_trace_flushed", {**workflow_ref, "steps": collected})
        trace.log("workflow_completed", dict(workflow_ref))
        return context

    def _traced(self, tracer, context: TContext) -> dict[str, object]:
        """Snapshot the context, then merge it with the step's trace payload."""
        snapshot = self._snapshot(context)
        return self._merge_trace_payload(tracer(context), snapshot)

    def _merge_trace_payload(self, payload: dict[str, object] | None, snapshot: dict[str, object]) -> dict[str, object]:
        """Attach the snapshot under ``_context``; an empty payload yields the snapshot."""
        if payload:
            return {**payload, "_context": snapshot}
        return snapshot

    def _snapshot(self, context: TContext) -> dict[str, object]:
        """Capture a compact summary of the context.

        Every attribute is read with ``getattr`` and a default, so missing
        attributes do not raise. Previews are limited to five entries.
        """

        def text_of(owner: object, name: str) -> str:
            return str(getattr(owner, name, "") or "")

        def items_of(owner: object, name: str) -> list:
            return list(getattr(owner, name, []) or [])

        analytics = getattr(context, "analytics_meta", None)
        units = items_of(context, "units")
        planned = items_of(context, "planned_changes")
        changeset = items_of(context, "changeset")
        bundle = getattr(context, "doc_rules_bundle", None)
        issue_items = items_of(context, "issues")

        planned_preview = [
            {"op": text_of(entry, "op"), "path": text_of(entry, "path"), "doc_type": text_of(entry, "doc_type")}
            for entry in planned[:5]
        ]
        changeset_preview = [
            {"op": text_of(entry, "op"), "path": text_of(entry, "path")}
            for entry in changeset[:5]
        ]

        return {
            "source_kind": text_of(context, "source_kind"),
            "source_ref": text_of(context, "source_ref"),
            "project_root": text_of(context, "project_root"),
            "feature_content_len": len(text_of(context, "feature_content")),
            "analysis_id": text_of(analytics, "analysis_id"),
            "domains": items_of(analytics, "domains"),
            "subdomains": items_of(analytics, "subdomains"),
            "units_count": len(units),
            "unit_headings": [str(getattr(unit, "heading", "")) for unit in units[:5]],
            "docs_rows_count": len(items_of(context, "docs_catalog_rows")),
            "doc_rules_enabled": bool(getattr(context, "doc_rules_enabled", False)),
            "doc_rules_loaded": bundle is not None,
            "doc_rules_supported_types": sorted(getattr(bundle, "supported_doc_types", set()) or set()),
            "planned_changes_count": len(planned),
            "planned_changes_preview": planned_preview,
            "changeset_count": len(changeset),
            "changeset_preview": changeset_preview,
            "apply_changeset": bool(getattr(context, "apply_changeset", False)),
            "answer_len": len(text_of(context, "answer")),
            "issues_count": len(issue_items),
            "issues_preview": [str(value) for value in issue_items[:5]],
        }
AnalyticsMeta = field(default_factory=AnalyticsMeta) + units: list[FeatureRequirementUnit] = field(default_factory=list) + docs_catalog_rows: list[dict] = field(default_factory=list) + doc_rules_enabled: bool = True + doc_rules_bundle: DocRulesBundle | None = None + planned_changes: list[PlannedChange] = field(default_factory=list) + changeset: list[ChangeItem] = field(default_factory=list) + apply_changeset: bool = False + issues: list[str] = field(default_factory=list) + answer: str = "" + answer_generated_payload: dict[str, object] | None = None diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/models.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/models.py new file mode 100644 index 0000000..f81a1b6 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/models.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from dataclasses import dataclass, field + + +@dataclass(slots=True) +class AnalyticsMeta: + analysis_id: str = "" + domains: list[str] = field(default_factory=list) + subdomains: list[str] = field(default_factory=list) + + +@dataclass(slots=True) +class FeatureRequirementUnit: + heading: str + body: str + metadata: dict[str, object] = field(default_factory=dict) + + +@dataclass(slots=True) +class PlannedChange: + op: str + path: str + doc_type: str + reason: str + title: str + doc_id: str + requirement_body: str = "" + source_refs: list[str] = field(default_factory=list) + related_docs: list[str] = field(default_factory=list) diff --git a/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/system_rules.py b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/system_rules.py new file mode 100644 index 0000000..52dc0e5 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/doc_update_from_feature/workflow_runtime/system_rules.py @@ -0,0 +1,33 @@ 
# Target folder for each supported document type (insertion order is significant).
DOC_TYPE_TO_FOLDER: dict[str, str] = dict(
    ui_page="docs/ui",
    api_method="docs/api",
    logic_block="docs/logic",
    architecture_overview="docs/architecture",
    integration_doc="docs/integrations",
    domain_entity="docs/domains",
    glossary_item="docs/glossary",
    index_page="docs",
)

# Supported document types, in the same order as the folder mapping above.
ALLOWED_DOC_TYPES: tuple[str, ...] = tuple(DOC_TYPE_TO_FOLDER)

# System-level documentation rules as one newline-separated text.
SYSTEM_RULES_TEXT = "\n".join(
    (
        "Системные правила документации:",
        "1. Один устойчивый объект — один документ.",
        "2. Документы не должны дублировать друг друга по смыслу.",
        "3. Связи между документами должны быть явными (related_docs/links).",
        "4. Документация организована иерархически по папкам docs/*.",
        "5. Markdown-документ состоит из YAML frontmatter и body.",
        "6. Обязательные поля frontmatter: id, title, doc_type, related_docs, status, domain, sub_domain.",
    )
)
app.core.agent.processes.v2.models import RetrievedFile, V2RouteResult -from app.core.agent.runtime.execution_context import RuntimeExecutionContext - - -@dataclass(slots=True) -class DocsExplainFindFilesContext: - runtime: RuntimeExecutionContext - route: V2RouteResult - rag_session_id: str - files: list[RetrievedFile] = field(default_factory=list) - gate_decision: EvidenceGateDecision | None = None - answer: str = "" diff --git a/src/app/core/agent/processes/v2/workflows/docs_explain_find_files/graph.py b/src/app/core/agent/processes/v2/workflows/docs_explain_find_files/graph.py deleted file mode 100644 index e9ad91f..0000000 --- a/src/app/core/agent/processes/v2/workflows/docs_explain_find_files/graph.py +++ /dev/null @@ -1,16 +0,0 @@ -from __future__ import annotations - -from app.core.agent.processes.v2.workflows.docs_explain_find_files.context import DocsExplainFindFilesContext -from app.core.agent.processes.v2.workflows.docs_explain_find_files.steps.finalize_find_files_answer_step import ( - FinalizeFindFilesAnswerStep, -) -from app.core.agent.processes.v2.workflows.v2_workflow_graph import V2WorkflowGraph - - -class DocsExplainFindFilesGraph(V2WorkflowGraph[DocsExplainFindFilesContext]): - def __init__(self) -> None: - super().__init__( - workflow_id="v2.docs_explain.find_files", - source="workflow.v2.find_files", - steps=[FinalizeFindFilesAnswerStep()], - ) diff --git a/src/app/core/agent/processes/v2/workflows/docs_explain_find_files/steps/finalize_find_files_answer_step.py b/src/app/core/agent/processes/v2/workflows/docs_explain_find_files/steps/finalize_find_files_answer_step.py deleted file mode 100644 index 863ae4c..0000000 --- a/src/app/core/agent/processes/v2/workflows/docs_explain_find_files/steps/finalize_find_files_answer_step.py +++ /dev/null @@ -1,25 +0,0 @@ -from __future__ import annotations - -from app.core.agent.processes.v2.workflows.docs_explain_find_files.context import DocsExplainFindFilesContext -from app.core.agent.utils.workflow 
import WorkflowStep - - -class FinalizeFindFilesAnswerStep(WorkflowStep[DocsExplainFindFilesContext]): - step_id = "finalize_find_files_answer" - title = "Сборка списка файлов" - - async def run(self, context: DocsExplainFindFilesContext) -> DocsExplainFindFilesContext: - if not context.files: - context.answer = "Не нашёл файлов документации, которые уверенно соответствуют запросу." - return context - if context.gate_decision is not None and context.gate_decision.reason == "low_confidence_shortlist": - context.answer = "\n".join(item.path for item in context.files[:4]) - return context - if len(context.files) == 1: - context.answer = context.files[0].path - return context - context.answer = "\n".join(item.path for item in context.files[:4]) - return context - - def trace_output(self, context: DocsExplainFindFilesContext) -> dict[str, object]: - return {"answer_length": len(context.answer)} diff --git a/src/app/core/agent/processes/v2/workflows/docs_explain_summary/__init__.py b/src/app/core/agent/processes/v2/workflows/docs_explain_summary/__init__.py deleted file mode 100644 index 99020fa..0000000 --- a/src/app/core/agent/processes/v2/workflows/docs_explain_summary/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from app.core.agent.processes.v2.workflows.docs_explain_summary.graph import DocsExplainSummaryGraph - -__all__ = ["DocsExplainSummaryGraph"] diff --git a/src/app/core/agent/processes/v2/workflows/docs_explain_summary/graph.py b/src/app/core/agent/processes/v2/workflows/docs_explain_summary/graph.py deleted file mode 100644 index d1df68a..0000000 --- a/src/app/core/agent/processes/v2/workflows/docs_explain_summary/graph.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import annotations - -from app.core.agent.processes.v2.workflows.docs_explain_summary.context import DocsExplainSummaryContext -from app.core.agent.processes.v2.workflows.docs_explain_summary.steps.generate_summary_answer_step import ( - GenerateSummaryAnswerStep, -) -from 
app.core.agent.processes.v2.workflows.v2_workflow_graph import V2WorkflowGraph -from app.core.agent.utils.llm import AgentLlmService - - -class DocsExplainSummaryGraph(V2WorkflowGraph[DocsExplainSummaryContext]): - def __init__(self, llm: AgentLlmService) -> None: - super().__init__( - workflow_id="v2.docs_explain.summary", - source="workflow.v2.summary", - steps=[GenerateSummaryAnswerStep(llm)], - ) diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/README.md b/src/app/core/agent/processes/v2/workflows/general_qa_summary/README.md new file mode 100644 index 0000000..c4bae97 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/README.md @@ -0,0 +1,162 @@ +# GENERAL_QA / SUMMARY Workflow + +## Контракт сабинтента + +| Поле | Значение | +|---|---| +| `domain` | `GENERAL` | +| `intent` | `GENERAL_QA` | +| `subintent` | `SUMMARY` | +| `workflow_id` | `v2.general_qa.summary` | +| `source` | `workflow.v2.general_summary` | + +## Диаграмма флоу + +```mermaid +flowchart TD + A["RequireRagSessionStep"] --> B["ResolveRetrievalPlanStep"] + B --> C["FetchRagRowsStep"] + C --> D["PrepareCandidateRowsStep"] + D --> E["BuildSummaryEvidenceStep"] + E --> F["ApplySummaryEvidenceGateStep"] + F --> G["GenerateGeneralSummaryAnswerStep"] +``` + +## Шаги процесса + +### 1) `RequireRagSessionStep` + +Шаг проверяет, что есть активная RAG-сессия с проиндексированной документацией. Если сессии нет, workflow сразу возвращает ответ о недостатке опоры и ставит `missing_rag_session` в gate-решение. Это защищает от псевдо-генерации без данных. 
+ +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.rag_session_id` | `V2Process` -> `GeneralQaSummaryContext` | Идентификатор RAG-сессии | +| `self._missing_message` | Конфигурация в `graph.py` | Текст отказа при отсутствии сессии | +| `self._missing_gate` | Конфигурация в `graph.py` | Преднастроенное gate-решение для раннего выхода | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.answer` | Заполняется `missing_message`, если `rag_session_id` пустой | +| `context.gate_decision` | Заполняется `missing_gate` (`reason=missing_rag_session`) | +| `context.answer_generated_payload` | `{"answer_mode", "answer_length"}` для раннего ответа | + +### 2) `ResolveRetrievalPlanStep` + +Шаг строит retrieval-план для общего QA-summary сценария. План включает профиль `general_qa_grounded_summary`, слои и фильтры, релевантные широкому запросу по документации. Параметры плана пишутся в trace для диагностики политики. + +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.route` | `intent_router` | Route с сигналами запроса | +| `self._resolver` | DI из `graph.py` | Реализация `RetrievalPlanResolver` | +| `context.answer` | Предыдущие шаги | При наличии ответа шаг пропускается | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.retrieval_plan` | `self._resolver.resolve(context.route)` | +| `process.v2.retrieval_policy.retrieval_plan_resolved` | Лог `profile/layers/limit/filters` | + +### 3) `FetchRagRowsStep` + +Шаг запускает retrieval через `V2RagRetrievalAdapter` по нормализованному запросу и плану. Адаптер объединяет seed-документы по hints и основную retrieval-выдачу. Так в `retrieved_rows` попадают как точные, так и семантически близкие источники. 
+ +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.rag_session_id` | Контекст workflow | Сессия поиска | +| `context.route.normalized_query` | Route | Текст запроса для retrieval | +| `context.retrieval_plan` | `ResolveRetrievalPlanStep` | План retrieval | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.retrieved_rows` | `await rag_adapter.fetch_rows(...)` | + +### 4) `PrepareCandidateRowsStep` + +Шаг выполняет post-processing retrieval-строк: metadata lookup, merge и seed от target hints. Он формирует `context.rows` как итоговый набор кандидатов для summary-ranking. Одновременно пишет расширенный retrieval-trace с источниками кандидатов. + +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.retrieved_rows` | `FetchRagRowsStep` | Сырые retrieval-строки | +| `context.route` | Route | Hints и сигналы запроса | +| `self._builder` | `CandidateRowsBuilder()` | Логика подготовки кандидатов | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.rows` | `prepared.rows` из `CandidateRowsBuilder.build(...)` | +| `process.v2.rag_retrieval.rag_rows_fetched` | Лог retrieval-деталей и источников | + +### 5) `BuildSummaryEvidenceStep` + +Шаг строит shortlist summary-документов через `DocsEvidenceAssembler.assemble_summaries`. На этом этапе формируются оценка релевантности и причины совпадения для каждого документа. Результат нужен и для gate, и для финального ответа. 
+ +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.rows` | `PrepareCandidateRowsStep` | Кандидаты после merge | +| `context.route` | Route | Сигналы маршрута для scoring | +| `self._assembler` | DI из `graph.py` | Сборщик evidence | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.documents` | `assemble_summaries(context.rows, context.route)` | +| `process.v2.evidence.evidence_assembled` | Лог количества и путей summary-документов | + +### 6) `ApplySummaryEvidenceGateStep` + +Шаг проверяет, достаточно ли качества и объёма summary-evidence для grounded-ответа. Если gate не пройден, следующий шаг вернёт `gate_decision.message` без LLM-генерации. Это ограничивает риск неуверенных ответов. + +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.route` | Route | Контекст запроса | +| `context.documents` | `BuildSummaryEvidenceStep` | Подготовленные summary | +| `self._gate` | DI из `graph.py` | Правила gate-проверки | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.gate_decision` | `self._gate.check_summaries(context.route, context.documents)` | +| `process.v2.pipeline.evidence_gate_checked` | Лог `passed/reason/answer_mode` | + +### 7) `GenerateGeneralSummaryAnswerStep` + +Шаг финализирует ответ по одной из трёх веток: gate-message, детерминированный режим или LLM по prompt. При LLM-ветке формируется `prompt_input` из запроса и списка опорных документов, затем вызывается `llm.generate`. На выходе всегда ставится `answer_generated_payload` с режимом и длиной ответа. 
+ +**Входные параметры** + +| Параметр | Откуда берётся | Описание | +|---|---|---| +| `context.documents` | `BuildSummaryEvidenceStep` | Опорные summary-документы | +| `context.gate_decision` | `ApplySummaryEvidenceGateStep` | Решение gate | +| `context.workflow_llm_enabled` | `V2Process` | Флаг LLM/детерминизм | +| `context.prompt_name` | `V2Process` | Имя prompt-шаблона | +| `self._llm` | DI из `graph.py` | LLM-сервис | + +**Выходные параметры** + +| Параметр | Как формируется | +|---|---| +| `context.prompt_input` | Сборка блоков `Запрос пользователя` + `Опорные документы` | +| `context.answer` | Gate-message / deterministic summary / результат `llm.generate(...)` | +| `context.answer_generated_payload` | `{"answer_mode", "answer_length"}` по ветке ответа | + diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/__init__.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/__init__.py new file mode 100644 index 0000000..138b6cf --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/__init__.py @@ -0,0 +1,3 @@ +from app.core.agent.processes.v2.workflows.general_qa_summary.graph import GeneralQaSummaryGraph + +__all__ = ["GeneralQaSummaryGraph"] diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/graph.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/graph.py new file mode 100644 index 0000000..96f21a4 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/graph.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.buffered_graph import GeneralQaSummaryWorkflowGraph +from app.core.agent.processes.v2.workflows.general_qa_summary.steps.retrieval.candidate_rows import CandidateRowsBuilder +from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.context import GeneralQaSummaryContext +from 
app.core.agent.processes.v2.workflows.general_qa_summary.steps.apply_summary_evidence_gate_step import ( + ApplySummaryEvidenceGateStep, +) +from app.core.agent.processes.v2.workflows.general_qa_summary.steps.build_summary_evidence_step import BuildSummaryEvidenceStep +from app.core.agent.processes.v2.workflows.general_qa_summary.steps.fetch_rag_rows_step import FetchRagRowsStep +from app.core.agent.processes.v2.workflows.general_qa_summary.steps.generate_general_summary_answer_step import ( + GenerateGeneralSummaryAnswerStep, +) +from app.core.agent.processes.v2.workflows.general_qa_summary.steps.prepare_candidate_rows_step import ( + PrepareCandidateRowsStep, +) +from app.core.agent.processes.v2.workflows.general_qa_summary.steps.require_rag_session_step import RequireRagSessionStep +from app.core.agent.processes.v2.workflows.general_qa_summary.steps.resolve_retrieval_plan_step import ( + ResolveRetrievalPlanStep, +) +from app.core.agent.utils.llm import AgentLlmService +from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler +from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate, EvidenceGateDecision +from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver +from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter + + +class GeneralQaSummaryGraph(GeneralQaSummaryWorkflowGraph[GeneralQaSummaryContext]): + def __init__( + self, + llm: AgentLlmService, + policy_resolver: RetrievalPlanResolver, + rag_adapter: V2RagRetrievalAdapter, + evidence_assembler: DocsEvidenceAssembler, + evidence_gate: DocsEvidenceGate, + ) -> None: + super().__init__( + workflow_id="v2.general_qa.summary", + source="workflow.v2.general_summary", + steps=[ + RequireRagSessionStep( + missing_message="Не могу собрать grounded summary без активной RAG-сессии с проиндексированной документацией.", + missing_gate=EvidenceGateDecision( + passed=False, + answer_mode="insufficient_evidence", + 
reason="missing_rag_session", + message="Не могу собрать grounded summary без активной RAG-сессии с проиндексированной документацией.", + ), + ), + ResolveRetrievalPlanStep(policy_resolver), + FetchRagRowsStep(rag_adapter), + PrepareCandidateRowsStep(CandidateRowsBuilder()), + BuildSummaryEvidenceStep(evidence_assembler), + ApplySummaryEvidenceGateStep(evidence_gate), + GenerateGeneralSummaryAnswerStep(llm), + ], + ) diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/apply_summary_evidence_gate_step.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/apply_summary_evidence_gate_step.py new file mode 100644 index 0000000..60343a5 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/apply_summary_evidence_gate_step.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from typing import Generic, TypeVar + +from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.context_protocols import SummaryWorkflowContext +from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.pipeline_logging import log_pipeline_step +from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate +from app.core.agent.utils.workflow import WorkflowStep + +TContext = TypeVar("TContext", bound=SummaryWorkflowContext) + + +class ApplySummaryEvidenceGateStep(WorkflowStep[TContext], Generic[TContext]): + step_id = "apply_summary_evidence_gate" + title = "Проверка summary evidence" + + def __init__(self, gate: DocsEvidenceGate) -> None: + self._gate = gate + + async def run(self, context: TContext) -> TContext: + if context.answer: + return context + context.gate_decision = self._gate.check_summaries(context.route, context.documents) + log_pipeline_step( + context.runtime, + "evidence_gate_checked", + { + "passed": context.gate_decision.passed, + "reason": context.gate_decision.reason, + "answer_mode": context.gate_decision.answer_mode, + }, + ) + return 
context + + def trace_output(self, context: TContext) -> dict[str, object]: + return {"passed": bool(context.gate_decision and context.gate_decision.passed)} diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/build_summary_evidence_step.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/build_summary_evidence_step.py new file mode 100644 index 0000000..a82292e --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/build_summary_evidence_step.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from typing import Generic, TypeVar + +from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.context_protocols import SummaryWorkflowContext +from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.pipeline_logging import log_pipeline_step, log_ranking +from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler +from app.core.agent.utils.workflow import WorkflowStep + +TContext = TypeVar("TContext", bound=SummaryWorkflowContext) + + +class BuildSummaryEvidenceStep(WorkflowStep[TContext], Generic[TContext]): + step_id = "build_summary_evidence" + title = "Сборка summary evidence" + + def __init__(self, assembler: DocsEvidenceAssembler) -> None: + self._assembler = assembler + + async def run(self, context: TContext) -> TContext: + if context.answer: + return context + context.documents = self._assembler.assemble_summaries(context.rows, context.route) + context.runtime.trace.module("process.v2.evidence").log( + "evidence_assembled", + { + "mode": "summary", + "document_count": len(context.documents), + "documents": [item.path for item in context.documents], + }, + ) + log_pipeline_step( + context.runtime, + "evidence_assembled", + { + "mode": "summary", + "primary_doc": context.documents[0].path if context.documents else None, + "document_count": len(context.documents), + }, + ) + log_ranking(context.runtime, 
context.documents) + return context + + def trace_output(self, context: TContext) -> dict[str, object]: + return {"document_count": len(context.documents)} diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/fetch_rag_rows_step.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/fetch_rag_rows_step.py new file mode 100644 index 0000000..8974716 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/fetch_rag_rows_step.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from typing import Generic, TypeVar + +from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.context_protocols import RetrievalWorkflowContext +from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter +from app.core.agent.utils.workflow import WorkflowStep + +TContext = TypeVar("TContext", bound=RetrievalWorkflowContext) + + +class FetchRagRowsStep(WorkflowStep[TContext], Generic[TContext]): + step_id = "fetch_rag_rows" + title = "Получение строк из RAG" + + def __init__(self, rag_adapter: V2RagRetrievalAdapter) -> None: + self._rag_adapter = rag_adapter + + async def run(self, context: TContext) -> TContext: + if context.answer or context.retrieval_plan is None: + return context + context.retrieved_rows = await self._rag_adapter.fetch_rows( + context.rag_session_id, + context.route.normalized_query, + context.retrieval_plan, + ) + return context + + def trace_output(self, context: TContext) -> dict[str, object]: + return {"retrieved_row_count": len(context.retrieved_rows)} diff --git a/src/app/core/agent/processes/v2/workflows/general_summary/steps/generate_general_summary_answer_step.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/generate_general_summary_answer_step.py similarity index 59% rename from src/app/core/agent/processes/v2/workflows/general_summary/steps/generate_general_summary_answer_step.py rename to 
src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/generate_general_summary_answer_step.py index 5e6945f..9339e66 100644 --- a/src/app/core/agent/processes/v2/workflows/general_summary/steps/generate_general_summary_answer_step.py +++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/generate_general_summary_answer_step.py @@ -2,24 +2,31 @@ from __future__ import annotations import asyncio -from app.core.agent.processes.v2.workflows.general_summary.context import GeneralSummaryContext +from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.context import GeneralQaSummaryContext from app.core.agent.utils.llm import AgentLlmService from app.core.agent.utils.workflow import WorkflowStep -class GenerateGeneralSummaryAnswerStep(WorkflowStep[GeneralSummaryContext]): +class GenerateGeneralSummaryAnswerStep(WorkflowStep[GeneralQaSummaryContext]): step_id = "generate_general_summary_answer" title = "Общий ответ через LLM" def __init__(self, llm: AgentLlmService) -> None: self._llm = llm - async def run(self, context: GeneralSummaryContext) -> GeneralSummaryContext: + async def run(self, context: GeneralQaSummaryContext) -> GeneralQaSummaryContext: + if context.answer: + return context if context.gate_decision is not None and not context.gate_decision.passed: context.answer = context.gate_decision.message + context.answer_generated_payload = { + "answer_mode": context.gate_decision.answer_mode, + "answer_length": len(context.answer), + } return context if not context.workflow_llm_enabled: context.answer = self._build_deterministic_answer(context) + context.answer_generated_payload = {"answer_mode": "grounded_summary", "answer_length": len(context.answer)} return context context.prompt_input = self._build_prompt_input(context) request_id = context.runtime.request.request_id @@ -30,13 +37,11 @@ class GenerateGeneralSummaryAnswerStep(WorkflowStep[GeneralSummaryContext]): log_context=f"agent:{request_id}", 
trace=context.runtime.trace.module("workflow.v2.general_summary.llm"), ) + context.answer_generated_payload = {"answer_mode": "grounded_summary", "answer_length": len(context.answer)} return context - def _build_prompt_input(self, context: GeneralSummaryContext) -> str: - blocks = [ - f"Запрос пользователя:\n{context.route.user_query}", - "Опорные документы:", - ] + def _build_prompt_input(self, context: GeneralQaSummaryContext) -> str: + blocks = [f"Запрос пользователя:\n{context.route.user_query}", "Опорные документы:"] for index, item in enumerate(context.documents, start=1): blocks.append( f"{index}. path: {item.path}\n" @@ -45,13 +50,13 @@ class GenerateGeneralSummaryAnswerStep(WorkflowStep[GeneralSummaryContext]): ) return "\n\n".join(blocks) - def _build_deterministic_answer(self, context: GeneralSummaryContext) -> str: + def _build_deterministic_answer(self, context: GeneralQaSummaryContext) -> str: if not context.documents: return "В найденной документации нет достаточной опоры для общего summary по запросу." 
return "\n".join(item.summary for item in context.documents[:2] if item.summary) - def trace_input(self, context: GeneralSummaryContext) -> dict[str, object]: + def trace_input(self, context: GeneralQaSummaryContext) -> dict[str, object]: return {"query": context.route.normalized_query} - def trace_output(self, context: GeneralSummaryContext) -> dict[str, object]: + def trace_output(self, context: GeneralQaSummaryContext) -> dict[str, object]: return {"answer_length": len(context.answer)} diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/prepare_candidate_rows_step.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/prepare_candidate_rows_step.py new file mode 100644 index 0000000..d1c7553 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/prepare_candidate_rows_step.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from typing import Generic, TypeVar + +from app.core.agent.processes.v2.workflows.general_qa_summary.steps.retrieval.candidate_rows import CandidateRowsBuilder +from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.context_protocols import RetrievalWorkflowContext +from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.pipeline_logging import log_retrieval_trace +from app.core.agent.utils.workflow import WorkflowStep + +TContext = TypeVar("TContext", bound=RetrievalWorkflowContext) + + +class PrepareCandidateRowsStep(WorkflowStep[TContext], Generic[TContext]): + step_id = "prepare_candidate_rows" + title = "Подготовка candidate rows" + + def __init__(self, builder: CandidateRowsBuilder) -> None: + self._builder = builder + + async def run(self, context: TContext) -> TContext: + if context.answer or context.retrieval_plan is None: + return context + prepared = self._builder.build(context.retrieved_rows, context.route) + context.rows = prepared.rows + log_retrieval_trace( + context.runtime, + context.route, + 
context.retrieval_plan, + context.retrieved_rows, + prepared.metadata_rows, + prepared.rows, + ) + return context + + def trace_output(self, context: TContext) -> dict[str, object]: + return {"row_count": len(context.rows)} diff --git a/src/app/core/agent/processes/v2/general_prompts.yml b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/prompts/prompts.yml similarity index 100% rename from src/app/core/agent/processes/v2/general_prompts.yml rename to src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/prompts/prompts.yml diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/require_rag_session_step.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/require_rag_session_step.py new file mode 100644 index 0000000..44bae7e --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/require_rag_session_step.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +from typing import Generic, TypeVar + +from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.context_protocols import RetrievalWorkflowContext +from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.pipeline_logging import log_pipeline_step +from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision +from app.core.agent.utils.workflow import WorkflowStep + +TContext = TypeVar("TContext", bound=RetrievalWorkflowContext) + + +class RequireRagSessionStep(WorkflowStep[TContext], Generic[TContext]): + step_id = "require_rag_session" + title = "Проверка RAG-сессии" + + def __init__(self, *, missing_message: str, missing_gate: EvidenceGateDecision | None = None) -> None: + self._missing_message = missing_message + self._missing_gate = missing_gate + + async def run(self, context: TContext) -> TContext: + if context.rag_session_id: + return context + context.answer = self._missing_message + if self._missing_gate is not None: + 
context.gate_decision = self._missing_gate + context.answer_generated_payload = { + "answer_mode": self._missing_gate.answer_mode, + "answer_length": len(context.answer), + } + log_pipeline_step( + context.runtime, + "evidence_gate_checked", + { + "passed": self._missing_gate.passed, + "reason": self._missing_gate.reason, + "answer_mode": self._missing_gate.answer_mode, + }, + ) + return context + + def trace_output(self, context: TContext) -> dict[str, object]: + return {"has_rag_session": bool(context.rag_session_id)} diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/resolve_retrieval_plan_step.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/resolve_retrieval_plan_step.py new file mode 100644 index 0000000..d48fb84 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/resolve_retrieval_plan_step.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from typing import Generic, TypeVar + +from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.context_protocols import RetrievalWorkflowContext +from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.pipeline_logging import log_pipeline_step +from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver +from app.core.agent.utils.workflow import WorkflowStep + +TContext = TypeVar("TContext", bound=RetrievalWorkflowContext) + + +class ResolveRetrievalPlanStep(WorkflowStep[TContext], Generic[TContext]): + step_id = "resolve_retrieval_plan" + title = "Выбор retrieval-плана" + + def __init__(self, resolver: RetrievalPlanResolver) -> None: + self._resolver = resolver + + async def run(self, context: TContext) -> TContext: + if context.answer: + return context + plan = self._resolver.resolve(context.route) + context.retrieval_plan = plan + context.runtime.trace.module("process.v2.retrieval_policy").log( + "retrieval_plan_resolved", + {"profile": plan.profile, "layers": 
plan.layers, "limit": plan.limit, "filters": plan.filters}, + ) + log_pipeline_step( + context.runtime, + "retrieval_profile_selected", + {"profile": plan.profile, "layers": plan.layers, "filters": plan.filters}, + ) + return context + + def trace_output(self, context: TContext) -> dict[str, object]: + return {"profile": getattr(context.retrieval_plan, "profile", "")} diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/retrieval/__init__.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/retrieval/__init__.py new file mode 100644 index 0000000..bcd4a4a --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/retrieval/__init__.py @@ -0,0 +1,2 @@ +"""Retrieval-related step helpers for the general-qa summary workflow.""" + diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/retrieval/candidate_rows.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/retrieval/candidate_rows.py new file mode 100644 index 0000000..b1d88cf --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/retrieval/candidate_rows.py @@ -0,0 +1,43 @@ +"""Сборка candidate rows для general QA summary (метаданные + сиды по hints).""" + +from __future__ import annotations + +from dataclasses import dataclass + +from app.core.agent.utils.process_v2.models import V2RouteResult +from app.core.agent.utils.process_v2.rag_retrieval import DocsMetadataLookupIndex +from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import ( + RagRowIndex, + merge_row_lists, + normalize_doc_path, + normalized_path_set, + seed_candidates_from_target_hints, +) + + +@dataclass(slots=True) +class CandidateRowsResult: + metadata_rows: list[dict] + rows: list[dict] + + +class CandidateRowsBuilder: + def build(self, retrieved_rows: list[dict], route: V2RouteResult) -> CandidateRowsResult: + metadata_rows = DocsMetadataLookupIndex(retrieved_rows).lookup(route) + 
rows = merge_row_lists(retrieved_rows, metadata_rows) + rows = seed_candidates_from_target_hints(rows, route.anchors.target_doc_hints, RagRowIndex(rows)) + self._print_missing_target_hints(route, rows) + return CandidateRowsResult(metadata_rows=metadata_rows, rows=rows) + + def _print_missing_target_hints(self, route: V2RouteResult, rows: list[dict]) -> None: + if not route.anchors.target_doc_hints: + return + candidate_paths = normalized_path_set(rows) + for hint in route.anchors.target_doc_hints: + if not str(hint or "").strip(): + continue + normalized = normalize_doc_path(hint) + if not normalized.startswith("docs/") or "." not in normalized.rsplit("/", 1)[-1]: + continue + if normalized not in candidate_paths: + print("ERROR: target doc missing from candidates:", normalized) diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/retrieval/retrieval_policy.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/retrieval/retrieval_policy.py new file mode 100644 index 0000000..ddf4b80 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/retrieval/retrieval_policy.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from app.core.agent.utils.process_v2.models import V2Intent, V2RouteResult +from app.core.rag.contracts.enums import RagLayer +from app.core.rag.retrieval.session_retriever import RetrievalPlan + + +class GeneralQaSummaryRetrievalPolicy: + _LAYERS = [RagLayer.DOCS_DOCUMENT_CATALOG, RagLayer.DOCS_DOC_CHUNKS] + + def supports(self, route: V2RouteResult) -> bool: + return route.intent == V2Intent.GENERAL_QA + + def resolve(self, route: V2RouteResult) -> RetrievalPlan: + return RetrievalPlan( + profile="general_qa_grounded_summary", + layers=list(self._LAYERS), + limit=8, + filters={ + "prefer_path_prefixes": ["docs/architecture/", "docs/"], + "prefer_like_patterns": ["%readme.md%", "%overview%"], + "target_doc_hints": list(route.anchors.target_doc_hints), + }, + ) + diff 
--git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/__init__.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/__init__.py new file mode 100644 index 0000000..1f799db --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/__init__.py @@ -0,0 +1,2 @@ +"""Runtime helpers for the general-qa summary workflow.""" + diff --git a/src/app/core/agent/processes/v2/workflows/v2_workflow_graph.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/buffered_graph.py similarity index 73% rename from src/app/core/agent/processes/v2/workflows/v2_workflow_graph.py rename to src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/buffered_graph.py index b3db545..efe5b81 100644 --- a/src/app/core/agent/processes/v2/workflows/v2_workflow_graph.py +++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/buffered_graph.py @@ -1,18 +1,16 @@ -"""Workflow-граф v2: буфер шаговых логов и один сброс в trace в конце прогона.""" +"""Граф workflow general QA summary: буфер шагов и один сброс в trace (на базе utils.workflow).""" from __future__ import annotations -from typing import Generic, Sequence, TypeVar +from typing import TypeVar from app.core.agent.utils.workflow.context import WorkflowContext from app.core.agent.utils.workflow.graph import WorkflowGraph -from app.core.agent.utils.workflow.step import WorkflowStep - TContext = TypeVar("TContext", bound=WorkflowContext) -class V2WorkflowGraph(WorkflowGraph[TContext]): +class GeneralQaSummaryWorkflowGraph(WorkflowGraph[TContext]): """Не логирует step_started/step_completed по отдельности; сбрасывает буфер в ``workflow_trace_flushed``.""" async def run(self, context: TContext) -> TContext: @@ -30,6 +28,15 @@ class V2WorkflowGraph(WorkflowGraph[TContext]): ) context = await step.run(context) out = step.trace_output(context) + trace.log( + 
"workflow_step_traced", + { + "workflow_id": self._workflow_id, + "step": {"id": step.step_id, "title": step.title}, + "input": inp, + "output": out, + }, + ) steps_buffer.append({"step_id": step.step_id, "title": step.title, "input": inp, "output": out}) trace.log( "workflow_trace_flushed", diff --git a/src/app/core/agent/processes/v2/workflows/docs_explain_summary/context.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/context.py similarity index 50% rename from src/app/core/agent/processes/v2/workflows/docs_explain_summary/context.py rename to src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/context.py index 660dd63..114f0db 100644 --- a/src/app/core/agent/processes/v2/workflows/docs_explain_summary/context.py +++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/context.py @@ -2,19 +2,24 @@ from __future__ import annotations from dataclasses import dataclass, field -from app.core.agent.processes.v2.evidence.gate import EvidenceGateDecision -from app.core.agent.processes.v2.models import RetrievedSummary, V2RouteResult from app.core.agent.runtime.execution_context import RuntimeExecutionContext +from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision +from app.core.agent.utils.process_v2.models import RetrievedSummary, V2RouteResult +from app.core.rag.retrieval.session_retriever import RetrievalPlan @dataclass(slots=True) -class DocsExplainSummaryContext: +class GeneralQaSummaryContext: runtime: RuntimeExecutionContext route: V2RouteResult rag_session_id: str prompt_name: str workflow_llm_enabled: bool = True + retrieval_plan: RetrievalPlan | None = None + retrieved_rows: list[dict] = field(default_factory=list) + rows: list[dict] = field(default_factory=list) documents: list[RetrievedSummary] = field(default_factory=list) gate_decision: EvidenceGateDecision | None = None prompt_input: str = "" answer: str = "" + answer_generated_payload: 
dict[str, object] | None = None diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/context_protocols.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/context_protocols.py new file mode 100644 index 0000000..88b3f92 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/context_protocols.py @@ -0,0 +1,26 @@ +"""Протоколы контекста для workflow general QA summary.""" + +from __future__ import annotations + +from typing import Protocol + +from app.core.agent.runtime.execution_context import RuntimeExecutionContext +from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision +from app.core.agent.utils.process_v2.models import RetrievedSummary, V2RouteResult +from app.core.rag.retrieval.session_retriever import RetrievalPlan + + +class RetrievalWorkflowContext(Protocol): + runtime: RuntimeExecutionContext + route: V2RouteResult + rag_session_id: str + retrieval_plan: RetrievalPlan | None + retrieved_rows: list[dict] + rows: list[dict] + gate_decision: EvidenceGateDecision | None + answer: str + answer_generated_payload: dict[str, object] | None + + +class SummaryWorkflowContext(RetrievalWorkflowContext, Protocol): + documents: list[RetrievedSummary] diff --git a/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/pipeline_logging.py b/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/pipeline_logging.py new file mode 100644 index 0000000..9f1b542 --- /dev/null +++ b/src/app/core/agent/processes/v2/workflows/general_qa_summary/workflow_runtime/pipeline_logging.py @@ -0,0 +1,106 @@ +"""Логирование retrieval/pipeline/ranking для general QA summary.""" + +from __future__ import annotations + +from app.core.agent.utils.process_v2.models import V2RouteResult +from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import normalize_doc_path, row_path + + +def 
log_pipeline_step(runtime, step: str, payload: dict[str, object]) -> None: + runtime.trace.module("process.v2.pipeline").log(step, payload) + + +def log_retrieval_trace(runtime, route: V2RouteResult, plan, retrieved_rows: list[dict], metadata_rows: list[dict], rows: list[dict]) -> None: + runtime.trace.module("process.v2.rag_retrieval").log( + "rag_rows_fetched", + { + "profile": plan.profile, + "row_count": len(rows), + "rows": [trace_row(row) for row in rows], + }, + ) + hinted_paths = {normalize_doc_path(hint) for hint in route.anchors.target_doc_hints if str(hint or "").strip()} + log_pipeline_step( + runtime, + "candidate_generation", + { + "query": route.user_query, + "profile": plan.profile, + "details": { + "target_doc_hints": list(route.anchors.target_doc_hints), + "candidates_before_ranking": [row_path(row) for row in rows if row_path(row)], + }, + "resolved_aliases": route.anchors.matched_aliases, + "target_doc_hints": route.anchors.target_doc_hints, + "candidate_docs_before_ranking": [trace_row(row) for row in rows[:8]], + "sources": { + "seeded": [trace_row(row) for row in retrieved_rows[:5] if row_path(row) in hinted_paths], + "metadata_lookup": [trace_row(row) for row in metadata_rows[:5]], + "semantic": [trace_row(row) for row in retrieved_rows[:5]], + }, + }, + ) + log_pipeline_step( + runtime, + "retrieval_executed", + { + "query": route.user_query, + "profile": plan.profile, + "row_count": len(rows), + "target_doc_hints": route.anchors.target_doc_hints, + "top_results": [trace_row(row) for row in rows[:5]], + }, + ) + + +def log_ranking(runtime, items: list) -> None: + top_docs: list[dict[str, object]] = [] + for item in items[:4]: + top_docs.append( + { + "doc": getattr(item, "path", ""), + "score": getattr(item, "score", 0), + "match_reason": getattr(item, "match_reason", ""), + } + ) + log_pipeline_step( + runtime, + "ranking_explained", + { + "doc": getattr(item, "path", ""), + "score_breakdown": getattr(item, "score_breakdown", {}), + 
"score": getattr(item, "score", 0), + "match_reason": getattr(item, "match_reason", ""), + }, + ) + log_pipeline_step( + runtime, + "ranking_explained", + { + "top_docs_after_ranking": top_docs, + "ranking_score_breakdown": [ + { + "doc": getattr(item, "path", ""), + "score_breakdown": getattr(item, "score_breakdown", {}), + } + for item in items[:4] + ], + }, + ) + + +def trace_row(row: dict) -> dict[str, object]: + metadata = row.get("metadata") or {} + content = str(row.get("content") or "").strip() + return { + "layer": str(row.get("layer") or ""), + "path": str(row.get("path") or ""), + "title": str(row.get("title") or ""), + "document_id": str(metadata.get("document_id") or metadata.get("doc_id") or row.get("document_id") or ""), + "entity_name": str(metadata.get("entity_name") or ""), + "summary_text": str(metadata.get("summary_text") or "")[:400], + "section_path": str(metadata.get("section_path") or ""), + "metadata_domain": str(metadata.get("domain") or ""), + "metadata_subdomain": str(metadata.get("subdomain") or ""), + "content_preview": content[:400], + } diff --git a/src/app/core/agent/processes/v2/workflows/general_summary/__init__.py b/src/app/core/agent/processes/v2/workflows/general_summary/__init__.py deleted file mode 100644 index 732779c..0000000 --- a/src/app/core/agent/processes/v2/workflows/general_summary/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from app.core.agent.processes.v2.workflows.general_summary.graph import GeneralSummaryGraph - -__all__ = ["GeneralSummaryGraph"] diff --git a/src/app/core/agent/processes/v2/workflows/general_summary/context.py b/src/app/core/agent/processes/v2/workflows/general_summary/context.py deleted file mode 100644 index d2b2507..0000000 --- a/src/app/core/agent/processes/v2/workflows/general_summary/context.py +++ /dev/null @@ -1,19 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass, field - -from app.core.agent.processes.v2.evidence.gate import EvidenceGateDecision -from 
app.core.agent.processes.v2.models import RetrievedSummary, V2RouteResult -from app.core.agent.runtime.execution_context import RuntimeExecutionContext - - -@dataclass(slots=True) -class GeneralSummaryContext: - runtime: RuntimeExecutionContext - route: V2RouteResult - prompt_name: str - workflow_llm_enabled: bool = True - documents: list[RetrievedSummary] = field(default_factory=list) - gate_decision: EvidenceGateDecision | None = None - prompt_input: str = "" - answer: str = "" diff --git a/src/app/core/agent/processes/v2/workflows/general_summary/graph.py b/src/app/core/agent/processes/v2/workflows/general_summary/graph.py deleted file mode 100644 index 5441cad..0000000 --- a/src/app/core/agent/processes/v2/workflows/general_summary/graph.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import annotations - -from app.core.agent.processes.v2.workflows.general_summary.context import GeneralSummaryContext -from app.core.agent.processes.v2.workflows.general_summary.steps.generate_general_summary_answer_step import ( - GenerateGeneralSummaryAnswerStep, -) -from app.core.agent.processes.v2.workflows.v2_workflow_graph import V2WorkflowGraph -from app.core.agent.utils.llm import AgentLlmService - - -class GeneralSummaryGraph(V2WorkflowGraph[GeneralSummaryContext]): - def __init__(self, llm: AgentLlmService) -> None: - super().__init__( - workflow_id="v2.general_qa.summary", - source="workflow.v2.general_summary", - steps=[GenerateGeneralSummaryAnswerStep(llm)], - ) diff --git a/src/app/core/agent/runtime/agent_runtime.py b/src/app/core/agent/runtime/agent_runtime.py index 817513e..00f2ba7 100644 --- a/src/app/core/agent/runtime/agent_runtime.py +++ b/src/app/core/agent/runtime/agent_runtime.py @@ -47,6 +47,8 @@ class AgentRuntime: await self._announce_start(request.request_id, process.version) result = await self._process_runner.run(context, process) request.answer = result.answer + request.changeset = list(result.changeset) + request.apply_changeset = 
bool(result.apply_changeset) await self._publish_result(request) self._complete_request(request, session) except Exception as exc: diff --git a/src/app/core/agent/utils/process_v2/__init__.py b/src/app/core/agent/utils/process_v2/__init__.py new file mode 100644 index 0000000..5868c0e --- /dev/null +++ b/src/app/core/agent/utils/process_v2/__init__.py @@ -0,0 +1 @@ +"""Shared runtime helpers for process v2.""" diff --git a/src/app/core/agent/processes/v2/anchor_signals.py b/src/app/core/agent/utils/process_v2/anchor_signals.py similarity index 81% rename from src/app/core/agent/processes/v2/anchor_signals.py rename to src/app/core/agent/utils/process_v2/anchor_signals.py index ea9b177..21046f0 100644 --- a/src/app/core/agent/processes/v2/anchor_signals.py +++ b/src/app/core/agent/utils/process_v2/anchor_signals.py @@ -1,6 +1,6 @@ from __future__ import annotations -from app.core.agent.processes.v2.models import V2AnchorType, V2RouteAnchors, V2RouteResult, V2Subintent +from app.core.agent.utils.process_v2.models import V2AnchorType, V2RouteAnchors, V2RouteResult, V2Subintent def anchor_signal_types(route: V2RouteResult) -> set[str]: @@ -28,6 +28,11 @@ def route_anchor_summary(route: V2RouteResult) -> dict[str, object]: "matched_aliases": list(route.anchors.matched_aliases), "process_domain": route.anchors.process_domain, "process_subdomain": route.anchors.process_subdomain, + "scope_type": route.scope_type, + "candidate_domains": [c.value for c in route.anchors.candidate_domains], + "candidate_subdomains": [c.value for c in route.anchors.candidate_subdomains], + "candidate_entities": [c.value for c in route.anchors.candidate_entities], + "candidate_apis": [c.value for c in route.anchors.candidate_apis], "signal_types": sorted(anchor_signal_types(route)), } diff --git a/src/app/core/agent/utils/process_v2/evidence/__init__.py b/src/app/core/agent/utils/process_v2/evidence/__init__.py new file mode 100644 index 0000000..b518b2d --- /dev/null +++ 
b/src/app/core/agent/utils/process_v2/evidence/__init__.py @@ -0,0 +1,4 @@ +from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler +from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate, EvidenceGateDecision + +__all__ = ["DocsEvidenceAssembler", "DocsEvidenceGate", "EvidenceGateDecision"] diff --git a/src/app/core/agent/processes/v2/evidence/assembler.py b/src/app/core/agent/utils/process_v2/evidence/assembler.py similarity index 95% rename from src/app/core/agent/processes/v2/evidence/assembler.py rename to src/app/core/agent/utils/process_v2/evidence/assembler.py index 537a9cc..5b241da 100644 --- a/src/app/core/agent/processes/v2/evidence/assembler.py +++ b/src/app/core/agent/utils/process_v2/evidence/assembler.py @@ -4,15 +4,16 @@ from __future__ import annotations import re -from app.core.agent.processes.v2.anchor_signals import anchor_signal_types -from app.core.agent.processes.v2.models import RetrievedFile, RetrievedSummary, V2AnchorType, V2RouteResult -from app.core.agent.processes.v2.retrieval.target_doc_seeding import normalize_doc_path +from app.core.agent.utils.process_v2.anchor_signals import anchor_signal_types +from app.core.agent.utils.process_v2.models import RetrievedFile, RetrievedSummary, V2AnchorType, V2RouteResult +from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import normalize_doc_path from app.core.rag.contracts.enums import RagLayer class DocsEvidenceAssembler: _API_PATH_PREFIXES = ("docs/api/", "docs/endpoints/", "docs/methods/", "api/", "endpoints/", "methods/") _GENERIC_DOC_MARKERS = ("readme", "overview", "index", "navigation", "related docs", "catalog") + def assemble_summaries(self, rows: list[dict], route: V2RouteResult) -> list[RetrievedSummary]: items = self._rank_rows(rows, route, mode="summary") ranked = [ @@ -140,7 +141,6 @@ class DocsEvidenceAssembler: if mode == "find_files": breakdown["path_match"] *= 3 breakdown["filename_match"] *= 2 - 
breakdown["alias_match"] *= 1 breakdown["semantic"] = max(0, breakdown["semantic"] // 2) return breakdown @@ -181,10 +181,7 @@ class DocsEvidenceAssembler: hn = normalize_doc_path(hint).lower() if hn in top_norm: continue - candidate = next( - (item for item in ranked if normalize_doc_path(item["path"]).lower() == hn), - None, - ) + candidate = next((item for item in ranked if normalize_doc_path(item["path"]).lower() == hn), None) if candidate is None: continue if len(top) < k: @@ -203,7 +200,10 @@ class DocsEvidenceAssembler: first = ranked[0] if not first.get("is_generic_doc"): return ranked - promoted = next((item for item in ranked[1:] if not item.get("is_generic_doc") and self._is_specific_candidate(item, route)), None) + promoted = next( + (item for item in ranked[1:] if not item.get("is_generic_doc") and self._is_specific_candidate(item, route)), + None, + ) if promoted is None: return ranked return [promoted] + [item for item in ranked if item["path"] != promoted["path"]] diff --git a/src/app/core/agent/processes/v2/evidence/gate.py b/src/app/core/agent/utils/process_v2/evidence/gate.py similarity index 94% rename from src/app/core/agent/processes/v2/evidence/gate.py rename to src/app/core/agent/utils/process_v2/evidence/gate.py index 37d961b..ed6b0d6 100644 --- a/src/app/core/agent/processes/v2/evidence/gate.py +++ b/src/app/core/agent/utils/process_v2/evidence/gate.py @@ -2,8 +2,8 @@ from __future__ import annotations from dataclasses import dataclass, field -from app.core.agent.processes.v2.anchor_signals import anchor_signal_types -from app.core.agent.processes.v2.models import RetrievedFile, RetrievedSummary, V2AnchorType, V2Intent, V2RouteResult +from app.core.agent.utils.process_v2.anchor_signals import anchor_signal_types +from app.core.agent.utils.process_v2.models import RetrievedFile, RetrievedSummary, V2AnchorType, V2Intent, V2RouteResult @dataclass(slots=True) diff --git a/src/app/core/agent/processes/v2/models.py 
b/src/app/core/agent/utils/process_v2/models.py similarity index 70% rename from src/app/core/agent/processes/v2/models.py rename to src/app/core/agent/utils/process_v2/models.py index a9b86bc..51e9751 100644 --- a/src/app/core/agent/processes/v2/models.py +++ b/src/app/core/agent/utils/process_v2/models.py @@ -1,4 +1,4 @@ -"""Типы маршрута и выдачи retrieval для процесса v2.""" +"""Route and retrieval models for process v2.""" from __future__ import annotations @@ -12,12 +12,15 @@ class V2Domain: class V2Intent: DOC_EXPLAIN = "DOC_EXPLAIN" + DOC_UPDATE = "DOC_UPDATE" GENERAL_QA = "GENERAL_QA" class V2Subintent: SUMMARY = "SUMMARY" FIND_FILES = "FIND_FILES" + API_EXPOSED = "API_EXPOSED" + FROM_FEATURE = "FROM_FEATURE" class V2AnchorType: @@ -29,6 +32,26 @@ class V2AnchorType: FIND_FILES = "FIND_FILES" +class V2ScopeType: + """Grounded documentation scope (pre-LLM, catalog-backed).""" + + GLOBAL = "global" + DOMAIN = "domain" + SUBDOMAIN = "subdomain" + ENTITY = "entity" + UNKNOWN = "unknown" + + +@dataclass(slots=True) +class ScopeCandidate: + """A single catalog-backed match candidate for intent-router scope grounding.""" + + value: str + score: float + source_layer: str + match_type: str + + @dataclass(slots=True) class V2RouteAnchors: """Якоря из запроса для retrieval и downstream.""" @@ -40,6 +63,10 @@ class V2RouteAnchors: matched_aliases: list[str] = field(default_factory=list) process_domain: str | None = None process_subdomain: str | None = None + candidate_domains: list[ScopeCandidate] = field(default_factory=list) + candidate_subdomains: list[ScopeCandidate] = field(default_factory=list) + candidate_entities: list[ScopeCandidate] = field(default_factory=list) + candidate_apis: list[ScopeCandidate] = field(default_factory=list) @dataclass(slots=True) @@ -55,6 +82,7 @@ class V2RouteResult: routing_mode: str = "deterministic" llm_router_used: bool = False reason_short: str = "" + scope_type: str = V2ScopeType.UNKNOWN @property def domain(self) -> str: diff 
--git a/src/app/core/agent/utils/process_v2/plan_resolver/__init__.py b/src/app/core/agent/utils/process_v2/plan_resolver/__init__.py new file mode 100644 index 0000000..8b4183a --- /dev/null +++ b/src/app/core/agent/utils/process_v2/plan_resolver/__init__.py @@ -0,0 +1,6 @@ +"""Построение RetrievalPlan по маршруту v2 (intent/subintent, якоря, фильтры).""" + +from app.core.agent.utils.process_v2.plan_resolver.base import RetrievalPlanResolver, WorkflowRetrievalPlanResolver +from app.core.agent.utils.process_v2.plan_resolver.policy_resolver import V2RetrievalPolicyResolver + +__all__ = ["RetrievalPlanResolver", "WorkflowRetrievalPlanResolver", "V2RetrievalPolicyResolver"] diff --git a/src/app/core/agent/utils/process_v2/plan_resolver/base.py b/src/app/core/agent/utils/process_v2/plan_resolver/base.py new file mode 100644 index 0000000..da8ccb2 --- /dev/null +++ b/src/app/core/agent/utils/process_v2/plan_resolver/base.py @@ -0,0 +1,26 @@ +"""Contracts for v2 retrieval-plan resolvers.""" + +from __future__ import annotations + +from typing import Protocol + +from app.core.agent.utils.process_v2.models import V2RouteResult +from app.core.rag.retrieval.session_retriever import RetrievalPlan + + +class RetrievalPlanResolver(Protocol): + """Minimal contract used by workflows to build retrieval plans.""" + + def resolve(self, route: V2RouteResult) -> RetrievalPlan: + """Build a retrieval plan for the provided route.""" + + +class WorkflowRetrievalPlanResolver(Protocol): + """Route-aware resolver implementation used by delegating policy resolver.""" + + def supports(self, route: V2RouteResult) -> bool: + """Return True when this resolver can handle the route.""" + + def resolve(self, route: V2RouteResult) -> RetrievalPlan: + """Build a retrieval plan for the provided route.""" + diff --git a/src/app/core/agent/utils/process_v2/plan_resolver/policy_resolver.py b/src/app/core/agent/utils/process_v2/plan_resolver/policy_resolver.py new file mode 100644 index 0000000..a450c68 
--- /dev/null +++ b/src/app/core/agent/utils/process_v2/plan_resolver/policy_resolver.py @@ -0,0 +1,42 @@ +"""Delegating retrieval policy resolver for process v2.""" + +from __future__ import annotations + +from collections.abc import Sequence + +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.retrieval.retrieval_policy import ( + DocExplainApiExposedRetrievalPolicy, +) +from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.retrieval.retrieval_policy import ( + DocExplainFindFilesRetrievalPolicy, +) +from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.retrieval.retrieval_policy import ( + DocExplainSummaryRetrievalPolicy, +) +from app.core.agent.processes.v2.workflows.general_qa_summary.steps.retrieval.retrieval_policy import ( + GeneralQaSummaryRetrievalPolicy, +) +from app.core.agent.utils.process_v2.models import V2RouteResult +from app.core.agent.utils.process_v2.plan_resolver.base import WorkflowRetrievalPlanResolver +from app.core.rag.retrieval.session_retriever import RetrievalPlan + + +class V2RetrievalPolicyResolver: + """Resolve retrieval plans by delegating to workflow-local resolvers.""" + + def __init__(self, resolvers: Sequence[WorkflowRetrievalPlanResolver] | None = None) -> None: + self._resolvers = list(resolvers or self._default_resolvers()) + + def resolve(self, route: V2RouteResult) -> RetrievalPlan: + for resolver in self._resolvers: + if resolver.supports(route): + return resolver.resolve(route) + raise ValueError(f"No retrieval policy for route: {(route.routing_domain, route.intent, route.subintent)!r}") + + def _default_resolvers(self) -> tuple[WorkflowRetrievalPlanResolver, ...]: + return ( + GeneralQaSummaryRetrievalPolicy(), + DocExplainFindFilesRetrievalPolicy(), + DocExplainApiExposedRetrievalPolicy(), + DocExplainSummaryRetrievalPolicy(), + ) diff --git a/src/app/core/agent/utils/process_v2/rag_retrieval/__init__.py 
b/src/app/core/agent/utils/process_v2/rag_retrieval/__init__.py new file mode 100644 index 0000000..1e69d31 --- /dev/null +++ b/src/app/core/agent/utils/process_v2/rag_retrieval/__init__.py @@ -0,0 +1,17 @@ +"""Слой извлечения из RAG для process v2: адаптер к сессии, индексы и нормализация путей.""" + +from app.core.agent.utils.process_v2.rag_retrieval.metadata_lookup import DocsMetadataLookupIndex +from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import ( + RagRowIndex, + normalize_doc_path, + seed_candidates_from_target_hints, +) +from app.core.agent.utils.process_v2.rag_retrieval.v2_rag_adapter import V2RagRetrievalAdapter + +__all__ = [ + "DocsMetadataLookupIndex", + "RagRowIndex", + "V2RagRetrievalAdapter", + "normalize_doc_path", + "seed_candidates_from_target_hints", +] diff --git a/src/app/core/agent/processes/v2/retrieval/metadata_lookup.py b/src/app/core/agent/utils/process_v2/rag_retrieval/metadata_lookup.py similarity index 93% rename from src/app/core/agent/processes/v2/retrieval/metadata_lookup.py rename to src/app/core/agent/utils/process_v2/rag_retrieval/metadata_lookup.py index 6125329..1ac162f 100644 --- a/src/app/core/agent/processes/v2/retrieval/metadata_lookup.py +++ b/src/app/core/agent/utils/process_v2/rag_retrieval/metadata_lookup.py @@ -1,9 +1,11 @@ +"""Индекс метаданных RAG-строк для подбора кандидатов по маршруту v2.""" + from __future__ import annotations import re from collections import defaultdict -from app.core.agent.processes.v2.models import V2RouteResult +from app.core.agent.utils.process_v2.models import V2RouteResult class DocsMetadataLookupIndex: diff --git a/src/app/core/agent/processes/v2/retrieval/target_doc_seeding.py b/src/app/core/agent/utils/process_v2/rag_retrieval/target_doc_seeding.py similarity index 96% rename from src/app/core/agent/processes/v2/retrieval/target_doc_seeding.py rename to src/app/core/agent/utils/process_v2/rag_retrieval/target_doc_seeding.py index 3c8151f..7dcb9f5 100644 --- 
a/src/app/core/agent/processes/v2/retrieval/target_doc_seeding.py +++ b/src/app/core/agent/utils/process_v2/rag_retrieval/target_doc_seeding.py @@ -1,3 +1,5 @@ +"""Нормализация путей документов, склейка RAG-строк и сидирование по target hints.""" + from __future__ import annotations diff --git a/src/app/core/agent/processes/v2/retrieval/v2_rag_adapter.py b/src/app/core/agent/utils/process_v2/rag_retrieval/v2_rag_adapter.py similarity index 54% rename from src/app/core/agent/processes/v2/retrieval/v2_rag_adapter.py rename to src/app/core/agent/utils/process_v2/rag_retrieval/v2_rag_adapter.py index 7246c83..fedfe70 100644 --- a/src/app/core/agent/processes/v2/retrieval/v2_rag_adapter.py +++ b/src/app/core/agent/utils/process_v2/rag_retrieval/v2_rag_adapter.py @@ -2,7 +2,7 @@ from __future__ import annotations -from app.core.agent.processes.v2.retrieval.target_doc_seeding import ( +from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import ( merge_row_lists, normalize_doc_path, path_variants_for_rag_query, @@ -17,7 +17,11 @@ class _PlanDrivenRetrieval: async def fetch_rows(self, rag_session_id: str, query_text: str, plan: RetrievalPlan) -> list[dict]: seeded_rows = await self._seed_from_target_hints(rag_session_id, plan) semantic_rows = await self._retriever.retrieve(rag_session_id, query_text, plan) - return merge_row_lists(seeded_rows, semantic_rows) + merged = merge_row_lists(seeded_rows, semantic_rows) + if not _needs_docs_catalog_fallback(plan): + return _apply_query_signal_filter(merged, plan) + fallback_rows = await self._fetch_docs_catalog_rows(rag_session_id, plan) + return _apply_query_signal_filter(merge_row_lists(merged, fallback_rows), plan) async def fetch_exact_paths(self, rag_session_id: str, *, paths: list[str], layers: list[str] | None = None) -> list[dict]: return await self._retriever.retrieve_exact_files(rag_session_id, paths=paths, layers=layers) @@ -69,6 +73,10 @@ class _PlanDrivenRetrieval: limit=200, ) + async def 
_fetch_docs_catalog_rows(self, rag_session_id: str, plan: RetrievalPlan) -> list[dict]: + rows = await self._retriever.list_docs_scope_rows(rag_session_id, limit=max(1000, plan.limit * 10)) + return _filter_docs_rows(rows, plan)[: plan.limit] + def _target_doc_hints(self, plan: RetrievalPlan) -> list[str]: raw = plan.filters.get("target_doc_hints") if not isinstance(raw, list): @@ -106,3 +114,85 @@ class V2RagRetrievalAdapter: layers=layers, limit=limit, ) + + +def _needs_docs_catalog_fallback(plan: RetrievalPlan) -> bool: + layers = {str(item).strip() for item in plan.layers} + if "D1_DOCUMENT_CATALOG" not in layers: + return False + return "metadata.type" in plan.filters or "metadata.doc_type" in plan.filters + + +def _filter_docs_rows(rows: list[dict], plan: RetrievalPlan) -> list[dict]: + allowed_layers = {str(item).strip() for item in plan.layers if str(item).strip()} + metadata_type = _norm(plan.filters.get("metadata.type")) or _norm(plan.filters.get("metadata.doc_type")) + metadata_domain = _norm(plan.filters.get("metadata.domain")) + metadata_subdomain = _norm(plan.filters.get("metadata.subdomain")) + out: list[dict] = [] + for row in rows: + layer = str(row.get("layer") or "").strip() + if allowed_layers and layer not in allowed_layers: + continue + metadata = row.get("metadata") if isinstance(row.get("metadata"), dict) else {} + row_type = _norm(metadata.get("type")) or _norm(metadata.get("doc_type")) + if metadata_type and row_type != metadata_type: + continue + if metadata_domain and _norm(metadata.get("domain")) != metadata_domain: + continue + if metadata_subdomain and _norm(metadata.get("subdomain")) != metadata_subdomain: + continue + out.append(row) + return sorted(out, key=lambda item: str(item.get("path") or "")) + + +def _norm(value: object) -> str: + return str(value or "").strip().lower() + + +def _apply_query_signal_filter(rows: list[dict], plan: RetrievalPlan) -> list[dict]: + signals = _query_signals(plan) + if not signals: + return rows + 
strict = [row for row in rows if _matches_any_signal(row, signals, strict=True)] + if strict: + return strict + broad = [row for row in rows if _matches_any_signal(row, signals, strict=False)] + return broad or rows + + +def _query_signals(plan: RetrievalPlan) -> list[str]: + raw = plan.filters.get("query_signals") + if not isinstance(raw, list): + return [] + return [item for item in (_norm(value) for value in raw) if item] + + +def _matches_any_signal(row: dict, signals: list[str], *, strict: bool) -> bool: + haystack = _strict_haystack(row) if strict else _broad_haystack(row) + return any(signal in haystack for signal in signals) + + +def _strict_haystack(row: dict) -> str: + metadata = row.get("metadata") if isinstance(row.get("metadata"), dict) else {} + parts = [ + row.get("path"), + row.get("title"), + metadata.get("endpoint"), + metadata.get("name"), + ] + return " ".join(_norm(part) for part in parts if _norm(part)) + + +def _broad_haystack(row: dict) -> str: + metadata = row.get("metadata") if isinstance(row.get("metadata"), dict) else {} + parts = [ + row.get("path"), + row.get("title"), + row.get("content"), + metadata.get("endpoint"), + metadata.get("name"), + metadata.get("summary_text"), + metadata.get("entities"), + metadata.get("tags"), + ] + return " ".join(_norm(part) for part in parts if _norm(part)) diff --git a/src/app/core/agent/utils/traces/__init__.py b/src/app/core/agent/utils/traces/__init__.py deleted file mode 100644 index dc5b892..0000000 --- a/src/app/core/agent/utils/traces/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Shared trace helpers will live here.""" diff --git a/src/app/core/api/application/filesystem_snapshot_resolver.py b/src/app/core/api/application/filesystem_snapshot_resolver.py new file mode 100644 index 0000000..1dc4496 --- /dev/null +++ b/src/app/core/api/application/filesystem_snapshot_resolver.py @@ -0,0 +1,152 @@ +from __future__ import annotations + +import hashlib +import logging +from pathlib import Path, 
PurePosixPath + +LOGGER = logging.getLogger(__name__) + + +class FilesystemSnapshotResolver: + _SKIP_DIRS = {".git", ".venv", "venv", "__pycache__", ".pytest_cache", "node_modules", "dist", "build", "target"} + _TEXT_EXTENSIONS = { + ".py", + ".md", + ".txt", + ".rst", + ".json", + ".yaml", + ".yml", + ".toml", + ".ini", + ".cfg", + ".env", + ".js", + ".ts", + ".tsx", + ".jsx", + ".sql", + ".sh", + } + + def augment(self, *, project_id: str, files: list[dict]) -> list[dict]: + root = self._resolve_root(project_id) + request_files, excluded_request = self._normalize_request_files(files) + if root is None: + if excluded_request: + LOGGER.warning( + "snapshot resolver excluded request files: count=%s reasons=%s", + len(excluded_request), + excluded_request[:20], + ) + return request_files + disk_files = self._collect(root) + merged = self._merge(disk_files=disk_files, request_files=request_files) + LOGGER.warning( + "snapshot resolver summary: project_id=%s request_total=%s request_kept=%s disk_docs=%s merged=%s", + project_id, + len(files), + len(request_files), + len(disk_files), + len(merged), + ) + if excluded_request: + LOGGER.warning( + "snapshot resolver excluded request files: count=%s reasons=%s", + len(excluded_request), + excluded_request[:20], + ) + return merged + + def _resolve_root(self, project_id: str) -> Path | None: + candidate = Path(str(project_id or "").strip()).expanduser() + if not candidate.is_absolute(): + LOGGER.warning("snapshot resolver skip fs augment: project_id is not absolute (%s)", project_id) + return None + if not candidate.exists() or not candidate.is_dir(): + LOGGER.warning("snapshot resolver skip fs augment: project_id is not existing dir (%s)", project_id) + return None + return candidate + + def _collect(self, root: Path) -> list[dict]: + docs_root = root / "docs" + if not docs_root.exists() or not docs_root.is_dir(): + LOGGER.warning("snapshot resolver: docs root not found (%s)", docs_root) + return [] + out: list[dict] = [] + 
for path in sorted(docs_root.rglob("*")): + if not path.is_file(): + continue + rel = path.relative_to(root).as_posix() + if self._should_skip(rel): + continue + item = self._read_file(path, rel) + if item is not None: + out.append(item) + return out + + def _should_skip(self, rel_path: str) -> bool: + parts = PurePosixPath(rel_path).parts + if not parts: + return True + if parts[0] != "docs": + return True + if any(part in self._SKIP_DIRS for part in parts): + return True + if any(part.startswith(".") for part in parts): + return True + return Path(rel_path).suffix.lower() not in self._TEXT_EXTENSIONS + + def _read_file(self, path: Path, rel_path: str) -> dict | None: + raw = path.read_bytes() + if b"\x00" in raw: + return None + content = raw.decode("utf-8", errors="ignore") + return { + "path": rel_path, + "content": content, + "content_hash": hashlib.sha256(content.encode("utf-8")).hexdigest(), + } + + def _merge(self, *, disk_files: list[dict], request_files: list[dict]) -> list[dict]: + merged: dict[str, dict] = {} + for item in disk_files: + path = str(item.get("path") or "").strip() + if path: + merged[path] = item + for item in request_files: + path = str(item.get("path") or "").strip() + if path: + # Request payload has priority for same path + merged[path] = item + return list(merged.values()) + + def _normalize_request_files(self, files: list[dict]) -> tuple[list[dict], list[str]]: + kept: list[dict] = [] + excluded: list[str] = [] + for item in files: + path = str(item.get("path") or "").replace("\\", "/").strip() + if not path: + excluded.append("empty_path") + continue + rel_path = self._normalize_rel_path(path) + if rel_path is None: + excluded.append(f"{path}:outside_docs") + continue + normalized = dict(item) + normalized["path"] = rel_path + kept.append(normalized) + return kept, excluded + + def _normalize_rel_path(self, path: str) -> str | None: + marker = "/docs/" + lowered = path.lower() + if lowered.startswith("docs/"): + rel = path + elif 
marker in lowered: + idx = lowered.index(marker) + 1 + rel = path[idx:] + else: + return None + rel = rel.lstrip("/").replace("\\", "/") + return rel if rel.startswith("docs/") else None diff --git a/src/app/core/api/application/session_bootstrap_service.py b/src/app/core/api/application/session_bootstrap_service.py index e55a1b1..60edcdd 100644 --- a/src/app/core/api/application/session_bootstrap_service.py +++ b/src/app/core/api/application/session_bootstrap_service.py @@ -2,6 +2,7 @@ from __future__ import annotations from dataclasses import dataclass +from app.core.api.application.filesystem_snapshot_resolver import FilesystemSnapshotResolver from app.core.api.application.session_service import SessionService from app.core.api.domain.models.agent_session import AgentSession from app.core.rag.indexing import IndexJob @@ -15,11 +16,18 @@ class BootstrappedAgentSession: class SessionBootstrapService: - def __init__(self, sessions: SessionService, rag: RagModule) -> None: + def __init__( + self, + sessions: SessionService, + rag: RagModule, + snapshot_resolver: FilesystemSnapshotResolver | None = None, + ) -> None: self._sessions = sessions self._rag = rag + self._snapshot_resolver = snapshot_resolver or FilesystemSnapshotResolver() async def create(self, project_id: str, files: list[dict]) -> BootstrappedAgentSession: - rag_session, index_job = await self._rag.create_session(project_id=project_id, files=files) + resolved_files = self._snapshot_resolver.augment(project_id=project_id, files=files) + rag_session, index_job = await self._rag.create_session(project_id=project_id, files=resolved_files) session = self._sessions.create(rag_session_id=rag_session.rag_session_id) return BootstrappedAgentSession(session=session, index_job=index_job) diff --git a/src/app/core/api/controllers/request_controller.py b/src/app/core/api/controllers/request_controller.py index 233806f..716b94d 100644 --- a/src/app/core/api/controllers/request_controller.py +++ 
b/src/app/core/api/controllers/request_controller.py @@ -29,6 +29,8 @@ class RequestController: status=item.status.value, process_version=item.process_version, answer=item.answer, + changeset=item.changeset, + apply_changeset=item.apply_changeset, error=item.error, created_at=item.created_at, completed_at=item.completed_at, diff --git a/src/app/core/api/domain/models/agent_request.py b/src/app/core/api/domain/models/agent_request.py index c811055..b7d10db 100644 --- a/src/app/core/api/domain/models/agent_request.py +++ b/src/app/core/api/domain/models/agent_request.py @@ -1,9 +1,11 @@ from __future__ import annotations from dataclasses import dataclass +from dataclasses import field from datetime import datetime, timezone from app.schemas.common import ErrorPayload +from app.schemas.changeset import ChangeItem from app.schemas.orchestration import RequestExecutionStatus @@ -17,6 +19,8 @@ class AgentRequest: created_at: datetime completed_at: datetime | None = None answer: str | None = None + changeset: list[ChangeItem] = field(default_factory=list) + apply_changeset: bool = False error: ErrorPayload | None = None @classmethod diff --git a/src/app/core/application.py b/src/app/core/application.py index 723aaaa..31893ed 100644 --- a/src/app/core/application.py +++ b/src/app/core/application.py @@ -1,11 +1,12 @@ import logging +import os from pathlib import Path from app.core.agent.processes import V1Process, V2Process from app.core.agent.processes.v2 import V2IntentRouter -from app.core.agent.processes.v2.evidence.assembler import DocsEvidenceAssembler -from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver -from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter +from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler +from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver +from app.core.agent.utils.process_v2.rag_retrieval import 
V2RagRetrievalAdapter from app.core.rag.retrieval.session_retriever import RagSessionRetriever from app.core.agent.runtime import AgentRuntime, ProcessRegistry, ProcessRunner, RuntimeEventPublisher from app.core.agent.utils.llm import AgentLlmService, PromptLoader @@ -54,8 +55,12 @@ class ModularApplication: ) _v2_prompt_loader = PromptLoader( [ - Path(__file__).resolve().parent / "agent/processes/v2/prompts.yml", - Path(__file__).resolve().parent / "agent/processes/v2/general_prompts.yml", + Path(__file__).resolve().parent + / "agent/processes/v2/workflows/doc_explain_summary/steps/prompts/prompts.yml", + Path(__file__).resolve().parent + / "agent/processes/v2/workflows/general_qa_summary/steps/prompts/prompts.yml", + Path(__file__).resolve().parent + / "agent/processes/v2/workflows/doc_update_from_feature/steps/prompts/prompts.yml", Path(__file__).resolve().parent / "agent/processes/v2/intent_router/routers/prompts.yml", ] ) @@ -66,6 +71,7 @@ class ModularApplication: _v2_rag_adapter = V2RagRetrievalAdapter(_v2_rag_retriever) _v2_evidence = DocsEvidenceAssembler() _v2_policy = V2RetrievalPolicyResolver() + _doc_rules_enabled = os.getenv("V2_DOC_RULES_ENABLED", "true").strip().lower() in {"1", "true", "yes"} self.agent_sessions = InMemorySessionStore() self.agent_requests = InMemoryRequestStore() @@ -87,6 +93,7 @@ class ModularApplication: evidence_assembler=_v2_evidence, router=V2IntentRouter(llm=self._v2_llm), workflow_llm_enabled=True, + doc_rules_enabled=_doc_rules_enabled, ), ] ) diff --git a/src/app/core/rag/indexing/common/path_filter.py b/src/app/core/rag/indexing/common/path_filter.py index d178875..e757ef0 100644 --- a/src/app/core/rag/indexing/common/path_filter.py +++ b/src/app/core/rag/indexing/common/path_filter.py @@ -1,24 +1,38 @@ from __future__ import annotations +import logging from pathlib import PurePosixPath _CACHE_DIR_NAMES = {"__pycache__", "app-data", "build", "grafana"} +LOGGER = logging.getLogger(__name__) def 
should_skip_indexing_path(path: str) -> bool: + return indexing_exclusion_reason(path) is not None + + +def indexing_exclusion_reason(path: str) -> str | None: parts = _path_parts(path) if not parts: - return True + return "empty_path" for part in parts: if part in _CACHE_DIR_NAMES: - return True + return f"cache_dir:{part}" if _is_hidden_part(part): - return True - return False + return f"hidden_part:{part}" + return None def filter_snapshot_files(files: list[dict]) -> list[dict]: - return [item for item in files if not should_skip_indexing_path(str(item.get("path", "")))] + out: list[dict] = [] + for item in files: + path = str(item.get("path", "")) + reason = indexing_exclusion_reason(path) + if reason is not None: + LOGGER.warning("rag ingest excluded file: path=%s reason=%s", path, reason) + continue + out.append(item) + return out def filter_changes_for_indexing(changed_files: list[dict]) -> list[dict]: @@ -29,7 +43,14 @@ def filter_changes_for_indexing(changed_files: list[dict]) -> list[dict]: if op == "delete": result.append(item) continue - if should_skip_indexing_path(path): + reason = indexing_exclusion_reason(path) + if reason is not None: + LOGGER.warning( + "rag ingest excluded changed file: op=%s path=%s reason=%s", + op, + path, + reason, + ) continue result.append(item) return result diff --git a/src/app/core/rag/indexing/docs/frontmatter_parser.py b/src/app/core/rag/indexing/docs/frontmatter_parser.py index 8f58885..55d5743 100644 --- a/src/app/core/rag/indexing/docs/frontmatter_parser.py +++ b/src/app/core/rag/indexing/docs/frontmatter_parser.py @@ -1,7 +1,12 @@ from __future__ import annotations +import logging +import re + import yaml +LOGGER = logging.getLogger(__name__) + class DocsFrontmatterParser: def split(self, content: str) -> tuple[dict, str]: @@ -11,5 +16,34 @@ class DocsFrontmatterParser: if len(parts) < 3: return {}, content _, raw, body = parts - payload = yaml.safe_load(raw) or {} - return payload if isinstance(payload, dict) else 
{}, body.strip() + try: + payload = yaml.safe_load(raw) or {} + if isinstance(payload, dict): + return payload, body.strip() + except yaml.YAMLError as exc: + LOGGER.warning("docs frontmatter parse warning: reason=%s", exc.__class__.__name__) + payload = self._fallback(raw) + payload["__frontmatter_parse_error__"] = True + return payload, body.strip() + + def _fallback(self, raw: str) -> dict: + payload: dict[str, object] = {} + for line in raw.splitlines(): + text = line.strip() + if not text or text.startswith("#"): + continue + match = re.match(r"^([A-Za-z_][A-Za-z0-9_-]*)\s*:\s*(.*)$", text) + if not match: + continue + key = match.group(1) + value = match.group(2).strip() + if not value: + continue + if value == "[]": + payload[key] = [] + continue + if value == "{}": + payload[key] = {} + continue + payload[key] = value.strip("'\"") + return payload diff --git a/src/app/core/rag/indexing/docs/pipeline.py b/src/app/core/rag/indexing/docs/pipeline.py index 0236b57..d7c0e00 100644 --- a/src/app/core/rag/indexing/docs/pipeline.py +++ b/src/app/core/rag/indexing/docs/pipeline.py @@ -42,6 +42,8 @@ class DocsIndexingPipeline: def index_file(self, *, repo_id: str, commit_sha: str | None, path: str, content: str) -> list[RagDocument]: source = RagSource(repo_id=repo_id, commit_sha=commit_sha, path=path) frontmatter, body = self._frontmatter.split(content) + if bool(frontmatter.get("__frontmatter_parse_error__")): + LOGGER.error("docs pipeline partial index: path=%s reason=frontmatter_parse_error", path) doc_kind = self._classifier.classify(path) sections = self._chunker.chunk(body) parsed = self._content.parse( @@ -122,4 +124,10 @@ class DocsIndexingPipeline: extractor_name, exc.__class__.__name__, ) + LOGGER.error( + "docs pipeline partial index: path=%s extractor=%s reason=%s", + path, + extractor_name, + exc.__class__.__name__, + ) return [] diff --git a/src/app/core/rag/indexing/service.py b/src/app/core/rag/indexing/service.py index e366ab9..e909be3 100644 --- 
a/src/app/core/rag/indexing/service.py +++ b/src/app/core/rag/indexing/service.py @@ -84,6 +84,7 @@ class RagService: cached = await asyncio.to_thread(self._repo.get_cached_documents, repo_id, blob_sha) pipelines = self._resolve_pipeline_names(path) if cached: + self._report_missing_or_partial_docs(path, cached) report.documents_list.extend(self._with_file_metadata(cached, file, repo_id, blob_sha)) report.cache_hit_files += 1 LOGGER.warning( @@ -94,6 +95,7 @@ class RagService: ) else: built = self._build_documents(repo_id, path, file) + self._report_missing_or_partial_docs(path, built) embedded = await asyncio.to_thread(self._embed_documents, built, file, repo_id, blob_sha) report.documents_list.extend(embedded) await asyncio.to_thread(self._repo.cache_documents, repo_id, path, blob_sha, embedded) @@ -108,6 +110,12 @@ class RagService: except Exception as exc: report.failed_files += 1 report.warnings.append(f"{path}: {exc}") + LOGGER.error( + "rag ingest failed file: rag_session_id=%s path=%s error=%s", + rag_session_id, + path, + exc, + ) await self._notify_progress(progress_cb, index, total_files, path) report.documents = len(report.documents_list) return report @@ -134,6 +142,16 @@ class RagService: names.append("DOCS") return names + def _report_missing_or_partial_docs(self, path: str, docs: list[RagDocument]) -> None: + if not docs: + LOGGER.error("rag ingest file not indexed: path=%s reason=no_documents_built", path) + return + if not str(path).startswith("docs/"): + return + has_d1 = any(str(doc.layer or "") == "D1_DOCUMENT_CATALOG" for doc in docs) + if not has_d1: + LOGGER.error("rag ingest file partially indexed: path=%s reason=missing_d1_document_catalog", path) + def _embed_documents(self, docs: list[RagDocument], file: dict, repo_id: str, blob_sha: str) -> list[RagDocument]: if not docs: return [] diff --git a/src/app/core/rag/persistence/query_repository.py b/src/app/core/rag/persistence/query_repository.py index a4732ad..4d4b89b 100644 --- 
a/src/app/core/rag/persistence/query_repository.py +++ b/src/app/core/rag/persistence/query_repository.py @@ -27,6 +27,8 @@ class RagQueryRepository: prefer_like_patterns: list[str] | None = None, metadata_domain: str | None = None, metadata_subdomain: str | None = None, + metadata_type: str | None = None, + metadata_doc_type: str | None = None, prefer_non_tests: bool = False, ) -> list[dict]: sql, params = self._builder.build_retrieve( @@ -42,6 +44,8 @@ class RagQueryRepository: prefer_like_patterns=prefer_like_patterns, metadata_domain=metadata_domain, metadata_subdomain=metadata_subdomain, + metadata_type=metadata_type, + metadata_doc_type=metadata_doc_type, prefer_non_tests=prefer_non_tests, ) with get_engine().connect() as conn: @@ -286,6 +290,42 @@ class RagQueryRepository: rows = conn.execute(stmt, params).mappings().fetchall() return [self._row_to_dict(row) for row in rows] + def list_docs_scope_index_rows( + self, + rag_session_id: str, + *, + limit: int = 8000, + ) -> list[dict]: + """Return D1/D3 catalog rows for a session (no vector retrieval, no D0 chunks). + + Used by the v2 intent router to derive scope candidates from indexed DOCS layers only. 
+ """ + sid = str(rag_session_id or "").strip() + if not sid: + return [] + lim = max(1, min(int(limit), 50_000)) + layers = ("D1_DOCUMENT_CATALOG", "D3_ENTITY_CATALOG") + stmt = text( + """ + SELECT path, content, layer, title, metadata_json, span_start, span_end, + 0 AS lexical_rank, + 0 AS prefer_bonus, + 0 AS test_penalty, + 0 AS structural_rank, + 0 AS layer_rank, + 0 AS distance + FROM rag_chunks + WHERE rag_session_id = :sid + AND layer IN :layers + ORDER BY layer ASC, path ASC, COALESCE(span_start, 0) ASC, COALESCE(chunk_index, 0) ASC + LIMIT :lim + """ + ).bindparams(bindparam("layers", expanding=True)) + params: dict = {"sid": sid, "layers": list(layers), "lim": lim} + with get_engine().connect() as conn: + rows = conn.execute(stmt, params).mappings().fetchall() + return [self._row_to_dict(row) for row in rows] + def _row_to_dict(self, row) -> dict: data = dict(row) raw_metadata = data.pop("metadata_json") diff --git a/src/app/core/rag/persistence/repository.py b/src/app/core/rag/persistence/repository.py index ca5ba07..87d34bb 100644 --- a/src/app/core/rag/persistence/repository.py +++ b/src/app/core/rag/persistence/repository.py @@ -71,6 +71,8 @@ class RagRepository: prefer_like_patterns: list[str] | None = None, metadata_domain: str | None = None, metadata_subdomain: str | None = None, + metadata_type: str | None = None, + metadata_doc_type: str | None = None, prefer_non_tests: bool = False, ) -> list[dict]: return self._query.retrieve( @@ -86,6 +88,8 @@ class RagRepository: prefer_like_patterns=prefer_like_patterns, metadata_domain=metadata_domain, metadata_subdomain=metadata_subdomain, + metadata_type=metadata_type, + metadata_doc_type=metadata_doc_type, prefer_non_tests=prefer_non_tests, ) @@ -160,3 +164,6 @@ class RagRepository: layers=layers, limit=limit, ) + + def list_docs_scope_index_rows(self, rag_session_id: str, *, limit: int = 8000) -> list[dict]: + return self._query.list_docs_scope_index_rows(rag_session_id, limit=limit) diff --git 
a/src/app/core/rag/persistence/retrieval_statement_builder.py b/src/app/core/rag/persistence/retrieval_statement_builder.py index bdedb92..9db9297 100644 --- a/src/app/core/rag/persistence/retrieval_statement_builder.py +++ b/src/app/core/rag/persistence/retrieval_statement_builder.py @@ -21,6 +21,8 @@ class RetrievalStatementBuilder: prefer_like_patterns: list[str] | None = None, metadata_domain: str | None = None, metadata_subdomain: str | None = None, + metadata_type: str | None = None, + metadata_doc_type: str | None = None, prefer_non_tests: bool = False, ) -> tuple[str, dict]: emb = "[" + ",".join(str(x) for x in query_embedding) + "]" @@ -33,6 +35,8 @@ class RetrievalStatementBuilder: self._append_like_group(filters, params, "exclude_like", exclude_like_patterns, negate=True) self._append_metadata_equals(filters, params, "metadata_domain", "domain", metadata_domain) self._append_metadata_equals(filters, params, "metadata_subdomain", "subdomain", metadata_subdomain) + self._append_metadata_equals(filters, params, "metadata_type", "type", metadata_type) + self._append_metadata_equals(filters, params, "metadata_doc_type", "doc_type", metadata_doc_type) if layers: filters.append("layer = ANY(:layers)") params["layers"] = layers diff --git a/src/app/core/rag/retrieval/session_retriever.py b/src/app/core/rag/retrieval/session_retriever.py index d19e661..8a74edf 100644 --- a/src/app/core/rag/retrieval/session_retriever.py +++ b/src/app/core/rag/retrieval/session_retriever.py @@ -77,6 +77,13 @@ class RagSessionRetriever: limit=limit, ) + async def list_docs_scope_rows(self, rag_session_id: str, *, limit: int = 8000) -> list[dict]: + return await asyncio.to_thread( + self._repository.list_docs_scope_index_rows, + rag_session_id, + limit=limit, + ) + def _embed_query(self, query: str) -> list[float]: return self._embedder.embed([query])[0] @@ -98,4 +105,8 @@ class RagSessionRetriever: out["metadata_domain"] = filters["metadata.domain"] if "metadata.subdomain" in 
filters: out["metadata_subdomain"] = filters["metadata.subdomain"] + if "metadata.type" in filters: + out["metadata_type"] = filters["metadata.type"] + if "metadata.doc_type" in filters: + out["metadata_doc_type"] = filters["metadata.doc_type"] return out diff --git a/src/app/schemas/agent_api.py b/src/app/schemas/agent_api.py index 20dbb87..5244d4c 100644 --- a/src/app/schemas/agent_api.py +++ b/src/app/schemas/agent_api.py @@ -5,6 +5,7 @@ from datetime import datetime from pydantic import BaseModel, Field from app.schemas.indexing import FileSnapshot, IndexJobStatus +from app.schemas.changeset import ChangeItem from app.schemas.common import ErrorPayload @@ -40,6 +41,8 @@ class AgentRequestStateResponse(BaseModel): status: str process_version: str answer: str | None = None + changeset: list[ChangeItem] = Field(default_factory=list) + apply_changeset: bool = False error: ErrorPayload | None = None created_at: datetime completed_at: datetime | None = None diff --git a/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc index 16a4b91..c05e49d 100644 Binary files a/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc and b/tests/__pycache__/conftest.cpython-312-pytest-9.0.2.pyc differ diff --git a/tests/pipeline_setup_v3/runtime/v2_process_adapter.py b/tests/pipeline_setup_v3/runtime/v2_process_adapter.py index 92c22fa..7d9dd55 100644 --- a/tests/pipeline_setup_v3/runtime/v2_process_adapter.py +++ b/tests/pipeline_setup_v3/runtime/v2_process_adapter.py @@ -4,43 +4,42 @@ import asyncio from dataclasses import asdict, dataclass from pathlib import Path -from app.core.agent.processes.v2.anchor_signals import route_anchor_summary -from app.core.agent.processes.v2 import V2IntentRouter -from app.core.agent.processes.v2.evidence.assembler import DocsEvidenceAssembler -from app.core.agent.processes.v2.evidence.gate import DocsEvidenceGate -from app.core.agent.processes.v2.models import RetrievedFile,
RetrievedSummary, V2Intent, V2Subintent -from app.core.agent.processes.v2.retrieval import DocsMetadataLookupIndex -from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver -from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter -from app.core.agent.processes.v2.workflows.docs_explain_find_files.context import DocsExplainFindFilesContext -from app.core.agent.processes.v2.workflows.docs_explain_find_files.graph import DocsExplainFindFilesGraph -from app.core.agent.processes.v2.workflows.docs_explain_summary.context import DocsExplainSummaryContext -from app.core.agent.processes.v2.workflows.docs_explain_summary.graph import DocsExplainSummaryGraph -from app.core.agent.processes.v2.workflows.general_summary.context import GeneralSummaryContext -from app.core.agent.processes.v2.workflows.general_summary.graph import GeneralSummaryGraph +from app.core.agent.processes.v2 import V2IntentRouter, V2Process from app.core.agent.utils.llm import AgentLlmService, PromptLoader -from app.core.rag.embedding.gigachat_embedder import GigaChatEmbedder from app.core.rag.persistence import RagRepository from app.core.rag.retrieval.session_retriever import RagSessionRetriever from app.core.shared.gigachat.client import GigaChatClient from app.core.shared.gigachat.settings import GigaChatSettings from app.core.shared.gigachat.token_provider import GigaChatTokenProvider from app.infra.observability.module_trace import RequestTraceContext +from app.core.agent.utils.process_v2.anchor_signals import route_anchor_summary +from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler +from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate +from app.core.agent.utils.process_v2.models import V2Intent +from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver +from app.core.agent.utils.process_v2.rag_retrieval import DocsMetadataLookupIndex, 
V2RagRetrievalAdapter from tests.pipeline_setup_v3.core.models import ExecutionPayload, V3Case +from tests.pipeline_setup_v3.shared.rag_indexer import DeterministicEmbedder +from tests.pipeline_setup_v4.executors.process_v2_router_executor import _KeywordLlm class V2ProcessAdapter: def __init__(self, *, workflow_llm_enabled: bool = True) -> None: self._workflow_llm_enabled = workflow_llm_enabled - self._router = V2IntentRouter(llm=_build_v2_llm()) + self._llm = _build_v2_llm() + self._router = V2IntentRouter(llm=_KeywordLlm(), enable_llm_disambiguation=True) self._policy = V2RetrievalPolicyResolver() - retriever = RagSessionRetriever(repository=RagRepository(), embedder=GigaChatEmbedder(_build_client())) + retriever = RagSessionRetriever(repository=RagRepository(), embedder=DeterministicEmbedder()) self._retrieval = V2RagRetrievalAdapter(retriever) - self._evidence = DocsEvidenceAssembler() - self._gate = DocsEvidenceGate() - self._summary_graph = DocsExplainSummaryGraph(_build_v2_llm()) - self._find_files_graph = DocsExplainFindFilesGraph() - self._general_graph = GeneralSummaryGraph(_build_v2_llm()) + self._process = V2Process( + llm=self._llm, + policy_resolver=self._policy, + rag_adapter=self._retrieval, + evidence_assembler=DocsEvidenceAssembler(), + evidence_gate=DocsEvidenceGate(), + router=self._router, + workflow_llm_enabled=workflow_llm_enabled, + ) def execute(self, case: V3Case, rag_session_id: str | None) -> ExecutionPayload: return asyncio.run(self._execute_async(case, rag_session_id)) @@ -81,6 +80,8 @@ class V2ProcessAdapter: actual=_actual_from_v2(route), details=_details(case.query, route=route, pipeline_steps=_build_pipeline_steps(runtime.logs)), ) + if case.mode == "full_chain": + return await self._execute_full_chain(case, rag_session_id, route) plan = self._policy.resolve(route) _log_pipeline_step( runtime, @@ -121,26 +122,7 @@ class V2ProcessAdapter: actual=_actual_from_v2(route, rows=rows, plan=plan, answer_mode="partial"), 
details=_details(case.query, route=route, plan=plan, rows=rows, pipeline_steps=_build_pipeline_steps(runtime.logs)), ) - answer, evidence, gate = await self._run_workflow(runtime, route, rag_session_id, rows) - answer_mode = gate.answer_mode - _log_pipeline_step( - runtime, - "answer_generated", - {"answer_mode": answer_mode, "answer_length": len(answer)}, - ) - return ExecutionPayload( - actual=_actual_from_v2(route, rows=rows, plan=plan, answer=answer, answer_mode=answer_mode), - details=_details( - case.query, - route=route, - plan=plan, - rows=rows, - evidence=evidence, - answer=answer, - logs=runtime.logs, - pipeline_steps=_build_pipeline_steps(runtime.logs), - ), - ) + raise ValueError(f"Unsupported process_v2 adapter mode: {case.mode}") async def _retrieve_rows(self, route, rag_session_id: str | None, plan) -> list[dict]: if not rag_session_id: @@ -173,125 +155,54 @@ class V2ProcessAdapter: merged.append(row) return merged - async def _run_workflow( - self, - runtime: "_RuntimeStub", - route, - rag_session_id: str | None, - rows: list[dict], - ) -> tuple[str, dict, object]: - if route.intent == V2Intent.GENERAL_QA: - documents = self._evidence.assemble_summaries(rows, route) - gate = self._gate.check_summaries(route, documents) - _log_pipeline_step( - runtime, - "evidence_assembled", - {"mode": "summary", "primary_doc": documents[0].path if documents else None, "document_count": len(documents)}, - ) - self._log_ranking(runtime, documents) - _log_pipeline_step( - runtime, - "evidence_gate_checked", - {"passed": gate.passed, "reason": gate.reason, "answer_mode": gate.answer_mode}, - ) - context = GeneralSummaryContext(runtime=runtime, route=route, prompt_name="v2_general.summary_answer") - context.workflow_llm_enabled = self._workflow_llm_enabled - context.documents = documents - context.gate_decision = gate - final = await self._general_graph.run(context) - return final.answer, {"documents": [_serialize_summary(item) for item in documents], "files": []}, gate 
- if route.subintent == V2Subintent.FIND_FILES: - files = self._evidence.assemble_files(rows, route) - gate = self._gate.check_files(route, files) - _log_pipeline_step( - runtime, - "evidence_assembled", - {"mode": "find_files", "primary_file": files[0].path if files else None, "file_count": len(files)}, - ) - self._log_ranking(runtime, files) - _log_pipeline_step( - runtime, - "evidence_gate_checked", - {"passed": gate.passed, "reason": gate.reason, "answer_mode": gate.answer_mode}, - ) - context = DocsExplainFindFilesContext( - runtime=runtime, - route=route, - rag_session_id=rag_session_id or "", - files=files, - gate_decision=gate, - ) - final = await self._find_files_graph.run(context) - return final.answer, {"documents": [], "files": [_serialize_file(item) for item in files]}, gate - documents = self._evidence.assemble_summaries(rows, route) - gate = self._gate.check_summaries(route, documents) - _log_pipeline_step( - runtime, - "evidence_assembled", - {"mode": "summary", "primary_doc": documents[0].path if documents else None, "document_count": len(documents)}, - ) - self._log_ranking(runtime, documents) - _log_pipeline_step( - runtime, - "evidence_gate_checked", - {"passed": gate.passed, "reason": gate.reason, "answer_mode": gate.answer_mode}, - ) - context = DocsExplainSummaryContext( - runtime=runtime, - route=route, - rag_session_id=rag_session_id or "", - prompt_name="v2_docs_explain.summary_answer", - workflow_llm_enabled=self._workflow_llm_enabled, - documents=documents, - gate_decision=gate, - ) - final = await self._summary_graph.run(context) - return final.answer, {"documents": [_serialize_summary(item) for item in documents], "files": []}, gate - - def _trace_row(self, row: dict) -> dict[str, object]: - metadata = dict(row.get("metadata") or {}) - return { - "path": str(row.get("path") or ""), - "layer": str(row.get("layer") or ""), - "title": str(row.get("title") or ""), - "document_id": str(metadata.get("document_id") or metadata.get("doc_id") 
or ""), - } - - def _log_ranking(self, runtime: "_RuntimeStub", items: list) -> None: - top_docs: list[dict[str, object]] = [] - for item in items[:4]: - top_docs.append( - { - "doc": getattr(item, "path", ""), - "score": getattr(item, "score", 0), - "match_reason": getattr(item, "match_reason", ""), - } - ) - _log_pipeline_step( - runtime, - "ranking_explained", - { - "doc": getattr(item, "path", ""), - "score": getattr(item, "score", 0), - "score_breakdown": getattr(item, "score_breakdown", {}), - "match_reason": getattr(item, "match_reason", ""), - }, - ) - _log_pipeline_step( - runtime, - "ranking_explained", - { - "top_docs_after_ranking": top_docs, - "ranking_score_breakdown": [ - { - "doc": getattr(item, "path", ""), - "score_breakdown": getattr(item, "score_breakdown", {}), - } - for item in items[:4] - ], + async def _execute_full_chain(self, case: V3Case, rag_session_id: str | None, route) -> ExecutionPayload: + runtime = _RuntimeStub(query=case.query, rag_session_id=rag_session_id) + result = await self._process.run(runtime) + retrieval_plan = _event_payload(runtime.logs, "process.v2.retrieval_policy", "retrieval_plan_resolved") + rows = list(_event_payload(runtime.logs, "process.v2.rag_retrieval", "rag_rows_fetched").get("rows") or []) + answer_generated = _event_payload(runtime.logs, "process.v2.pipeline", "answer_generated") + return ExecutionPayload( + actual={ + "domain": route.routing_domain, + "intent": route.intent, + "sub_intent": route.subintent, + "rag_count": len(rows), + "llm_answer": result.answer, + "answer_mode": str(answer_generated.get("answer_mode") or ""), + "path_scope": tuple(), + "symbol_candidates": tuple(), + "entity_candidates": tuple(_entity_candidates(rows)), + "doc_scope": tuple(_doc_scope(rows)), + "layers": tuple(retrieval_plan.get("layers") or []), + "filters": dict(retrieval_plan.get("filters") or {}), + }, + details={ + "query": case.query, + "router_result": asdict(route), + "retrieval_plan": retrieval_plan, + "rows": 
rows, + "answer": result.answer, + "logs": runtime.logs, + "pipeline_steps": _build_pipeline_steps(runtime.logs), }, ) + def _trace_row(self, row: dict) -> dict[str, object]: + metadata = row.get("metadata") or {} + content = str(row.get("content") or "").strip() + return { + "layer": str(row.get("layer") or ""), + "path": str(row.get("path") or ""), + "title": str(row.get("title") or ""), + "document_id": str(metadata.get("document_id") or metadata.get("doc_id") or row.get("document_id") or ""), + "entity_name": str(metadata.get("entity_name") or ""), + "summary_text": str(metadata.get("summary_text") or "")[:400], + "section_path": str(metadata.get("section_path") or ""), + "metadata_domain": str(metadata.get("domain") or ""), + "metadata_subdomain": str(metadata.get("subdomain") or ""), + "content_preview": content[:400], + } + @dataclass(slots=True) class _RequestStub: @@ -320,10 +231,10 @@ class _TraceLoggerStub: class _RuntimeStub: - def __init__(self, *, query: str) -> None: + def __init__(self, *, query: str, rag_session_id: str | None = None) -> None: self.logs: list[dict] = [] self.request = _RequestStub(request_id="pipeline_setup_v3", message=query) - self.session = _SessionStub() + self.session = _SessionStub(active_rag_session_id=rag_session_id) self.publisher = _PublisherStub() self.trace = RequestTraceContext(request_id=self.request.request_id, logger=_TraceLoggerStub(self.logs)) @@ -335,8 +246,10 @@ def _build_client() -> GigaChatClient: def _build_v2_llm() -> AgentLlmService: prompt_paths = [ - Path(__file__).resolve().parents[3] / "src/app/core/agent/processes/v2/prompts.yml", - Path(__file__).resolve().parents[3] / "src/app/core/agent/processes/v2/general_prompts.yml", + Path(__file__).resolve().parents[3] + / "src/app/core/agent/processes/v2/workflows/doc_explain_summary/steps/prompts/prompts.yml", + Path(__file__).resolve().parents[3] + / "src/app/core/agent/processes/v2/workflows/general_qa_summary/steps/prompts/prompts.yml", 
Path(__file__).resolve().parents[3] / "src/app/core/agent/processes/v2/intent_router/routers/prompts.yml", ] return AgentLlmService(client=_build_client(), prompts=PromptLoader(prompt_paths)) @@ -375,7 +288,12 @@ def _doc_scope(rows: list[dict]) -> list[str]: values: list[str] = [] for row in rows: metadata = dict(row.get("metadata") or {}) - for candidate in (metadata.get("document_id"), metadata.get("doc_id"), row.get("path")): + for candidate in ( + row.get("document_id"), + metadata.get("document_id"), + metadata.get("doc_id"), + row.get("path"), + ): value = str(candidate or "").strip() if value and value not in values: values.append(value) @@ -386,20 +304,12 @@ def _entity_candidates(rows: list[dict]) -> list[str]: values: list[str] = [] for row in rows: metadata = dict(row.get("metadata") or {}) - value = str(metadata.get("entity_name") or row.get("title") or "").strip() + value = str(row.get("entity_name") or metadata.get("entity_name") or row.get("title") or "").strip() if value and value not in values and str(row.get("layer") or "") == "D3_ENTITY_CATALOG": values.append(value) return values -def _serialize_summary(item: RetrievedSummary) -> dict: - return asdict(item) - - -def _serialize_file(item: RetrievedFile) -> dict: - return asdict(item) - - def _build_pipeline_steps(logs: list[dict]) -> list[dict]: steps: list[dict] = [] for item in logs: @@ -409,6 +319,16 @@ def _build_pipeline_steps(logs: list[dict]) -> list[dict]: return steps +def _event_payload(logs: list[dict], module: str, event: str) -> dict[str, object]: + for item in logs: + if item.get("module") == module and item.get("event") == event: + payload = item.get("payload") or {} + if isinstance(payload, dict): + return dict(payload) + return {} + return {} + + def _log_pipeline_step(runtime: _RuntimeStub, step: str, payload: dict[str, object]) -> None: runtime.logs.append( { diff --git a/tests/pipeline_setup_v4/executors/process_v2_full_chain_executor.py 
b/tests/pipeline_setup_v4/executors/process_v2_full_chain_executor.py index 9d0939a..4fa92c9 100644 --- a/tests/pipeline_setup_v4/executors/process_v2_full_chain_executor.py +++ b/tests/pipeline_setup_v4/executors/process_v2_full_chain_executor.py @@ -14,7 +14,7 @@ from tests.pipeline_setup_v4.core.models import ExecutionPayload, V4Case class ProcessV2FullChainExecutor: def __init__(self) -> None: - self._adapter = V2ProcessAdapter(workflow_llm_enabled=True) + self._adapter = V2ProcessAdapter(workflow_llm_enabled=False) def execute(self, case: V4Case) -> ExecutionPayload: if not case.rag_session_id: @@ -27,7 +27,7 @@ class ProcessV2FullChainExecutor: pipeline_steps = list(payload.details.get("pipeline_steps") or []) pipeline_summary = { "answer_mode": str(payload.actual.get("answer_mode") or ""), - "workflow_llm_enabled": True, + "workflow_llm_enabled": False, "step_count": len(pipeline_steps), "steps": [str(step.get("step") or "") for step in pipeline_steps if str(step.get("step") or "").strip()], } @@ -102,8 +102,8 @@ def _summarize_rows(rows: list[dict]) -> dict[str, object]: path = str(row.get("path") or "").strip() layer = str(row.get("layer") or "").strip() metadata = dict(row.get("metadata") or {}) - domain = str(metadata.get("domain") or "").strip() - subdomain = str(metadata.get("subdomain") or "").strip() + domain = str(row.get("metadata_domain") or metadata.get("domain") or "").strip() + subdomain = str(row.get("metadata_subdomain") or metadata.get("subdomain") or "").strip() if path and path not in paths: paths.append(path) if layer and layer not in layers: diff --git a/tests/pipeline_setup_v4/executors/process_v2_retrieval_policy_executor.py b/tests/pipeline_setup_v4/executors/process_v2_retrieval_policy_executor.py index eaa0ee5..5982c18 100644 --- a/tests/pipeline_setup_v4/executors/process_v2_retrieval_policy_executor.py +++ b/tests/pipeline_setup_v4/executors/process_v2_retrieval_policy_executor.py @@ -2,8 +2,8 @@ from __future__ import annotations 
from dataclasses import asdict -from app.core.agent.processes.v2.models import V2RouteAnchors, V2RouteResult -from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver +from app.core.agent.utils.process_v2.models import V2RouteAnchors, V2RouteResult +from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver from tests.pipeline_setup_v4.core.models import ExecutionPayload, V4Case diff --git a/tests/pipeline_setup_v4/executors/process_v2_router_plus_policy_executor.py b/tests/pipeline_setup_v4/executors/process_v2_router_plus_policy_executor.py index 804cb5f..40d57fd 100644 --- a/tests/pipeline_setup_v4/executors/process_v2_router_plus_policy_executor.py +++ b/tests/pipeline_setup_v4/executors/process_v2_router_plus_policy_executor.py @@ -3,7 +3,7 @@ from __future__ import annotations from dataclasses import asdict from app.core.agent.processes.v2 import V2IntentRouter -from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver +from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver from tests.pipeline_setup_v4.core.models import ExecutionPayload, V4Case from tests.pipeline_setup_v4.executors.process_v2_router_executor import _KeywordLlm diff --git a/tests/pipeline_setup_v4/executors/process_v2_router_plus_policy_rag_executor.py b/tests/pipeline_setup_v4/executors/process_v2_router_plus_policy_rag_executor.py index 524a949..acedc91 100644 --- a/tests/pipeline_setup_v4/executors/process_v2_router_plus_policy_rag_executor.py +++ b/tests/pipeline_setup_v4/executors/process_v2_router_plus_policy_rag_executor.py @@ -4,10 +4,10 @@ import asyncio from dataclasses import asdict from app.core.agent.processes.v2 import V2IntentRouter -from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver -from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter from app.core.rag.persistence.repository 
import RagRepository from app.core.rag.retrieval.session_retriever import RagSessionRetriever +from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver +from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter from tests.pipeline_setup_v3.shared.rag_indexer import DeterministicEmbedder from tests.pipeline_setup_v4.core.models import ExecutionPayload, V4Case from tests.pipeline_setup_v4.executors.process_v2_router_executor import _KeywordLlm diff --git a/tests/unit_tests/agent/test_api_endpoint_collector.py b/tests/unit_tests/agent/test_api_endpoint_collector.py new file mode 100644 index 0000000..5bb8e44 --- /dev/null +++ b/tests/unit_tests/agent/test_api_endpoint_collector.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.retrieval.api_endpoint_collector import ( + ApiEndpointCollector, +) + + +def test_collector_returns_method_and_path_lines() -> None: + rows = [ + { + "metadata": { + "endpoint": "GET|POST /actions/{action}", + "summary_text": "Endpoint for runtime control actions", + }, + "title": "HTTP API /actions/{action}", + "path": "docs/api/control-actions-endpoint.md", + "content": "", + }, + { + "metadata": { + "endpoint": "GET /health", + }, + "title": "HTTP API /health", + "path": "docs/api/health-endpoint.md", + "content": "", + }, + ] + + endpoints = ApiEndpointCollector().collect(rows) + + assert "GET /actions/{action}" in endpoints + assert "POST /actions/{action}" in endpoints + assert "GET /health" in endpoints + + +def test_collector_ignores_file_paths_from_content() -> None: + rows = [ + { + "metadata": { + "endpoint": "GET /health", + "summary_text": "Uses src/telegram_notify_app/control_api.py", + }, + "title": "Health endpoint", + "path": "docs/api/health-endpoint.md", + "content": "See /telegram_notify_app/control_api.py and /telegram_notify_app/worker.py", + } + ] + + endpoints = 
ApiEndpointCollector().collect(rows) + + assert endpoints == ["GET /health"] + + +def test_collector_uses_title_path_fallback_when_endpoint_metadata_missing() -> None: + rows = [ + { + "metadata": { + "summary_text": "Control actions endpoint", + }, + "title": "HTTP API /actions/{action}", + "path": "docs/api/control-actions-endpoint.md", + "content": "", + } + ] + + endpoints = ApiEndpointCollector().collect(rows) + + assert endpoints == ["GET /actions/{action}"] diff --git a/tests/unit_tests/agent/test_v2_evidence_ranking.py b/tests/unit_tests/agent/test_v2_evidence_ranking.py index d7f4a82..c4a0c11 100644 --- a/tests/unit_tests/agent/test_v2_evidence_ranking.py +++ b/tests/unit_tests/agent/test_v2_evidence_ranking.py @@ -1,7 +1,7 @@ from __future__ import annotations -from app.core.agent.processes.v2.evidence.assembler import DocsEvidenceAssembler -from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2RouteAnchors, V2RouteResult, V2Subintent +from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler +from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2RouteAnchors, V2RouteResult, V2Subintent def _route(*, hints: list[str], terms: list[str], subintent: str = V2Subintent.SUMMARY) -> V2RouteResult: diff --git a/tests/unit_tests/agent/test_v2_process.py b/tests/unit_tests/agent/test_v2_process.py index a0fcb02..3ab8ed7 100644 --- a/tests/unit_tests/agent/test_v2_process.py +++ b/tests/unit_tests/agent/test_v2_process.py @@ -4,11 +4,11 @@ import asyncio from dataclasses import dataclass from app.core.agent.processes.v2 import V2IntentRouter, V2Process -from app.core.agent.processes.v2.retrieval.target_doc_seeding import normalize_doc_path -from app.core.agent.processes.v2.evidence.assembler import DocsEvidenceAssembler -from app.core.agent.processes.v2.evidence.gate import DocsEvidenceGate -from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver from 
app.core.agent.runtime.execution_context import RuntimeExecutionContext +from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler +from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate +from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver +from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import normalize_doc_path from app.core.api.domain.models.agent_request import AgentRequest from app.core.api.domain.models.agent_session import AgentSession from app.schemas.orchestration import RequestExecutionStatus diff --git a/tests/unit_tests/agent/test_v2_rag_adapter.py b/tests/unit_tests/agent/test_v2_rag_adapter.py index b83d21a..bb0dc4b 100644 --- a/tests/unit_tests/agent/test_v2_rag_adapter.py +++ b/tests/unit_tests/agent/test_v2_rag_adapter.py @@ -2,8 +2,8 @@ from __future__ import annotations import asyncio -from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter from app.core.rag.retrieval.session_retriever import RetrievalPlan +from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter class FakeRetriever: @@ -50,6 +50,24 @@ class FakeRetriever: self.calls.append(("substring", list(path_needles))) return [] + async def list_docs_scope_rows(self, _rag_session_id: str, *, limit: int = 8000) -> list[dict]: + del limit + self.calls.append(("scope_rows", None)) + return [ + { + "path": "docs/api/health-endpoint.md", + "layer": "D1_DOCUMENT_CATALOG", + "title": "HTTP API /health", + "metadata": {"type": "api_method", "endpoint": "GET /health"}, + }, + { + "path": "docs/api/send-message-endpoint.md", + "layer": "D1_DOCUMENT_CATALOG", + "title": "HTTP API /send", + "metadata": {"type": "api_method", "endpoint": "GET /send"}, + }, + ] + def test_v2_rag_adapter_seeds_exact_rows_from_plan_hints() -> None: adapter = V2RagRetrievalAdapter(FakeRetriever()) @@ -79,3 +97,21 @@ def 
test_v2_rag_adapter_uses_substring_fallback_for_missing_hint() -> None: asyncio.run(adapter.fetch_rows("rag-1", "find file", plan)) assert ("substring", ["missing-health-endpoint.md"]) in retriever.calls + + +def test_v2_rag_adapter_applies_query_signal_filter_for_api_exposed() -> None: + adapter = V2RagRetrievalAdapter(FakeRetriever()) + plan = RetrievalPlan( + profile="api_exposed", + layers=["D1_DOCUMENT_CATALOG"], + limit=50, + filters={ + "metadata.type": "api_method", + "query_signals": ["health"], + }, + ) + + rows = asyncio.run(adapter.fetch_rows("rag-1", "health endpoints", plan)) + + assert len(rows) == 1 + assert rows[0]["path"] == "docs/api/health-endpoint.md" diff --git a/tests/unit_tests/agent/test_v2_retrieval_policy.py b/tests/unit_tests/agent/test_v2_retrieval_policy.py index 78742ce..3f43234 100644 --- a/tests/unit_tests/agent/test_v2_retrieval_policy.py +++ b/tests/unit_tests/agent/test_v2_retrieval_policy.py @@ -1,7 +1,7 @@ from __future__ import annotations -from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2RouteAnchors, V2RouteResult, V2Subintent -from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver +from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2RouteAnchors, V2RouteResult, V2Subintent +from app.core.agent.utils.process_v2.plan_resolver import V2RetrievalPolicyResolver def _route( diff --git a/tests/unit_tests/agent/test_v2_scope_grounding.py b/tests/unit_tests/agent/test_v2_scope_grounding.py new file mode 100644 index 0000000..18b6e21 --- /dev/null +++ b/tests/unit_tests/agent/test_v2_scope_grounding.py @@ -0,0 +1,138 @@ +"""Tests for pre-LLM scope grounding from D1/D3 catalog rows (no extra RAG layer).""" + +from __future__ import annotations + +import json + +from app.core.agent.processes.v2 import V2IntentRouter +from app.core.agent.utils.process_v2.models import V2ScopeType + + +class FakeLlm: + def __init__(self, response: str) -> None: + self.response 
= response + + def generate(self, prompt_name: str, user_input: str, **_kwargs) -> str: + del prompt_name, user_input + return self.response + + +def _llm_ok() -> str: + return json.dumps( + { + "routing_domain": "DOCS", + "intent": "DOC_EXPLAIN", + "subintent": "SUMMARY", + "confidence": 0.9, + "reason_short": "ok", + }, + ensure_ascii=False, + ) + + +def _fixture_rows() -> list[dict]: + return [ + { + "layer": "D1_DOCUMENT_CATALOG", + "path": "docs/billing/overview.md", + "title": "Billing", + "content": "", + "metadata": {"domain": "billing", "summary_text": "Billing domain overview"}, + }, + { + "layer": "D1_DOCUMENT_CATALOG", + "path": "docs/billing/invoices.md", + "title": "Invoices", + "content": "", + "metadata": {"domain": "billing", "subdomain": "invoice", "tags": ["invoice", "invoices"]}, + }, + { + "layer": "D3_ENTITY_CATALOG", + "path": "docs/domains/order.md", + "title": "Order", + "content": "", + "metadata": {"entity_name": "Order", "domain": "billing"}, + }, + { + "layer": "D1_DOCUMENT_CATALOG", + "path": "docs/api/invoices_post.md", + "title": "POST /api/v1/invoices", + "content": "", + "metadata": { + "doc_type": "api_method", + "domain": "billing", + "endpoint": "/api/v1/invoices", + }, + }, + { + "layer": "D1_DOCUMENT_CATALOG", + "path": "docs/widgets/readme.md", + "title": "Widgets", + "content": "", + "metadata": {"domain": "widgets", "summary_text": "Unrelated domain for negative tests"}, + }, + ] + + +def _router() -> V2IntentRouter: + return V2IntentRouter(llm=FakeLlm(_llm_ok()), scope_rows_provider=lambda _sid: _fixture_rows()) + + +def test_scope_global_project_wide_enumeration() -> None: + r = _router().route("какие api методы есть в проекте", rag_session_id="sess-1") + assert r.scope_type == V2ScopeType.GLOBAL + + +def test_scope_domain_billing() -> None: + r = _router().route("какие api есть в billing", rag_session_id="sess-1") + assert r.scope_type == V2ScopeType.DOMAIN + assert r.anchors.process_domain == "billing" + assert 
any(c.value == "billing" for c in r.anchors.candidate_domains) + + +def test_scope_subdomain_billing_invoices() -> None: + r = _router().route("какие api есть в billing invoices", rag_session_id="sess-1") + assert r.scope_type == V2ScopeType.SUBDOMAIN + assert r.anchors.process_domain == "billing" + assert r.anchors.process_subdomain == "invoice" + + +def test_scope_entity_order_doc() -> None: + r = _router().route("дай доку по Order", rag_session_id="sess-1") + assert r.scope_type == V2ScopeType.ENTITY + assert "order" in [e.lower() for e in r.anchors.entity_names] + + +def test_scope_entity_endpoint_path() -> None: + r = _router().route("где описан POST /api/v1/invoices", rag_session_id="sess-1") + assert r.scope_type == V2ScopeType.ENTITY + assert "/api/v1/invoices" in r.anchors.endpoint_paths + + +def test_scope_vague_no_false_domain() -> None: + r = _router().route("что там с фывырапфыв", rag_session_id="sess-1") + assert r.scope_type == V2ScopeType.UNKNOWN + assert r.anchors.process_domain is None + + +def test_scope_russian_payments_phrase_matches_tag() -> None: + rows = [ + *_fixture_rows(), + { + "layer": "D1_DOCUMENT_CATALOG", + "path": "docs/billing/payments_ru.md", + "title": "Платежи", + "content": "", + "metadata": {"domain": "billing", "tags": ["платежи"]}, + }, + ] + router = V2IntentRouter(llm=FakeLlm(_llm_ok()), scope_rows_provider=lambda _sid: rows) + r = router.route("какие методы есть в платежи", rag_session_id="sess-1") + assert r.scope_type in {V2ScopeType.DOMAIN, V2ScopeType.ENTITY, V2ScopeType.SUBDOMAIN} + assert r.anchors.process_domain == "billing" or any("платеж" in c.value for c in r.anchors.candidate_entities) + + +def test_router_without_session_skips_db_and_keeps_target_terms() -> None: + r = V2IntentRouter(llm=FakeLlm(_llm_ok())).route("Покажи где описан RuntimeHealth и /health") + assert r.scope_type == V2ScopeType.UNKNOWN + assert "runtimehealth" in r.target_terms diff --git 
a/tests/unit_tests/api/test_filesystem_snapshot_resolver.py b/tests/unit_tests/api/test_filesystem_snapshot_resolver.py new file mode 100644 index 0000000..b47495d --- /dev/null +++ b/tests/unit_tests/api/test_filesystem_snapshot_resolver.py @@ -0,0 +1,83 @@ +from __future__ import annotations + +import hashlib + +from app.core.api.application.filesystem_snapshot_resolver import FilesystemSnapshotResolver + + +def test_augment_adds_untracked_files_from_filesystem(tmp_path) -> None: + root = tmp_path / "repo" + root.mkdir() + docs = root / "docs" / "api" + docs.mkdir(parents=True) + control_path = docs / "control-actions-endpoint.md" + control_content = "---\ndoc_type: api_method\n---\nGET|POST /actions/{action}\n" + control_path.write_text(control_content, encoding="utf-8") + + incoming = [ + { + "path": "docs/api/health-endpoint.md", + "content": "health", + "content_hash": hashlib.sha256(b"health").hexdigest(), + } + ] + + out = FilesystemSnapshotResolver().augment(project_id=str(root), files=incoming) + paths = {item["path"] for item in out} + assert "docs/api/health-endpoint.md" in paths + assert "docs/api/control-actions-endpoint.md" in paths + + +def test_augment_prefers_request_payload_for_existing_path(tmp_path) -> None: + root = tmp_path / "repo" + root.mkdir() + docs = root / "docs" / "api" + docs.mkdir(parents=True) + file_path = docs / "health-endpoint.md" + file_path.write_text("from-disk", encoding="utf-8") + + incoming_content = "from-request" + incoming = [ + { + "path": "docs/api/health-endpoint.md", + "content": incoming_content, + "content_hash": hashlib.sha256(incoming_content.encode("utf-8")).hexdigest(), + } + ] + + out = FilesystemSnapshotResolver().augment(project_id=str(root), files=incoming) + by_path = {item["path"]: item for item in out} + assert by_path["docs/api/health-endpoint.md"]["content"] == incoming_content + + +def test_augment_ignores_files_outside_root_docs(tmp_path) -> None: + root = tmp_path / "repo" + root.mkdir() + (root / 
"docs").mkdir() + (root / "docs" / "README.md").write_text("docs", encoding="utf-8") + (root / "src").mkdir() + (root / "src" / "app.py").write_text("print('x')", encoding="utf-8") + + out = FilesystemSnapshotResolver().augment(project_id=str(root), files=[]) + paths = {item["path"] for item in out} + assert "docs/README.md" in paths + assert "src/app.py" not in paths + + +def test_augment_keeps_docs_when_request_uses_absolute_paths(tmp_path) -> None: + root = tmp_path / "repo" + root.mkdir() + (root / "docs" / "api").mkdir(parents=True) + (root / "docs" / "api" / "health-endpoint.md").write_text("disk", encoding="utf-8") + + absolute_docs = str(root / "docs" / "api" / "control-actions-endpoint.md") + absolute_src = str(root / "src" / "app.py") + incoming = [ + {"path": absolute_docs, "content": "req-doc", "content_hash": hashlib.sha256(b"req-doc").hexdigest()}, + {"path": absolute_src, "content": "req-src", "content_hash": hashlib.sha256(b"req-src").hexdigest()}, + ] + + out = FilesystemSnapshotResolver().augment(project_id=str(root), files=incoming) + paths = {item["path"] for item in out} + assert "docs/api/control-actions-endpoint.md" in paths + assert "src/app.py" not in paths diff --git a/tests/unit_tests/rag/test_docs_indexing_pipeline.py b/tests/unit_tests/rag/test_docs_indexing_pipeline.py index 03b91ab..288556e 100644 --- a/tests/unit_tests/rag/test_docs_indexing_pipeline.py +++ b/tests/unit_tests/rag/test_docs_indexing_pipeline.py @@ -304,3 +304,34 @@ Read health assert RagLayer.DOCS_INTEGRATION_INDEX in layers assert "docs integration parse warning" in caplog.text assert all(doc.source.path == "docs/api/health-endpoint.md" for doc in docs) + + +def test_docs_pipeline_tolerates_broken_frontmatter_and_keeps_api_type() -> None: + pipeline = DocsIndexingPipeline() + content = """--- +id: api.control_actions_endpoint +type: api_method +doc_type: api_method +title: HTTP API /actions/{action} +endpoint: GET|POST /actions/{action} +links: + called_by: + - 
ext.operator + tags: +- api +--- +# HTTP API /actions/{action} + +## Summary + +Control actions endpoint. +""" + docs = pipeline.index_file( + repo_id="acme/proj", + commit_sha="abc123", + path="docs/api/control-actions-endpoint.md", + content=content, + ) + catalog = next(doc for doc in docs if doc.layer == RagLayer.DOCS_DOCUMENT_CATALOG) + assert catalog.metadata["type"] == "api_method" + assert catalog.metadata["title"] == "HTTP API /actions/{action}"