ввв
This commit is contained in:
Vendored
+5
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"files.exclude": {
|
||||||
|
"**/__pycache__": true
|
||||||
|
}
|
||||||
|
}
|
||||||
Binary file not shown.
@@ -1,6 +1,6 @@
|
|||||||
from app.core.agent.processes.base import AgentProcess, ProcessResult
|
from app.core.agent.processes.base import AgentProcess, ProcessResult
|
||||||
from app.core.agent.processes.v1.process import V1Process
|
from app.core.agent.processes.v1.process import V1Process
|
||||||
from app.core.agent.processes.v2.process import V2Process
|
from app.core.agent.processes.v2.v2_process import V2Process
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"AgentProcess",
|
"AgentProcess",
|
||||||
|
|||||||
@@ -2,8 +2,11 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from dataclasses import field
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from app.schemas.changeset import ChangeItem
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
|
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
|
||||||
|
|
||||||
@@ -11,6 +14,8 @@ if TYPE_CHECKING:
|
|||||||
@dataclass(slots=True)
|
@dataclass(slots=True)
|
||||||
class ProcessResult:
|
class ProcessResult:
|
||||||
answer: str = ""
|
answer: str = ""
|
||||||
|
changeset: list[ChangeItem] = field(default_factory=list)
|
||||||
|
apply_changeset: bool = False
|
||||||
|
|
||||||
|
|
||||||
class AgentProcess(ABC):
|
class AgentProcess(ABC):
|
||||||
|
|||||||
@@ -1,4 +1,11 @@
|
|||||||
from app.core.agent.processes.v2.process import V2Process
|
|
||||||
from app.core.agent.processes.v2.intent_router.router import V2IntentRouter
|
from app.core.agent.processes.v2.intent_router.router import V2IntentRouter
|
||||||
|
|
||||||
__all__ = ["V2IntentRouter", "V2Process"]
|
__all__ = ["V2IntentRouter", "V2Process"]
|
||||||
|
|
||||||
|
|
||||||
|
def __getattr__(name: str):
    """Resolve ``V2Process`` lazily when it is accessed on this module (PEP 562).

    Args:
        name: Attribute name that was not found through normal module lookup.

    Returns:
        The ``V2Process`` class when ``name`` is ``"V2Process"``.

    Raises:
        AttributeError: For any other attribute name.
    """
    if name == "V2Process":
        # Imported on demand instead of at module import time.
        # NOTE(review): presumably this avoids a circular import — confirm.
        from app.core.agent.processes.v2.v2_process import V2Process

        return V2Process
    # Mirror the interpreter's standard wording so tracebacks name the module too.
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||||
|
|||||||
Binary file not shown.
@@ -0,0 +1,53 @@
|
|||||||
|
# Documentation Rules Index
|
||||||
|
|
||||||
|
Этот каталог содержит локализованную проекцию правил построения документации проекта.
|
||||||
|
Источником истины для структуры и качества документов являются process-документы:
|
||||||
|
- `/Users/alex/Dev_projects_v2/ai driven app process/v2/agent/_process/01. Process.md`
|
||||||
|
- `/Users/alex/Dev_projects_v2/ai driven app process/v2/agent/_process/04. Analitycs artefacts.md`
|
||||||
|
|
||||||
|
Файлы ниже не должны противоречить этим документам, а лишь конкретизируют их для `test_echo_app`.
|
||||||
|
|
||||||
|
## Порядок использования
|
||||||
|
|
||||||
|
1. Сначала прочитать `global/documentation-system.md`.
|
||||||
|
2. Затем прочитать `global/frontmatter.md` и `global/linking.md`.
|
||||||
|
3. Затем выбрать правило из `artifact-types/` по `doc_type`.
|
||||||
|
4. Затем использовать шаблон из `templates/`.
|
||||||
|
5. Для уточнения отдельных частей документа использовать правила из `sections/`.
|
||||||
|
|
||||||
|
## Структура каталога
|
||||||
|
|
||||||
|
- `global/` — общие правила системы документации.
|
||||||
|
- `artifact-types/` — правила по типам артефактов.
|
||||||
|
- `sections/` — правила для отдельных секций документов.
|
||||||
|
- `templates/` — шаблоны документов.
|
||||||
|
|
||||||
|
## Содержимое
|
||||||
|
|
||||||
|
### Global
|
||||||
|
- `global/documentation-system.md`
|
||||||
|
- `global/frontmatter.md`
|
||||||
|
- `global/writing-style.md`
|
||||||
|
- `global/linking.md`
|
||||||
|
- `global/naming.md`
|
||||||
|
|
||||||
|
### Artifact types
|
||||||
|
- `artifact-types/api_method.md`
|
||||||
|
- `artifact-types/logic_block.md`
|
||||||
|
- `artifact-types/architecture_overview.md`
|
||||||
|
- `artifact-types/domain_entity.md`
|
||||||
|
- `artifact-types/ui_page.md`
|
||||||
|
- `artifact-types/integration_doc.md`
|
||||||
|
|
||||||
|
### Sections
|
||||||
|
- `sections/summary.md`
|
||||||
|
- `sections/details.md`
|
||||||
|
- `sections/api-scenario.md`
|
||||||
|
- `sections/api-contract.md`
|
||||||
|
- `sections/requirements-format.md`
|
||||||
|
|
||||||
|
### Templates
|
||||||
|
- `templates/api_method.template.md`
|
||||||
|
- `templates/logic_block.template.md`
|
||||||
|
- `templates/architecture_overview.template.md`
|
||||||
|
- `templates/domain_entity.template.md`
|
||||||
@@ -0,0 +1,39 @@
|
|||||||
|
# API Method Rules
|
||||||
|
|
||||||
|
## Назначение
|
||||||
|
|
||||||
|
Этот файл задает правила для документов типа `api_method`.
|
||||||
|
|
||||||
|
## Когда использовать
|
||||||
|
|
||||||
|
Использовать для описания одного HTTP endpoint или одного отдельного API метода.
|
||||||
|
|
||||||
|
## Обязательная структура
|
||||||
|
|
||||||
|
Документ должен содержать:
|
||||||
|
- YAML frontmatter
|
||||||
|
- `# <title>`
|
||||||
|
- `## Summary`
|
||||||
|
- `## Details`
|
||||||
|
|
||||||
|
Внутри `## Details` обязательны:
|
||||||
|
- `### Описание`
|
||||||
|
- `### Сценарий`
|
||||||
|
- `### Функциональные требования`
|
||||||
|
- `### Нефункциональные требования`
|
||||||
|
- `### Контракт`
|
||||||
|
|
||||||
|
## Особые правила
|
||||||
|
|
||||||
|
- Сценарий оформляется как технический use case.
|
||||||
|
- Функциональные требования маркируются `FR-*`.
|
||||||
|
- Нефункциональные требования маркируются `NFR-*`.
|
||||||
|
- Контракт должен быть пригоден для последующей сборки OpenAPI.
|
||||||
|
- Если у метода есть интеграции, они выносятся в `### Интеграции`.
|
||||||
|
- Ошибки и HTTP-коды либо описываются в `### Ошибки`, либо ссылаются на централизованный каталог ошибок.
|
||||||
|
|
||||||
|
## Ошибки оформления
|
||||||
|
|
||||||
|
- Нельзя заменять контракт общим текстовым описанием.
|
||||||
|
- Нельзя смешивать несколько endpoint в одном документе.
|
||||||
|
- Нельзя хранить связи и навигацию вне frontmatter.
|
||||||
@@ -0,0 +1,31 @@
|
|||||||
|
# Architecture Overview Rules
|
||||||
|
|
||||||
|
## Назначение
|
||||||
|
|
||||||
|
Этот файл задает правила для документов типа `architecture_overview`.
|
||||||
|
|
||||||
|
## Когда использовать
|
||||||
|
|
||||||
|
Использовать как входной документ для понимания системы, модуля или сервиса.
|
||||||
|
|
||||||
|
## Обязательная структура
|
||||||
|
|
||||||
|
Документ должен содержать:
|
||||||
|
- YAML frontmatter
|
||||||
|
- `# <title>`
|
||||||
|
- `## Summary`
|
||||||
|
- `## Details`
|
||||||
|
|
||||||
|
## Что описывать в Details
|
||||||
|
|
||||||
|
- границы системы
|
||||||
|
- основные компоненты
|
||||||
|
- ключевые взаимодействия
|
||||||
|
- интеграционные сценарии
|
||||||
|
- главные ограничения
|
||||||
|
- ссылки на дочерние документы по API, logic, domain и другим артефактам
|
||||||
|
|
||||||
|
## Ошибки оформления
|
||||||
|
|
||||||
|
- Нельзя дублировать в архитектурном обзоре полные API-контракты.
|
||||||
|
- Нельзя делать архитектурный обзор единственным документом на всю систему без декомпозиции.
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# Domain Entity Rules
|
||||||
|
|
||||||
|
## Назначение
|
||||||
|
|
||||||
|
Этот файл задает правила для документов типа `domain_entity`.
|
||||||
|
|
||||||
|
## Когда использовать
|
||||||
|
|
||||||
|
Использовать для описания одной доменной сущности, ее смысла, состояния и роли в системе.
|
||||||
|
|
||||||
|
## Обязательная структура
|
||||||
|
|
||||||
|
Документ должен содержать:
|
||||||
|
- YAML frontmatter
|
||||||
|
- `# <title>`
|
||||||
|
- `## Summary`
|
||||||
|
- `## Details`
|
||||||
|
|
||||||
|
## Что описывать в Details
|
||||||
|
|
||||||
|
- смысл сущности
|
||||||
|
- ключевые атрибуты
|
||||||
|
- состояния или инварианты
|
||||||
|
- использование сущности в системе
|
||||||
|
- интеграции с API, workflow или внешними потребителями, если они важны для понимания модели
|
||||||
|
|
||||||
|
## Ошибки оформления
|
||||||
|
|
||||||
|
- Нельзя смешивать несколько независимых сущностей в одном документе.
|
||||||
|
- Нельзя подменять доменную сущность описанием endpoint или workflow.
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
# Integration Doc Rules
|
||||||
|
|
||||||
|
## Назначение
|
||||||
|
|
||||||
|
Этот файл задает правила для документов типа `integration_doc`.
|
||||||
|
|
||||||
|
## Когда использовать
|
||||||
|
|
||||||
|
Использовать для описания интеграции между системами, сервисами или внешними провайдерами.
|
||||||
|
|
||||||
|
## Обязательная структура
|
||||||
|
|
||||||
|
Документ должен содержать:
|
||||||
|
- YAML frontmatter
|
||||||
|
- `# <title>`
|
||||||
|
- `## Summary`
|
||||||
|
- `## Details`
|
||||||
|
|
||||||
|
## Что описывать в Details
|
||||||
|
|
||||||
|
- цель интеграции
|
||||||
|
- участвующие стороны
|
||||||
|
- направление обмена
|
||||||
|
- ключевой сценарий взаимодействия
|
||||||
|
- ограничения и риски
|
||||||
@@ -0,0 +1,31 @@
|
|||||||
|
# Logic Block Rules
|
||||||
|
|
||||||
|
## Назначение
|
||||||
|
|
||||||
|
Этот файл задает правила для документов типа `logic_block`.
|
||||||
|
|
||||||
|
## Когда использовать
|
||||||
|
|
||||||
|
Использовать для описания одного законченного блока логики, workflow или процесса.
|
||||||
|
|
||||||
|
## Обязательная структура
|
||||||
|
|
||||||
|
Документ должен содержать:
|
||||||
|
- YAML frontmatter
|
||||||
|
- `# <title>`
|
||||||
|
- `## Summary`
|
||||||
|
- `## Details`
|
||||||
|
|
||||||
|
## Что описывать в Details
|
||||||
|
|
||||||
|
- назначение логического блока
|
||||||
|
- входы и выходы
|
||||||
|
- последовательность выполнения
|
||||||
|
- интеграции
|
||||||
|
- ключевые ограничения
|
||||||
|
- состояние и ошибки, если они важны для понимания блока
|
||||||
|
|
||||||
|
## Ошибки оформления
|
||||||
|
|
||||||
|
- Нельзя описывать весь модуль целиком, если логика распадается на несколько независимых блоков.
|
||||||
|
- Нельзя превращать документ в пересказ исходного кода построчно.
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
# UI Page Rules
|
||||||
|
|
||||||
|
## Назначение
|
||||||
|
|
||||||
|
Этот файл задает правила для документов типа `ui_page`.
|
||||||
|
|
||||||
|
## Когда использовать
|
||||||
|
|
||||||
|
Использовать для описания одной пользовательской страницы, экрана или отдельного UI-сценария.
|
||||||
|
|
||||||
|
## Обязательная структура
|
||||||
|
|
||||||
|
Документ должен содержать:
|
||||||
|
- YAML frontmatter
|
||||||
|
- `# <title>`
|
||||||
|
- `## Summary`
|
||||||
|
- `## Details`
|
||||||
|
|
||||||
|
## Что описывать в Details
|
||||||
|
|
||||||
|
- назначение страницы
|
||||||
|
- пользовательский сценарий
|
||||||
|
- основные блоки интерфейса
|
||||||
|
- связанные API и сущности
|
||||||
@@ -0,0 +1,71 @@
|
|||||||
|
# Documentation Rules
|
||||||
|
|
||||||
|
Этот каталог оформляет MVP документации проекта в атомарном формате.
|
||||||
|
|
||||||
|
## Базовая структура
|
||||||
|
|
||||||
|
- Каждый документ содержит YAML frontmatter.
|
||||||
|
- В документе должен быть один `H1`, совпадающий с `title`.
|
||||||
|
- Основные разделы оформляются как `## Summary` и `## Details`.
|
||||||
|
- Внутри `Details` используются заголовки уровня `###` и ниже.
|
||||||
|
- Связи, сущности и навигация описываются во frontmatter через `related_docs`, `links`, `entities`, `parent`, `children`.
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
- Краткий explain-слой быстрого контекста.
|
||||||
|
- Должен позволять быстро понять назначение документа без чтения `Details`.
|
||||||
|
- Предпочтительный формат: компактный список ключевых фактов без длинных абзацев.
|
||||||
|
|
||||||
|
## Details
|
||||||
|
|
||||||
|
- Раскрывает полное описание объекта.
|
||||||
|
- Структура `Details` зависит от типа документа.
|
||||||
|
- Сценарии, ограничения, интеграции, ошибки и кодовые привязки должны быть разнесены по отдельным подразделам.
|
||||||
|
|
||||||
|
## API documents
|
||||||
|
|
||||||
|
Для `api_method` внутри `## Details` обязательны разделы:
|
||||||
|
- `### Описание`
|
||||||
|
- `### Сценарий`
|
||||||
|
- `### Функциональные требования`
|
||||||
|
- `### Нефункциональные требования`
|
||||||
|
- `### Контракт`
|
||||||
|
|
||||||
|
Если у метода есть интеграции и ошибки, также обязательны:
|
||||||
|
- `### Интеграции`
|
||||||
|
- `### Ошибки`
|
||||||
|
- `### Связанный код`
|
||||||
|
- `### История изменений`
|
||||||
|
|
||||||
|
### Сценарий
|
||||||
|
|
||||||
|
Сценарий оформляется как технический use case и содержит:
|
||||||
|
- название
|
||||||
|
- предусловия
|
||||||
|
- триггер
|
||||||
|
- основной сценарий
|
||||||
|
- альтернативный сценарий
|
||||||
|
- обработку ошибок
|
||||||
|
- постусловие
|
||||||
|
|
||||||
|
### Требования
|
||||||
|
|
||||||
|
- Функциональные требования маркируются как `FR-1`, `FR-2`, ...
|
||||||
|
- Нефункциональные требования маркируются как `NFR-1`, `NFR-2`, ...
|
||||||
|
- Идентификаторы требований локальны в рамках одного документа.
|
||||||
|
|
||||||
|
### Контракт
|
||||||
|
|
||||||
|
Контракт должен быть пригоден для последующей сборки OpenAPI-спецификации и включать:
|
||||||
|
- входные параметры
|
||||||
|
- выходные параметры
|
||||||
|
- структуру JSON-сообщений
|
||||||
|
- обязательность полей
|
||||||
|
- типы и ограничения
|
||||||
|
- описание полей
|
||||||
|
- правила заполнения
|
||||||
|
- примеры данных
|
||||||
|
- auth
|
||||||
|
- idempotency
|
||||||
|
- timeout
|
||||||
|
- ошибки и их HTTP-коды
|
||||||
@@ -0,0 +1,38 @@
|
|||||||
|
# Documentation System
|
||||||
|
|
||||||
|
## Назначение
|
||||||
|
|
||||||
|
Этот файл задает общую модель документации проекта.
|
||||||
|
|
||||||
|
## Базовая модель
|
||||||
|
|
||||||
|
Каждый документ должен состоять из двух слоев:
|
||||||
|
- YAML frontmatter
|
||||||
|
- контент
|
||||||
|
|
||||||
|
Контент всегда состоит из двух обязательных разделов:
|
||||||
|
- `## Summary`
|
||||||
|
- `## Details`
|
||||||
|
|
||||||
|
Над ними должен быть один заголовок `# <title>`, совпадающий со значением `title` во frontmatter.
|
||||||
|
|
||||||
|
## Принципы
|
||||||
|
|
||||||
|
- Документы должны быть атомарными.
|
||||||
|
- Один документ описывает одну тему.
|
||||||
|
- Вместо дублирования между документами используются явные ссылки.
|
||||||
|
- Связи и навигация должны быть формализованы.
|
||||||
|
- Документы должны быть пригодны для чтения человеком и для RAG.
|
||||||
|
- Документы должны быть пригодны для частичного обновления без деградации структуры.
|
||||||
|
|
||||||
|
## Типы документов
|
||||||
|
|
||||||
|
На уровне проекта поддерживаются типы:
|
||||||
|
- `api_method`
|
||||||
|
- `logic_block`
|
||||||
|
- `architecture_overview`
|
||||||
|
- `domain_entity`
|
||||||
|
- `ui_page`
|
||||||
|
- `integration_doc`
|
||||||
|
- `index_page`
|
||||||
|
- `glossary_item`
|
||||||
@@ -0,0 +1,67 @@
|
|||||||
|
# Frontmatter Rules
|
||||||
|
|
||||||
|
## Назначение
|
||||||
|
|
||||||
|
Этот файл описывает единый контракт YAML frontmatter для всех документов.
|
||||||
|
|
||||||
|
## Обязательные поля
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
id: string
|
||||||
|
title: string
|
||||||
|
doc_type: string
|
||||||
|
domain: string
|
||||||
|
sub_domain: string
|
||||||
|
related_docs: []
|
||||||
|
status: string
|
||||||
|
```
|
||||||
|
|
||||||
|
## Поля совместимости и рекомендуемые поля
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
type: string
|
||||||
|
name: string
|
||||||
|
module: string
|
||||||
|
layer: string
|
||||||
|
updated_at: YYYY-MM-DD
|
||||||
|
tags: []
|
||||||
|
entities: []
|
||||||
|
parent: string | null
|
||||||
|
children: []
|
||||||
|
links: {}
|
||||||
|
source_of_truth: string
|
||||||
|
related_code: []
|
||||||
|
system_analytics_refs: []
|
||||||
|
```
|
||||||
|
|
||||||
|
## Правила
|
||||||
|
|
||||||
|
- `id` должен быть стабильным и уникальным в пределах документации проекта.
|
||||||
|
- `title` — человекочитаемый заголовок.
|
||||||
|
- `doc_type` — канонический тип документа.
|
||||||
|
- `domain` и `sub_domain` определяют бизнес-контекст документа.
|
||||||
|
- `related_docs` хранит явные связи с другими markdown-документами.
|
||||||
|
- `status` хранит жизненный цикл документа: например `draft`, `approved`, `active`.
|
||||||
|
- `type` допустимо дублировать как alias для tooling-совместимости с индексаторами.
|
||||||
|
- `name` — короткое системное имя документа.
|
||||||
|
- `module` — модуль или подсистема.
|
||||||
|
- `layer` — слой системы.
|
||||||
|
- `updated_at` хранится в формате `YYYY-MM-DD`.
|
||||||
|
|
||||||
|
## Связи и навигация
|
||||||
|
|
||||||
|
- `entities` описывает сущности, связанные с документом.
|
||||||
|
- `parent` и `children` описывают иерархию.
|
||||||
|
- `links` описывает typed graph связей между документами, кодом и интеграциями.
|
||||||
|
|
||||||
|
## Формат links
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
links:
|
||||||
|
called_by:
|
||||||
|
- ext.health_probe
|
||||||
|
uses_logic:
|
||||||
|
- logic.some_flow
|
||||||
|
integrates_with:
|
||||||
|
- ext.some_system
|
||||||
|
```
|
||||||
@@ -0,0 +1,33 @@
|
|||||||
|
# Linking Rules
|
||||||
|
|
||||||
|
## Назначение
|
||||||
|
|
||||||
|
Этот файл описывает, как связывать документы между собой.
|
||||||
|
|
||||||
|
## Иерархия
|
||||||
|
|
||||||
|
- `parent` используется для родительского документа.
|
||||||
|
- `children` используется для прямых дочерних документов.
|
||||||
|
- Иерархия должна быть осмысленной и стабильной.
|
||||||
|
- Для общей точки входа допустим `index_page`.
|
||||||
|
|
||||||
|
## Графовые связи
|
||||||
|
|
||||||
|
Для `related_docs` используются ссылки на соседние документы.
|
||||||
|
|
||||||
|
Для `links` рекомендуется использовать typed-ключи:
|
||||||
|
- `called_by`
|
||||||
|
- `uses_logic`
|
||||||
|
- `reads_db`
|
||||||
|
- `writes_db`
|
||||||
|
- `integrates_with`
|
||||||
|
- `used_by`
|
||||||
|
- `exposes_api`
|
||||||
|
- `uses_entities`
|
||||||
|
|
||||||
|
## Правила использования
|
||||||
|
|
||||||
|
- Если документ логически входит в другой, использовать `parent`/`children`.
|
||||||
|
- Если связь нужна для навигации между равноправными документами, дублировать ее в `related_docs`.
|
||||||
|
- Если связь отражает поведение, интеграции или переиспользование, фиксировать ее в `links`.
|
||||||
|
- Детальное описание интеграций хранить в body документа, а не только во frontmatter.
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
# Naming Rules
|
||||||
|
|
||||||
|
## Назначение
|
||||||
|
|
||||||
|
Этот файл описывает правила именования документов, файлов и идентификаторов.
|
||||||
|
|
||||||
|
## Правила для файлов
|
||||||
|
|
||||||
|
- Имена файлов должны быть в kebab-case.
|
||||||
|
- Имя файла должно отражать одну тему.
|
||||||
|
- Для шаблонов использовать суффикс `.template.md`.
|
||||||
|
|
||||||
|
## Правила для id
|
||||||
|
|
||||||
|
- `id` строится в формате `<type-group>.<name>`.
|
||||||
|
- Примеры:
|
||||||
|
- `api.send_message_endpoint`
|
||||||
|
- `logic.telegram_notification_loop`
|
||||||
|
- `architecture.telegram_notify_app`
|
||||||
|
|
||||||
|
## Правила для title
|
||||||
|
|
||||||
|
- `title` должен быть кратким и человекочитаемым.
|
||||||
|
- В `title` допускаются пробелы и естественный язык.
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
# Writing Style
|
||||||
|
|
||||||
|
## Назначение
|
||||||
|
|
||||||
|
Этот файл задает правила стиля для текстового наполнения документации.
|
||||||
|
|
||||||
|
## Правила стиля
|
||||||
|
|
||||||
|
- Текст должен быть лаконичным.
|
||||||
|
- Формулировки должны быть точными и техническими.
|
||||||
|
- Summary должен быть кратким explain-слоем.
|
||||||
|
- Details должен раскрывать суть без лишней воды.
|
||||||
|
- Нежелательно смешивать несколько тем в одном документе.
|
||||||
|
- Если детали относятся к другому артефакту, их нужно выносить в отдельный документ.
|
||||||
|
|
||||||
|
## Язык
|
||||||
|
|
||||||
|
- Основной язык документации — русский.
|
||||||
|
- Технические термины, названия классов, API, RAG, OpenAPI, runtime и другие устоявшиеся identifiers можно оставлять на английском.
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
# API Contract Rules
|
||||||
|
|
||||||
|
## Назначение
|
||||||
|
|
||||||
|
Этот файл описывает, как оформлять подраздел `### Контракт` в API-документах.
|
||||||
|
|
||||||
|
## Что должно быть описано
|
||||||
|
|
||||||
|
- входные параметры
|
||||||
|
- выходные параметры
|
||||||
|
- JSON-структуры запросов и ответов
|
||||||
|
- обязательность полей
|
||||||
|
- типы полей
|
||||||
|
- ограничения
|
||||||
|
- описание назначения полей
|
||||||
|
- примеры данных
|
||||||
|
- auth
|
||||||
|
- idempotency
|
||||||
|
- timeout
|
||||||
|
- ошибки и их HTTP-коды
|
||||||
|
|
||||||
|
## Правило качества
|
||||||
|
|
||||||
|
Контракт должен быть достаточно формальным, чтобы по нему можно было собрать OpenAPI-спецификацию.
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
# API Scenario Rules
|
||||||
|
|
||||||
|
## Назначение
|
||||||
|
|
||||||
|
Этот файл описывает, как оформлять подраздел `### Сценарий` в API-документах.
|
||||||
|
|
||||||
|
## Обязательные части
|
||||||
|
|
||||||
|
- название
|
||||||
|
- предусловия
|
||||||
|
- триггер
|
||||||
|
- основной сценарий
|
||||||
|
- альтернативный сценарий
|
||||||
|
- обработка ошибок
|
||||||
|
- постусловие
|
||||||
|
|
||||||
|
## Правила
|
||||||
|
|
||||||
|
- Сценарий должен быть лаконичным.
|
||||||
|
- Сценарий должен отражать суть шага.
|
||||||
|
- Сложные технические детали надо выносить в `FR-*`.
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
# Details Section Rules
|
||||||
|
|
||||||
|
## Назначение
|
||||||
|
|
||||||
|
Этот файл задает общие правила для секции `## Details`.
|
||||||
|
|
||||||
|
## Правила
|
||||||
|
|
||||||
|
- `Details` оформляется как `## Details`.
|
||||||
|
- Внутри `Details` используются заголовки уровня `###` и ниже.
|
||||||
|
- Структура Details зависит от типа документа.
|
||||||
|
- В Details не нужно повторно дублировать навигацию и связи, если они уже есть во frontmatter.
|
||||||
|
- Интеграции, ошибки и кодовые привязки должны быть выделены в отдельные подразделы, если они существенны для понимания документа.
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
# Requirements Format Rules
|
||||||
|
|
||||||
|
## Назначение
|
||||||
|
|
||||||
|
Этот файл задает формат для функциональных и нефункциональных требований.
|
||||||
|
|
||||||
|
## Функциональные требования
|
||||||
|
|
||||||
|
- Использовать коды `FR-1`, `FR-2`, `FR-3` и так далее.
|
||||||
|
- Каждое требование должно описывать отдельный обязательный аспект поведения.
|
||||||
|
- Идентификаторы локальны в пределах одного документа.
|
||||||
|
|
||||||
|
## Нефункциональные требования
|
||||||
|
|
||||||
|
- Использовать коды `NFR-1`, `NFR-2`, `NFR-3` и так далее.
|
||||||
|
- Требования должны описывать характеристики качества, ограничения и эксплуатационные свойства.
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
# Summary Section Rules
|
||||||
|
|
||||||
|
## Назначение
|
||||||
|
|
||||||
|
Этот файл задает правила для секции `## Summary`.
|
||||||
|
|
||||||
|
## Правила
|
||||||
|
|
||||||
|
- Summary должен быть коротким explain-слоем быстрого контекста.
|
||||||
|
- Summary должен объяснять суть документа без лишних деталей.
|
||||||
|
- Summary должен быть пригоден для explain и быстрого чтения.
|
||||||
|
- Предпочтительный формат: список ключевых фактов `Purpose`, `Actor`, `Trigger`, `Errors`, `Related ...` и т.д.
|
||||||
|
- Для крупных документов допустим более длинный summary, если он остается структурированным.
|
||||||
@@ -0,0 +1,84 @@
|
|||||||
|
---
|
||||||
|
id: api.example_method
|
||||||
|
type: api_method
|
||||||
|
doc_type: api_method
|
||||||
|
name: example_method
|
||||||
|
title: HTTP API /example
|
||||||
|
module: example_module
|
||||||
|
layer: application
|
||||||
|
domain: example_domain
|
||||||
|
sub_domain: example_subdomain
|
||||||
|
related_docs: []
|
||||||
|
status: draft
|
||||||
|
updated_at: 2026-03-20
|
||||||
|
source_of_truth: code
|
||||||
|
parent: null
|
||||||
|
children: []
|
||||||
|
tags: []
|
||||||
|
entities: []
|
||||||
|
links: {}
|
||||||
|
---
|
||||||
|
|
||||||
|
# HTTP API /example
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Краткое описание метода.
|
||||||
|
|
||||||
|
## Details
|
||||||
|
|
||||||
|
### Описание
|
||||||
|
|
||||||
|
Короткое описание сути метода.
|
||||||
|
|
||||||
|
### Сценарий
|
||||||
|
|
||||||
|
**Название:**
|
||||||
|
|
||||||
|
**Предусловия:**
|
||||||
|
-
|
||||||
|
|
||||||
|
**Триггер:**
|
||||||
|
-
|
||||||
|
|
||||||
|
**Основной сценарий:**
|
||||||
|
1.
|
||||||
|
|
||||||
|
**Альтернативный сценарий:**
|
||||||
|
1.
|
||||||
|
|
||||||
|
**Обработка ошибок:**
|
||||||
|
1.
|
||||||
|
|
||||||
|
**Постусловие:**
|
||||||
|
-
|
||||||
|
|
||||||
|
### Функциональные требования
|
||||||
|
|
||||||
|
**FR-1.**
|
||||||
|
|
||||||
|
### Нефункциональные требования
|
||||||
|
|
||||||
|
**NFR-1.**
|
||||||
|
|
||||||
|
### Контракт
|
||||||
|
|
||||||
|
#### Входные параметры
|
||||||
|
|
||||||
|
| Параметр | Где передается | Тип | Обязательность | Ограничения | Описание | Пример |
|
||||||
|
|---|---|---|---|---|---|---|
|
||||||
|
| | | | | | | |
|
||||||
|
|
||||||
|
#### Выходные параметры
|
||||||
|
|
||||||
|
| Поле | Тип | Обязательность | Ограничения | Описание | Заполнение | Пример |
|
||||||
|
|---|---|---|---|---|---|---|
|
||||||
|
| | | | | | | |
|
||||||
|
|
||||||
|
### Интеграции
|
||||||
|
|
||||||
|
### Ошибки
|
||||||
|
|
||||||
|
### Связанный код
|
||||||
|
|
||||||
|
### История изменений
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
---
|
||||||
|
id: architecture.example_system
|
||||||
|
type: architecture_overview
|
||||||
|
doc_type: architecture_overview
|
||||||
|
name: example_system
|
||||||
|
title: Обзор архитектуры Example System
|
||||||
|
module: example_module
|
||||||
|
layer: system
|
||||||
|
domain: example_domain
|
||||||
|
sub_domain: example_subdomain
|
||||||
|
related_docs: []
|
||||||
|
status: draft
|
||||||
|
updated_at: 2026-03-20
|
||||||
|
source_of_truth: mixed
|
||||||
|
parent: null
|
||||||
|
children: []
|
||||||
|
tags: []
|
||||||
|
entities: []
|
||||||
|
links: {}
|
||||||
|
---
|
||||||
|
|
||||||
|
# Обзор архитектуры Example System
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Краткое описание архитектуры.
|
||||||
|
|
||||||
|
## Details
|
||||||
|
|
||||||
|
### Описание
|
||||||
|
|
||||||
|
### Контекст
|
||||||
|
|
||||||
|
### Границы системы
|
||||||
|
|
||||||
|
### Компоненты
|
||||||
|
|
||||||
|
### Интеграционные сценарии
|
||||||
|
|
||||||
|
### Интеграции
|
||||||
|
|
||||||
|
### Ограничения
|
||||||
|
|
||||||
|
### Связанный код
|
||||||
|
|
||||||
|
### Связанные документы
|
||||||
|
|
||||||
|
### История изменений
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
---
|
||||||
|
id: domain.example_entity
|
||||||
|
type: domain_entity
|
||||||
|
doc_type: domain_entity
|
||||||
|
name: example_entity
|
||||||
|
title: Пример доменной сущности
|
||||||
|
module: example_module
|
||||||
|
layer: domain
|
||||||
|
domain: example_domain
|
||||||
|
sub_domain: example_subdomain
|
||||||
|
related_docs: []
|
||||||
|
status: draft
|
||||||
|
updated_at: 2026-03-20
|
||||||
|
source_of_truth: code
|
||||||
|
parent: null
|
||||||
|
children: []
|
||||||
|
tags: []
|
||||||
|
entities: []
|
||||||
|
links: {}
|
||||||
|
---
|
||||||
|
|
||||||
|
# Пример доменной сущности
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Краткое описание сущности.
|
||||||
|
|
||||||
|
## Details
|
||||||
|
|
||||||
|
### Описание
|
||||||
|
|
||||||
|
### Модель данных
|
||||||
|
|
||||||
|
### Состояния и инварианты
|
||||||
|
|
||||||
|
### Технический use case
|
||||||
|
|
||||||
|
### Функциональные требования
|
||||||
|
|
||||||
|
### Нефункциональные требования
|
||||||
|
|
||||||
|
### Интеграции
|
||||||
|
|
||||||
|
### Связанный код
|
||||||
|
|
||||||
|
### Связанные документы
|
||||||
|
|
||||||
|
### История изменений
|
||||||
@@ -0,0 +1,50 @@
|
|||||||
|
---
|
||||||
|
id: logic.example_block
|
||||||
|
type: logic_block
|
||||||
|
doc_type: logic_block
|
||||||
|
name: example_block
|
||||||
|
title: Пример блока логики
|
||||||
|
module: example_module
|
||||||
|
layer: application
|
||||||
|
domain: example_domain
|
||||||
|
sub_domain: example_subdomain
|
||||||
|
related_docs: []
|
||||||
|
status: draft
|
||||||
|
updated_at: 2026-03-20
|
||||||
|
source_of_truth: code
|
||||||
|
parent: null
|
||||||
|
children: []
|
||||||
|
tags: []
|
||||||
|
entities: []
|
||||||
|
links: {}
|
||||||
|
---
|
||||||
|
|
||||||
|
# Пример блока логики
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Краткое описание блока логики.
|
||||||
|
|
||||||
|
## Details
|
||||||
|
|
||||||
|
### Описание
|
||||||
|
|
||||||
|
### Контекст
|
||||||
|
|
||||||
|
### Технический use case
|
||||||
|
|
||||||
|
### Функциональные требования
|
||||||
|
|
||||||
|
### Нефункциональные требования
|
||||||
|
|
||||||
|
### Интеграции
|
||||||
|
|
||||||
|
### Ограничения и условия вызова
|
||||||
|
|
||||||
|
### Ошибки и деградации
|
||||||
|
|
||||||
|
### Связанные API
|
||||||
|
|
||||||
|
### Связанный код
|
||||||
|
|
||||||
|
### История изменений
|
||||||
@@ -16,3 +16,4 @@ class QueryFeatures:
|
|||||||
logic_markers: list[str]
|
logic_markers: list[str]
|
||||||
domain_markers: list[str]
|
domain_markers: list[str]
|
||||||
endpoint_markers: list[str]
|
endpoint_markers: list[str]
|
||||||
|
scope_type: str = "unknown"
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import re
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from app.core.agent.processes.v2.intent_router.modules.target_terms import TargetTermsAnalysis
|
from app.core.agent.processes.v2.intent_router.modules.target_terms import TargetTermsAnalysis
|
||||||
from app.core.agent.processes.v2.models import V2RouteAnchors
|
from app.core.agent.utils.process_v2.models import V2RouteAnchors
|
||||||
|
|
||||||
|
|
||||||
@dataclass(slots=True)
|
@dataclass(slots=True)
|
||||||
|
|||||||
@@ -0,0 +1,176 @@
|
|||||||
|
"""Build an in-memory DOCS scope index from D1/D3 catalog rows (no chunk retrieval).
|
||||||
|
|
||||||
|
Parses metadata from ``D1_DOCUMENT_CATALOG`` and ``D3_ENTITY_CATALOG`` rows produced by the
|
||||||
|
existing RAG indexer—no additional layers or chunk scans.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
|
||||||
|
def _norm_text(value: object) -> str:
    """Return ``value`` as lowercase text with whitespace runs collapsed.

    Any falsy input (``None``, ``""``, ``0``, an empty container) becomes the
    empty string. Leading and trailing whitespace is removed and every inner
    run of whitespace is replaced by a single space.
    """
    text = str(value or "")
    lowered = text.strip().lower()
    return re.sub(r"\s+", " ", lowered)
|
||||||
|
|
||||||
|
|
||||||
|
def _split_multi(value: object) -> list[str]:
    """Split a multi-valued metadata field into trimmed, non-empty strings.

    Args:
        value: ``None``, a list or tuple of items, or any other object. Other
            objects are stringified and split on ``;``, ``,`` or ``|``.

    Returns:
        The items in their original order with surrounding whitespace
        removed. Empty items and ``None`` items are dropped.
    """
    if value is None:
        return []
    if isinstance(value, (list, tuple)):
        # Tuples are handled like lists; stringifying one would split its repr
        # on commas. None items are skipped so a null entry does not surface
        # as the literal term "None".
        candidates = [str(item) for item in value if item is not None]
    else:
        candidates = re.split(r"[;,|]", str(value))
    return [text for text in (piece.strip() for piece in candidates) if text]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class DocsScopeCatalog:
|
||||||
|
"""Flattened terms from D1_DOCUMENT_CATALOG and D3_ENTITY_CATALOG for lexical grounding."""
|
||||||
|
|
||||||
|
domain_values: set[str] = field(default_factory=set)
|
||||||
|
subdomain_pairs: list[tuple[str, str]] = field(default_factory=list) # (domain, subdomain)
|
||||||
|
entity_records: list[dict[str, object]] = field(default_factory=list)
|
||||||
|
api_records: list[dict[str, object]] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
def build_docs_scope_catalog(rows: list[dict]) -> DocsScopeCatalog:
    """Build a ``DocsScopeCatalog`` from catalog-layer rows.

    Rows whose ``layer`` is ``D1_DOCUMENT_CATALOG`` or ``D3_ENTITY_CATALOG`` are
    ingested; rows from any other layer are ignored. A ``metadata`` value that
    is not a dict is treated as empty.
    """
    catalog = DocsScopeCatalog()
    for row in rows:

        def text_of(key: str) -> str:
            return str(row.get(key) or "")

        layer = text_of("layer")
        raw_meta = row.get("metadata")
        metadata = raw_meta if isinstance(raw_meta, dict) else {}
        path = text_of("path")
        title = text_of("title")
        content = text_of("content")

        if layer == "D1_DOCUMENT_CATALOG":
            _ingest_d1_row(catalog, path=path, title=title, content=content, metadata=metadata)
        elif layer == "D3_ENTITY_CATALOG":
            _ingest_d3_row(catalog, path=path, title=title, metadata=metadata)
    return catalog
|
||||||
|
|
||||||
|
|
||||||
|
def _ingest_d1_row(
    catalog: DocsScopeCatalog,
    *,
    path: str,
    title: str,
    content: str,
    metadata: dict,
) -> None:
    """Add the terms of one ``D1_DOCUMENT_CATALOG`` row to ``catalog``.

    Records the row's domain and (domain, subdomain) pair, one entity record
    per non-empty entity and per tag of at least three characters, and one API
    record when the row looks like an API method and an endpoint is known.
    """
    doc_type = _norm_text(metadata.get("type") or metadata.get("doc_type"))
    domain = _norm_text(metadata.get("domain"))
    subdomain = _norm_text(metadata.get("subdomain"))
    name = _norm_text(metadata.get("name"))
    summary = _norm_text(metadata.get("summary_text"))
    endpoint = _norm_text(metadata.get("endpoint"))

    if domain:
        catalog.domain_values.add(domain)
        if subdomain:
            catalog.subdomain_pairs.append((domain, subdomain))

    # Free-text context stored on every entity record of this row.
    blob = " ".join(filter(None, (name, title, summary, content)))

    entity_terms = [e for e in map(_norm_text, _split_multi(metadata.get("entities"))) if e]
    # Tags shorter than three characters are not recorded.
    tag_terms = [t for t in map(_norm_text, _split_multi(metadata.get("tags"))) if len(t) >= 3]
    for term in (*entity_terms, *tag_terms):
        catalog.entity_records.append(
            {
                "name": term,
                "domain": domain or None,
                "subdomain": subdomain or None,
                "source_layer": "D1_DOCUMENT_CATALOG",
                "path": path,
                "blob": blob,
            }
        )

    looks_like_api = doc_type == "api_method" or "api_method" in path.lower()
    if not (looks_like_api or endpoint):
        return
    # Prefer the explicit metadata endpoint; otherwise try to read it off the title.
    resolved_endpoint = endpoint or _endpoint_from_title(title)
    if resolved_endpoint:
        catalog.api_records.append(
            {
                "endpoint": resolved_endpoint,
                "domain": domain or None,
                "source_layer": "D1_DOCUMENT_CATALOG",
                "path": path,
                "title": title,
            }
        )
|
||||||
|
|
||||||
|
|
||||||
|
def _ingest_d3_row(
    catalog: DocsScopeCatalog,
    *,
    path: str,
    title: str,
    metadata: dict,
) -> None:
    """Add the terms of one ``D3_ENTITY_CATALOG`` row to ``catalog``.

    Records the row's domain and (domain, subdomain) pair and, when the
    metadata carries an ``entity_name``, one entity record for it.
    """
    entity_name = str(metadata.get("entity_name") or "").strip()
    source_path = str(metadata.get("source_path") or "").strip()
    domain = _norm_text(metadata.get("domain"))
    subdomain = _norm_text(metadata.get("subdomain"))
    module = _norm_text(metadata.get("module"))
    tags = [_norm_text(tag) for tag in _split_multi(metadata.get("tags"))]

    if domain:
        catalog.domain_values.add(domain)
        if subdomain:
            catalog.subdomain_pairs.append((domain, subdomain))

    if not entity_name:
        return

    # Normalized context text stored alongside the entity name.
    blob_parts = (entity_name, title, module, source_path, " ".join(tags))
    blob = " ".join(_norm_text(part) for part in blob_parts if part)
    catalog.entity_records.append(
        {
            "name": _norm_text(entity_name),
            "domain": domain or None,
            "subdomain": subdomain or None,
            "module": module or None,
            "source_layer": "D3_ENTITY_CATALOG",
            "path": path or source_path,
            "blob": blob,
        }
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _endpoint_from_title(title: str) -> str:
|
||||||
|
t = str(title or "").strip()
|
||||||
|
if not t:
|
||||||
|
return ""
|
||||||
|
upper = t.upper()
|
||||||
|
for method in ("GET ", "POST ", "PUT ", "PATCH ", "DELETE "):
|
||||||
|
if method in upper:
|
||||||
|
idx = upper.index(method)
|
||||||
|
rest = t[idx:].split()
|
||||||
|
if len(rest) >= 2 and rest[1].startswith("/"):
|
||||||
|
return _norm_text(rest[1])
|
||||||
|
m = re.search(r"(\/[a-z0-9_./{}-]+)", t, re.IGNORECASE)
|
||||||
|
return _norm_text(m.group(1)) if m else ""
|
||||||
@@ -0,0 +1,443 @@
|
|||||||
|
"""Deterministic scope resolution from query + derived DOCS catalog (pre-LLM).
|
||||||
|
|
||||||
|
Matches the user query against catalog terms (exact / normalized). Optional embedding-based
|
||||||
|
retrieval can extend candidates later; final ``scope_type`` never relies on embeddings alone.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.intent_router.modules.scope_catalog import DocsScopeCatalog
|
||||||
|
from app.core.agent.processes.v2.intent_router.modules.target_terms import TargetTermsAnalysis
|
||||||
|
from app.core.agent.utils.process_v2.models import ScopeCandidate, V2ScopeType
|
||||||
|
|
||||||
|
|
||||||
|
_SCORE_EXACT = 1.0
|
||||||
|
_SCORE_NORMALIZED = 0.88
|
||||||
|
_SCORE_SOFT = 0.72
|
||||||
|
_STRONG_THRESHOLD = 0.85
|
||||||
|
|
||||||
|
_ENUM_MARKERS_RU = (
|
||||||
|
"какие ",
|
||||||
|
"какие\n",
|
||||||
|
"какой ",
|
||||||
|
"какого ",
|
||||||
|
"список",
|
||||||
|
"перечисли",
|
||||||
|
"перечислить",
|
||||||
|
"все api",
|
||||||
|
"все методы",
|
||||||
|
"какие api",
|
||||||
|
"какие методы",
|
||||||
|
"каких ",
|
||||||
|
)
|
||||||
|
_SINGLE_SEGMENT_ENDPOINT_ALLOWLIST = frozenset(
|
||||||
|
{
|
||||||
|
"/health",
|
||||||
|
"/send",
|
||||||
|
"/healthz",
|
||||||
|
"/ready",
|
||||||
|
"/live",
|
||||||
|
"/metrics",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
_PROJECT_WIDE_MARKERS = (
|
||||||
|
"в проекте",
|
||||||
|
"в системе",
|
||||||
|
"в приложении",
|
||||||
|
"по проекту",
|
||||||
|
"во всем проекте",
|
||||||
|
"overall",
|
||||||
|
"in the project",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
class ScopeResolution:
    """Result of matching a query against the DOCS scope catalog."""

    # Resolved scope level; stays UNKNOWN until a rule decides otherwise.
    scope_type: str = V2ScopeType.UNKNOWN

    # Every match found per category, strong or weak.
    candidate_domains: list[ScopeCandidate] = field(default_factory=list)
    candidate_subdomains: list[ScopeCandidate] = field(default_factory=list)
    candidate_entities: list[ScopeCandidate] = field(default_factory=list)
    candidate_apis: list[ScopeCandidate] = field(default_factory=list)

    # High-confidence picks that callers copy onto route anchors.
    strong_domain: str | None = None
    strong_subdomain: str | None = None
    strong_entity_names: list[str] = field(default_factory=list)
    strong_endpoint_paths: list[str] = field(default_factory=list)

    # True once a catalog with at least one indexed term was available.
    catalog_loaded: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
def _catalog_has_index_terms(catalog: DocsScopeCatalog) -> bool:
|
||||||
|
return bool(
|
||||||
|
catalog.domain_values
|
||||||
|
or catalog.subdomain_pairs
|
||||||
|
or catalog.entity_records
|
||||||
|
or catalog.api_records
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def plausible_doc_endpoint_paths(paths: list[str]) -> list[str]:
    """Filter out path-like tokens that are unlikely to be real endpoints.

    A path is kept (stripped and lower-cased) when it starts with ``/`` and
    either has two or more non-empty segments or is one of the allow-listed
    single-segment paths. Everything else is dropped; input order is kept.
    """
    kept: list[str] = []
    for raw in paths:
        candidate = str(raw or "").strip().lower()
        if not candidate.startswith("/"):
            continue
        depth = sum(1 for segment in candidate.split("/") if segment)
        if depth >= 2 or (depth == 1 and candidate in _SINGLE_SEGMENT_ENDPOINT_ALLOWLIST):
            kept.append(candidate)
    return kept
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_docs_scope(
    normalized_query: str,
    terms: TargetTermsAnalysis,
    catalog: DocsScopeCatalog | None,
) -> ScopeResolution:
    """Resolve the documentation scope of a query by lexical matching only.

    Returns a default ``ScopeResolution`` (``catalog_loaded`` False) when no
    catalog or an empty catalog is given. Otherwise candidates are collected
    and the first rule that applies decides ``scope_type``:

    1. plausible endpoint paths in ``terms`` -> ENTITY
    2. strong API candidate -> ENTITY
    3. strong subdomain candidate whose both parts match the query -> SUBDOMAIN
    4. strong entity candidate -> ENTITY
    5. strong subdomain candidate -> SUBDOMAIN
    6. strong domain candidate -> DOMAIN
    7. project-wide enumeration wording -> GLOBAL
    8. otherwise UNKNOWN
    """
    resolution = ScopeResolution()
    if catalog is None or not _catalog_has_index_terms(catalog):
        return resolution

    resolution.catalog_loaded = True
    query_l = _norm_query(normalized_query)
    if not query_l:
        resolution.scope_type = V2ScopeType.UNKNOWN
        return resolution

    collectors = (
        _collect_domain_candidates,
        _collect_subdomain_candidates,
        _collect_entity_candidates,
        _collect_api_candidates,
    )
    for collect in collectors:
        collect(query_l, catalog, resolution)
    _dedupe_candidates(resolution)

    def is_strong(candidate) -> bool:
        return bool(candidate) and candidate.score >= _STRONG_THRESHOLD

    def settle_subdomain(candidate) -> ScopeResolution:
        # The candidate value has the form "domain::subdomain".
        resolution.scope_type = V2ScopeType.SUBDOMAIN
        parts = _split_subdomain_value(candidate.value)
        if parts:
            resolution.strong_domain, resolution.strong_subdomain = parts[0], parts[1]
        return resolution

    endpoint_paths = plausible_doc_endpoint_paths(list(terms.endpoint_paths))
    resolution.strong_endpoint_paths = list(dict.fromkeys(endpoint_paths))

    strong_api = _pick_strong(resolution.candidate_apis)
    strong_entity = _pick_strong(resolution.candidate_entities)
    strong_sub = _pick_strong(resolution.candidate_subdomains)
    strong_dom = _pick_strong(resolution.candidate_domains)

    # Endpoint paths extracted from the query take precedence over catalog matches.
    if endpoint_paths:
        resolution.scope_type = V2ScopeType.ENTITY
        resolution.strong_entity_names = _merge_unique(
            resolution.strong_entity_names,
            _entities_for_endpoints(endpoint_paths, catalog),
        )
        return resolution

    if is_strong(strong_api):
        resolution.scope_type = V2ScopeType.ENTITY
        resolution.strong_endpoint_paths = _merge_unique(
            resolution.strong_endpoint_paths,
            [strong_api.value],
        )
        return resolution

    # A subdomain whose domain and subdomain both match the query outranks entities.
    if is_strong(strong_sub) and _subdomain_aligned_with_query(query_l, strong_sub.value):
        return settle_subdomain(strong_sub)

    if is_strong(strong_entity):
        resolution.scope_type = V2ScopeType.ENTITY
        resolution.strong_entity_names = _merge_unique(
            resolution.strong_entity_names,
            [strong_entity.value],
        )
        return resolution

    if is_strong(strong_sub):
        return settle_subdomain(strong_sub)

    if is_strong(strong_dom):
        resolution.scope_type = V2ScopeType.DOMAIN
        resolution.strong_domain = strong_dom.value
        return resolution

    if _is_global_enumeration(query_l, has_strong_any=bool(_any_strong(resolution))):
        resolution.scope_type = V2ScopeType.GLOBAL
        return resolution

    resolution.scope_type = V2ScopeType.UNKNOWN
    return resolution
|
||||||
|
|
||||||
|
|
||||||
|
def promote_target_terms(
    raw_terms: list[str],
    terms: TargetTermsAnalysis,
    resolution: ScopeResolution,
) -> list[str]:
    """Keep only the target terms that are grounded with high confidence.

    Args:
        raw_terms: Terms extracted from the query before scope resolution.
        terms: Extraction result; its ``endpoint_paths`` and ``matched_aliases``
            are always considered grounded.
        resolution: Scope resolution for the same query.

    Returns:
        A copy of ``raw_terms`` when no catalog was loaded. Otherwise a
        de-duplicated list, in input order, of the terms that are an endpoint
        path, a matched alias, a strong or exact candidate value, or a strong
        entity name. Endpoint terms that do not start with ``/`` keep their
        original case; every other kept term is lower-cased.
    """
    if not resolution.catalog_loaded:
        return list(raw_terms)

    candidates = _all_candidates(resolution)
    strong_values = {
        c.value
        for c in candidates
        if c.score >= _STRONG_THRESHOLD or c.match_type == "exact"
    }
    strong_entity = set(resolution.strong_entity_names)
    endpoints = set(terms.endpoint_paths)
    aliases = set(terms.matched_aliases)
    # Lower-cased lookups are loop-invariant, so build them once up front.
    endpoints_lower = {e.lower() for e in endpoints}
    aliases_lower = {a.lower() for a in aliases}

    out: list[str] = []
    for term in raw_terms:
        t = str(term or "").strip()
        if not t:
            continue
        tl = t.lower()
        if t in endpoints or tl in endpoints_lower:
            _append_unique(out, tl if tl.startswith("/") else t)
        elif t in aliases or tl in aliases_lower:
            _append_unique(out, tl)
        elif (
            tl in strong_values
            or t in strong_entity
            or (_is_explicit_identifier(t) and tl in strong_entity)
        ):
            _append_unique(out, tl)
        # Anything else is weak/ungrounded and is dropped here.
    return out
|
||||||
|
|
||||||
|
|
||||||
|
def _all_candidates(resolution: ScopeResolution) -> list[ScopeCandidate]:
|
||||||
|
return [
|
||||||
|
*resolution.candidate_domains,
|
||||||
|
*resolution.candidate_subdomains,
|
||||||
|
*resolution.candidate_entities,
|
||||||
|
*resolution.candidate_apis,
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _any_strong(resolution: ScopeResolution) -> bool:
    """Return True when any candidate in any category reaches the strong threshold."""
    for candidate in _all_candidates(resolution):
        if candidate.score >= _STRONG_THRESHOLD:
            return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _pick_strong(candidates: list[ScopeCandidate]) -> ScopeCandidate | None:
|
||||||
|
if not candidates:
|
||||||
|
return None
|
||||||
|
return max(candidates, key=lambda c: (c.score, len(c.value)))
|
||||||
|
|
||||||
|
|
||||||
|
def _norm_query(q: str) -> str:
|
||||||
|
return re.sub(r"\s+", " ", str(q or "").strip().lower())
|
||||||
|
|
||||||
|
|
||||||
|
def _append_unique(items: list[str], value: str) -> None:
|
||||||
|
if value and value not in items:
|
||||||
|
items.append(value)
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_unique(a: list[str], b: list[str]) -> list[str]:
|
||||||
|
return list(dict.fromkeys([*a, *b]))
|
||||||
|
|
||||||
|
|
||||||
|
def _is_explicit_identifier(token: str) -> bool:
|
||||||
|
return bool(re.fullmatch(r"[A-Za-z][A-Za-z0-9_]+", token))
|
||||||
|
|
||||||
|
|
||||||
|
def _split_subdomain_value(value: str) -> tuple[str, str] | None:
|
||||||
|
parts = str(value or "").split("::", 1)
|
||||||
|
if len(parts) == 2 and parts[0] and parts[1]:
|
||||||
|
return parts[0].strip().lower(), parts[1].strip().lower()
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _subdomain_aligned_with_query(query_l: str, composite: str) -> bool:
|
||||||
|
"""True when both domain and subdomain tokens match the query (substring / token match)."""
|
||||||
|
parts = str(composite or "").split("::", 1)
|
||||||
|
if len(parts) != 2:
|
||||||
|
return False
|
||||||
|
dom, sub = parts[0].strip().lower(), parts[1].strip().lower()
|
||||||
|
s_dom, _ = _match_score(query_l, dom)
|
||||||
|
s_sub, _ = _match_score(query_l, sub)
|
||||||
|
return s_dom > 0 and s_sub > 0
|
||||||
|
|
||||||
|
|
||||||
|
def _entities_for_endpoints(endpoint_paths: list[str], catalog: DocsScopeCatalog) -> list[str]:
|
||||||
|
found: list[str] = []
|
||||||
|
eps = {e.lower() for e in endpoint_paths if e}
|
||||||
|
for rec in catalog.entity_records:
|
||||||
|
blob = str(rec.get("blob") or "").lower()
|
||||||
|
name = str(rec.get("name") or "").strip().lower()
|
||||||
|
if not name:
|
||||||
|
continue
|
||||||
|
if any(ep and ep in blob for ep in eps):
|
||||||
|
_append_unique(found, name)
|
||||||
|
return found
|
||||||
|
|
||||||
|
|
||||||
|
def _collect_domain_candidates(query_l: str, catalog: DocsScopeCatalog, resolution: ScopeResolution) -> None:
    """Append a domain candidate to ``resolution`` for every catalog domain that matches the query."""
    for domain in catalog.domain_values:
        if not domain:
            continue
        score, match_type = _match_score(query_l, domain)
        if score > 0:
            resolution.candidate_domains.append(
                ScopeCandidate(
                    value=domain,
                    score=score,
                    source_layer="D1_DOCUMENT_CATALOG",
                    match_type=match_type,
                )
            )
|
||||||
|
|
||||||
|
|
||||||
|
def _collect_subdomain_candidates(query_l: str, catalog: DocsScopeCatalog, resolution: ScopeResolution) -> None:
    """Append a ``"domain::subdomain"`` candidate for every distinct pair that matches the query.

    A pair matches through the phrase score or, failing that, only when both
    the domain and the subdomain match on their own; the weaker of the two
    scores is then used.
    """
    visited: set[str] = set()
    for domain, subdomain in catalog.subdomain_pairs:
        if not (domain and subdomain):
            continue
        composite = f"{domain}::{subdomain}"
        if composite in visited:
            continue
        visited.add(composite)

        phrase = _phrase_score(query_l, domain, subdomain)
        if phrase > 0:
            score, match_type = phrase, "normalized"
        else:
            domain_score, _ = _match_score(query_l, domain)
            subdomain_score, match_type = _match_score(query_l, subdomain)
            # A pair is never promoted when only one of its halves matches.
            if not (domain_score > 0 and subdomain_score > 0):
                continue
            score = min(domain_score, subdomain_score)

        resolution.candidate_subdomains.append(
            ScopeCandidate(
                value=composite,
                score=score,
                source_layer="D1_DOCUMENT_CATALOG",
                match_type=match_type,
            )
        )
|
||||||
|
|
||||||
|
|
||||||
|
def _collect_entity_candidates(query_l: str, catalog: DocsScopeCatalog, resolution: ScopeResolution) -> None:
    """Append an entity candidate for every entity record whose name (or blob hint) matches the query.

    Names shorter than two characters are skipped.
    """
    for record in catalog.entity_records:
        name = str(record.get("name") or "").strip().lower()
        if len(name) < 2:
            continue
        blob = str(record.get("blob") or "").lower()
        source_layer = str(record.get("source_layer") or "")
        score, match_type = _match_entity(query_l, name, blob)
        if score > 0:
            resolution.candidate_entities.append(
                ScopeCandidate(value=name, score=score, source_layer=source_layer, match_type=match_type)
            )
|
||||||
|
|
||||||
|
|
||||||
|
def _collect_api_candidates(query_l: str, catalog: DocsScopeCatalog, resolution: ScopeResolution) -> None:
    """Append an API candidate for every catalog endpoint that matches the query.

    Spaces are removed from the endpoint for matching; the candidate value
    keeps the lower-cased endpoint as stored.
    """
    for record in catalog.api_records:
        endpoint = str(record.get("endpoint") or "").strip().lower()
        if not endpoint:
            continue
        source_layer = str(record.get("source_layer") or "")
        score, match_type = _match_score(query_l, endpoint.replace(" ", ""))
        if score > 0:
            resolution.candidate_apis.append(
                ScopeCandidate(value=endpoint, score=score, source_layer=source_layer, match_type=match_type)
            )
|
||||||
|
|
||||||
|
|
||||||
|
def _phrase_score(query_l: str, dom: str, sub: str) -> float:
    """Score how well the query matches a domain/subdomain pair taken as a phrase.

    Returns the larger of the normalized score and 0.9 when both names occur as
    whole tokens, the normalized score when ``"<dom> <sub>"`` contains or is
    contained in the query, and 0.0 otherwise.
    """
    both_as_tokens = _contains_token(query_l, dom) and _contains_token(query_l, sub)
    if both_as_tokens:
        return max(_SCORE_NORMALIZED, 0.9)
    joined = re.sub(r"\s+", " ", f"{dom} {sub}".strip())
    overlaps = joined in query_l or query_l in joined
    return _SCORE_NORMALIZED if overlaps else 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def _match_entity(query_l: str, name: str, blob: str) -> tuple[float, str]:
    """Score an entity name against the query, with a soft fallback via its blob.

    A direct name match wins. Otherwise a soft score is returned when the name
    (at least four characters) appears in the blob, the query shares a token
    with the blob, and the query contains the name as a token or the name's
    first four characters as a substring.
    """
    score, match_type = _match_score(query_l, name)
    if score > 0:
        return score, match_type
    if len(name) < 4 or name not in blob:
        return 0.0, "normalized"

    query_tokens = set(query_l.split())
    shares_token = not query_tokens.isdisjoint(blob.split())
    mentions_name = name in query_tokens or name[:4] in query_l
    if shares_token and mentions_name:
        return _SCORE_SOFT, "normalized"
    return 0.0, "normalized"
|
||||||
|
|
||||||
|
|
||||||
|
def _match_score(query_l: str, value: str) -> tuple[float, str]:
    """Score a catalog value against the normalized query.

    Returns ``(score, match_type)``:
      * exact score / ``"exact"`` when the value equals the query (also with
        spaces ignored) or occurs in it as a whole space-delimited token;
      * normalized score / ``"normalized"`` when it occurs as a substring;
      * soft score / ``"normalized"`` when a query token starts with the first
        four characters of a value of length four or more;
      * ``(0.0, "normalized")`` otherwise, including for an empty value.
    """
    needle = str(value or "").strip().lower()
    if not needle:
        return 0.0, "normalized"

    needle_compact = needle.replace(" ", "")
    query_compact = query_l.replace(" ", "")
    if needle == query_l or needle_compact == query_compact:
        return _SCORE_EXACT, "exact"

    # NOTE(review): for a value with a leading "/", the slash-to-space variant
    # starts with a space, so it presumably cannot match a single-spaced query
    # as a token - confirm whether the variant should be trimmed first.
    slash_as_space = needle.replace("/", " ")
    if _contains_token(query_l, needle) or _contains_token(query_l, slash_as_space):
        return _SCORE_EXACT, "exact"

    if needle in query_compact or needle_compact in query_compact or needle in query_l:
        return _SCORE_NORMALIZED, "normalized"

    # Prefix / slug match, only for values of four or more characters.
    if len(needle) >= 4:
        prefix = needle[:4]
        if any(token.startswith(prefix) for token in query_l.split()):
            return _SCORE_SOFT, "normalized"
    return 0.0, "normalized"
|
||||||
|
|
||||||
|
|
||||||
|
def _contains_token(hay: str, needle: str) -> bool:
|
||||||
|
if not needle:
|
||||||
|
return False
|
||||||
|
return f" {needle} " in f" {hay} "
|
||||||
|
|
||||||
|
|
||||||
|
def _dedupe_candidates(resolution: ScopeResolution) -> None:
    """Replace each of the four candidate lists on ``resolution`` with its de-duplicated, sorted form."""
    for attr in (
        "candidate_domains",
        "candidate_subdomains",
        "candidate_entities",
        "candidate_apis",
    ):
        setattr(resolution, attr, _dedupe_list(getattr(resolution, attr)))
|
||||||
|
|
||||||
|
|
||||||
|
def _dedupe_list(items: list[ScopeCandidate]) -> list[ScopeCandidate]:
|
||||||
|
best: dict[str, ScopeCandidate] = {}
|
||||||
|
for c in items:
|
||||||
|
key = f"{c.value}|{c.source_layer}"
|
||||||
|
prev = best.get(key)
|
||||||
|
if prev is None or c.score > prev.score:
|
||||||
|
best[key] = c
|
||||||
|
return sorted(best.values(), key=lambda c: (-c.score, c.value))
|
||||||
|
|
||||||
|
|
||||||
|
def _is_global_enumeration(query_l: str, *, has_strong_any: bool) -> bool:
|
||||||
|
if has_strong_any:
|
||||||
|
return False
|
||||||
|
if any(m in query_l for m in _PROJECT_WIDE_MARKERS) and any(
|
||||||
|
m in query_l for m in ("какие", "какой", "список", "перечисли", "метод", "api")
|
||||||
|
):
|
||||||
|
return True
|
||||||
|
if any(query_l.strip().startswith(m.strip()) for m in _ENUM_MARKERS_RU if len(m.strip()) > 2):
|
||||||
|
if any(k in query_l for k in ("метод", "api", "ручк", "эндпоинт")):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
@@ -2,8 +2,17 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import Callable
|
||||||
|
from dataclasses import replace
|
||||||
|
|
||||||
from app.core.agent.processes.v2.intent_router.modules.anchors import V2AnchorExtractor
|
from app.core.agent.processes.v2.intent_router.modules.anchors import V2AnchorExtractor
|
||||||
from app.core.agent.processes.v2.intent_router.modules.normalizer import V2QueryNormalizer
|
from app.core.agent.processes.v2.intent_router.modules.normalizer import V2QueryNormalizer
|
||||||
|
from app.core.agent.processes.v2.intent_router.modules.scope_catalog import DocsScopeCatalog, build_docs_scope_catalog
|
||||||
|
from app.core.agent.processes.v2.intent_router.modules.scope_resolver import (
|
||||||
|
plausible_doc_endpoint_paths,
|
||||||
|
promote_target_terms,
|
||||||
|
resolve_docs_scope,
|
||||||
|
)
|
||||||
from app.core.agent.processes.v2.intent_router.modules.target_terms import V2TargetTermsExtractor
|
from app.core.agent.processes.v2.intent_router.modules.target_terms import V2TargetTermsExtractor
|
||||||
from app.core.agent.processes.v2.intent_router.models import QueryFeatures
|
from app.core.agent.processes.v2.intent_router.models import QueryFeatures
|
||||||
from app.core.agent.processes.v2.intent_router.routers.confidence import V2ConfidenceAdjuster
|
from app.core.agent.processes.v2.intent_router.routers.confidence import V2ConfidenceAdjuster
|
||||||
@@ -11,8 +20,18 @@ from app.core.agent.processes.v2.intent_router.routers.fallback import V2Fallbac
|
|||||||
from app.core.agent.processes.v2.intent_router.routers.llm import V2LlmRouter
|
from app.core.agent.processes.v2.intent_router.routers.llm import V2LlmRouter
|
||||||
from app.core.agent.processes.v2.intent_router.routers.route_catalog import V2RouteCatalog
|
from app.core.agent.processes.v2.intent_router.routers.route_catalog import V2RouteCatalog
|
||||||
from app.core.agent.processes.v2.intent_router.routers.validator import V2RouteValidator
|
from app.core.agent.processes.v2.intent_router.routers.validator import V2RouteValidator
|
||||||
from app.core.agent.processes.v2.models import V2RouteResult
|
from app.core.agent.utils.process_v2.models import V2RouteResult, V2ScopeType
|
||||||
from app.core.agent.utils.llm import AgentLlmService
|
from app.core.agent.utils.llm import AgentLlmService
|
||||||
|
from app.core.rag.persistence.query_repository import RagQueryRepository
|
||||||
|
|
||||||
|
|
||||||
|
def _scope_candidate_dict(candidate) -> dict[str, object]:
|
||||||
|
return {
|
||||||
|
"value": candidate.value,
|
||||||
|
"score": candidate.score,
|
||||||
|
"source_layer": candidate.source_layer,
|
||||||
|
"match_type": candidate.match_type,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class V2IntentRouter:
|
class V2IntentRouter:
|
||||||
@@ -25,6 +44,7 @@ class V2IntentRouter:
|
|||||||
enable_llm_disambiguation: bool = True,
|
enable_llm_disambiguation: bool = True,
|
||||||
route_catalog: V2RouteCatalog | None = None,
|
route_catalog: V2RouteCatalog | None = None,
|
||||||
confidence_adjuster: V2ConfidenceAdjuster | None = None,
|
confidence_adjuster: V2ConfidenceAdjuster | None = None,
|
||||||
|
scope_rows_provider: Callable[[str], list[dict]] | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
self._normalizer = normalizer or V2QueryNormalizer()
|
self._normalizer = normalizer or V2QueryNormalizer()
|
||||||
self._target_terms_extractor = target_terms_extractor or V2TargetTermsExtractor()
|
self._target_terms_extractor = target_terms_extractor or V2TargetTermsExtractor()
|
||||||
@@ -35,23 +55,48 @@ class V2IntentRouter:
|
|||||||
self._confidence_adjuster = confidence_adjuster or V2ConfidenceAdjuster()
|
self._confidence_adjuster = confidence_adjuster or V2ConfidenceAdjuster()
|
||||||
self._enable_llm_disambiguation = enable_llm_disambiguation
|
self._enable_llm_disambiguation = enable_llm_disambiguation
|
||||||
self._llm_router = V2LlmRouter(llm, catalog=self._catalog) if llm is not None else None
|
self._llm_router = V2LlmRouter(llm, catalog=self._catalog) if llm is not None else None
|
||||||
|
self._scope_rows_provider = scope_rows_provider
|
||||||
|
|
||||||
def route(self, user_query: str) -> V2RouteResult:
|
def route(self, user_query: str, *, rag_session_id: str | None = None) -> V2RouteResult:
|
||||||
normalized_query = self._normalizer.normalize(user_query)
|
normalized_query = self._normalizer.normalize(user_query)
|
||||||
target_terms_analysis = self._target_terms_extractor.extract(normalized_query)
|
target_terms_analysis = self._target_terms_extractor.extract(normalized_query)
|
||||||
anchor_analysis = self._anchor_extractor.extract(normalized_query, target_terms_analysis)
|
sanitized_eps = plausible_doc_endpoint_paths(list(target_terms_analysis.endpoint_paths))
|
||||||
|
if sanitized_eps != list(target_terms_analysis.endpoint_paths):
|
||||||
|
target_terms_analysis = replace(target_terms_analysis, endpoint_paths=sanitized_eps)
|
||||||
|
allowed_paths = set(sanitized_eps)
|
||||||
|
target_terms_analysis = replace(
|
||||||
|
target_terms_analysis,
|
||||||
|
target_terms=[
|
||||||
|
t
|
||||||
|
for t in target_terms_analysis.target_terms
|
||||||
|
if not str(t).startswith("/") or str(t).lower() in allowed_paths
|
||||||
|
],
|
||||||
|
)
|
||||||
|
raw_target_terms = list(target_terms_analysis.target_terms)
|
||||||
|
scope_rows = self._load_scope_rows(rag_session_id)
|
||||||
|
scope_catalog: DocsScopeCatalog | None
|
||||||
|
if not scope_rows:
|
||||||
|
scope_catalog = None
|
||||||
|
else:
|
||||||
|
scope_catalog = build_docs_scope_catalog(scope_rows)
|
||||||
|
resolution = resolve_docs_scope(normalized_query, target_terms_analysis, scope_catalog)
|
||||||
|
promoted_terms = promote_target_terms(raw_target_terms, target_terms_analysis, resolution)
|
||||||
|
refined_terms = replace(target_terms_analysis, target_terms=promoted_terms)
|
||||||
|
anchor_analysis = self._anchor_extractor.extract(normalized_query, refined_terms)
|
||||||
|
self._apply_scope_to_anchors(anchor_analysis.anchors, resolution)
|
||||||
features = QueryFeatures(
|
features = QueryFeatures(
|
||||||
normalized_query=normalized_query,
|
normalized_query=normalized_query,
|
||||||
target_terms=list(target_terms_analysis.target_terms),
|
target_terms=list(refined_terms.target_terms),
|
||||||
endpoint_paths=list(target_terms_analysis.endpoint_paths),
|
endpoint_paths=list(refined_terms.endpoint_paths),
|
||||||
file_names=list(anchor_analysis.anchors.file_names),
|
file_names=list(anchor_analysis.anchors.file_names),
|
||||||
matched_aliases=list(target_terms_analysis.matched_aliases),
|
matched_aliases=list(refined_terms.matched_aliases),
|
||||||
target_doc_hints=list(anchor_analysis.anchors.target_doc_hints),
|
target_doc_hints=list(anchor_analysis.anchors.target_doc_hints),
|
||||||
file_markers=list(anchor_analysis.file_markers),
|
file_markers=list(anchor_analysis.file_markers),
|
||||||
architecture_markers=list(anchor_analysis.architecture_markers),
|
architecture_markers=list(anchor_analysis.architecture_markers),
|
||||||
logic_markers=list(anchor_analysis.logic_markers),
|
logic_markers=list(anchor_analysis.logic_markers),
|
||||||
domain_markers=list(anchor_analysis.domain_markers),
|
domain_markers=list(anchor_analysis.domain_markers),
|
||||||
endpoint_markers=list(anchor_analysis.endpoint_markers),
|
endpoint_markers=list(anchor_analysis.endpoint_markers),
|
||||||
|
scope_type=resolution.scope_type,
|
||||||
)
|
)
|
||||||
llm_attempted = self._enable_llm_disambiguation and self._llm_router is not None
|
llm_attempted = self._enable_llm_disambiguation and self._llm_router is not None
|
||||||
llm_candidate = self._route_with_llm(
|
llm_candidate = self._route_with_llm(
|
||||||
@@ -59,7 +104,6 @@ class V2IntentRouter:
|
|||||||
anchors=anchor_analysis.anchors,
|
anchors=anchor_analysis.anchors,
|
||||||
)
|
)
|
||||||
llm_result = self._validator.validate(llm_candidate)
|
llm_result = self._validator.validate(llm_candidate)
|
||||||
llm_result = self._apply_deterministic_corrections(llm_result, features)
|
|
||||||
if llm_result is not None:
|
if llm_result is not None:
|
||||||
confidence = self._confidence_adjuster.adjust(float(llm_result["confidence"]), features)
|
confidence = self._confidence_adjuster.adjust(float(llm_result["confidence"]), features)
|
||||||
return V2RouteResult(
|
return V2RouteResult(
|
||||||
@@ -74,14 +118,53 @@ class V2IntentRouter:
|
|||||||
routing_mode="llm_default",
|
routing_mode="llm_default",
|
||||||
llm_router_used=True,
|
llm_router_used=True,
|
||||||
reason_short=str(llm_result["reason_short"]),
|
reason_short=str(llm_result["reason_short"]),
|
||||||
|
scope_type=resolution.scope_type,
|
||||||
|
)
|
||||||
|
if llm_attempted:
|
||||||
|
return self._fallback_router.route_without_deterministic_signals(
|
||||||
|
user_query=user_query,
|
||||||
|
features=features,
|
||||||
|
anchors=anchor_analysis.anchors,
|
||||||
|
scope_type=resolution.scope_type,
|
||||||
)
|
)
|
||||||
return self._fallback_router.route(
|
return self._fallback_router.route(
|
||||||
user_query=user_query,
|
user_query=user_query,
|
||||||
features=features,
|
features=features,
|
||||||
anchors=anchor_analysis.anchors,
|
anchors=anchor_analysis.anchors,
|
||||||
llm_attempted=llm_attempted,
|
llm_attempted=llm_attempted,
|
||||||
|
scope_type=resolution.scope_type,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _load_scope_rows(self, rag_session_id: str | None) -> list[dict]:
|
||||||
|
sid = str(rag_session_id or "").strip()
|
||||||
|
if not sid:
|
||||||
|
return []
|
||||||
|
if self._scope_rows_provider is not None:
|
||||||
|
return self._scope_rows_provider(sid)
|
||||||
|
try:
|
||||||
|
return RagQueryRepository().list_docs_scope_index_rows(sid)
|
||||||
|
except Exception:
|
||||||
|
return []
|
||||||
|
|
||||||
|
def _apply_scope_to_anchors(self, anchors, resolution) -> None:
|
||||||
|
anchors.candidate_domains = list(resolution.candidate_domains)
|
||||||
|
anchors.candidate_subdomains = list(resolution.candidate_subdomains)
|
||||||
|
anchors.candidate_entities = list(resolution.candidate_entities)
|
||||||
|
anchors.candidate_apis = list(resolution.candidate_apis)
|
||||||
|
if not resolution.catalog_loaded:
|
||||||
|
return
|
||||||
|
merged_endpoints = list(dict.fromkeys([*resolution.strong_endpoint_paths, *anchors.endpoint_paths]))
|
||||||
|
anchors.endpoint_paths = merged_endpoints
|
||||||
|
merged_entities = list(dict.fromkeys([*resolution.strong_entity_names, *anchors.entity_names]))
|
||||||
|
anchors.entity_names = merged_entities
|
||||||
|
if resolution.strong_domain:
|
||||||
|
anchors.process_domain = resolution.strong_domain
|
||||||
|
if resolution.strong_subdomain:
|
||||||
|
anchors.process_subdomain = resolution.strong_subdomain
|
||||||
|
if resolution.scope_type == V2ScopeType.SUBDOMAIN and resolution.strong_domain and resolution.strong_subdomain:
|
||||||
|
anchors.process_domain = resolution.strong_domain
|
||||||
|
anchors.process_subdomain = resolution.strong_subdomain
|
||||||
|
|
||||||
def _route_with_llm(self, *, features: QueryFeatures, anchors) -> dict | None:
|
def _route_with_llm(self, *, features: QueryFeatures, anchors) -> dict | None:
|
||||||
if not self._enable_llm_disambiguation or self._llm_router is None:
|
if not self._enable_llm_disambiguation or self._llm_router is None:
|
||||||
return None
|
return None
|
||||||
@@ -89,6 +172,7 @@ class V2IntentRouter:
|
|||||||
return self._llm_router.classify(
|
return self._llm_router.classify(
|
||||||
normalized_query=features.normalized_query,
|
normalized_query=features.normalized_query,
|
||||||
target_terms=features.target_terms,
|
target_terms=features.target_terms,
|
||||||
|
scope_type=features.scope_type,
|
||||||
anchors={
|
anchors={
|
||||||
"entity_names": anchors.entity_names,
|
"entity_names": anchors.entity_names,
|
||||||
"file_names": anchors.file_names,
|
"file_names": anchors.file_names,
|
||||||
@@ -97,22 +181,11 @@ class V2IntentRouter:
|
|||||||
"matched_aliases": anchors.matched_aliases,
|
"matched_aliases": anchors.matched_aliases,
|
||||||
"process_domain": anchors.process_domain,
|
"process_domain": anchors.process_domain,
|
||||||
"process_subdomain": anchors.process_subdomain,
|
"process_subdomain": anchors.process_subdomain,
|
||||||
|
"candidate_domains": [_scope_candidate_dict(c) for c in anchors.candidate_domains],
|
||||||
|
"candidate_subdomains": [_scope_candidate_dict(c) for c in anchors.candidate_subdomains],
|
||||||
|
"candidate_entities": [_scope_candidate_dict(c) for c in anchors.candidate_entities],
|
||||||
|
"candidate_apis": [_scope_candidate_dict(c) for c in anchors.candidate_apis],
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _apply_deterministic_corrections(self, candidate: dict | None, features: QueryFeatures) -> dict | None:
|
|
||||||
if candidate is None:
|
|
||||||
return None
|
|
||||||
if candidate.get("routing_domain") == "DOCS" and self._should_force_find_files(features):
|
|
||||||
corrected = dict(candidate)
|
|
||||||
corrected["subintent"] = "FIND_FILES"
|
|
||||||
return corrected
|
|
||||||
return candidate
|
|
||||||
|
|
||||||
def _should_force_find_files(self, features: QueryFeatures) -> bool:
|
|
||||||
if features.file_markers or features.file_names:
|
|
||||||
return True
|
|
||||||
query = features.normalized_query.lower()
|
|
||||||
return "show doc" in query or "show file" in query or "doc for" in query
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from app.core.agent.processes.v2.intent_router.models import QueryFeatures
|
from app.core.agent.processes.v2.intent_router.models import QueryFeatures
|
||||||
from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2RouteResult, V2Subintent
|
from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2RouteResult, V2Subintent
|
||||||
from app.core.agent.processes.v2.intent_router.routers.docs_subintent_resolver import DocsSubintentResolver
|
from app.core.agent.processes.v2.intent_router.routers.docs_subintent_resolver import DocsSubintentResolver
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,33 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from app.core.agent.processes.v2.intent_router.models import QueryFeatures
|
from app.core.agent.processes.v2.intent_router.models import QueryFeatures
|
||||||
from app.core.agent.processes.v2.models import V2Subintent
|
from app.core.agent.utils.process_v2.models import V2Subintent
|
||||||
|
|
||||||
|
|
||||||
class DocsSubintentResolver:
|
class DocsSubintentResolver:
|
||||||
|
_API_ENUM_MARKERS = (
|
||||||
|
"какие api",
|
||||||
|
"какие эндпоинты",
|
||||||
|
"какие endpoint",
|
||||||
|
"список api",
|
||||||
|
"список эндпоинтов",
|
||||||
|
"список endpoint",
|
||||||
|
"все api",
|
||||||
|
"все эндпоинты",
|
||||||
|
"перечисли api",
|
||||||
|
"перечисли эндпоинты",
|
||||||
|
"доступные api",
|
||||||
|
"available endpoints",
|
||||||
|
"exposed api",
|
||||||
|
)
|
||||||
|
_API_WORD_MARKERS = ("api", "эндпоинт", "endpoint", "роут", "route", "метод")
|
||||||
|
_LIST_WORD_MARKERS = ("какие", "список", "перечисли", "все", "доступные", "list", "available", "exposed")
|
||||||
|
|
||||||
def resolve(self, features: QueryFeatures) -> str | None:
|
def resolve(self, features: QueryFeatures) -> str | None:
|
||||||
if features.file_markers or self._has_file_like_anchor(features):
|
if features.file_markers or self._has_file_like_anchor(features):
|
||||||
return V2Subintent.FIND_FILES
|
return V2Subintent.FIND_FILES
|
||||||
|
if self._is_api_exposed_request(features):
|
||||||
|
return V2Subintent.API_EXPOSED
|
||||||
if any(
|
if any(
|
||||||
(
|
(
|
||||||
features.endpoint_paths,
|
features.endpoint_paths,
|
||||||
@@ -26,3 +46,13 @@ class DocsSubintentResolver:
|
|||||||
hint.endswith((".md", ".yaml", ".yml", ".json"))
|
hint.endswith((".md", ".yaml", ".yml", ".json"))
|
||||||
for hint in features.target_doc_hints
|
for hint in features.target_doc_hints
|
||||||
) or any(token.endswith((".md", ".yaml", ".yml", ".json")) for token in features.file_names)
|
) or any(token.endswith((".md", ".yaml", ".yml", ".json")) for token in features.file_names)
|
||||||
|
|
||||||
|
def _is_api_exposed_request(self, features: QueryFeatures) -> bool:
|
||||||
|
query = features.normalized_query.lower()
|
||||||
|
if features.endpoint_paths:
|
||||||
|
return False
|
||||||
|
if any(marker in query for marker in self._API_ENUM_MARKERS):
|
||||||
|
return True
|
||||||
|
has_api_words = any(marker in query for marker in self._API_WORD_MARKERS)
|
||||||
|
has_list_words = any(marker in query for marker in self._LIST_WORD_MARKERS)
|
||||||
|
return has_api_words and has_list_words
|
||||||
|
|||||||
@@ -1,10 +1,33 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from app.core.agent.processes.v2.intent_router.models import QueryFeatures
|
from app.core.agent.processes.v2.intent_router.models import QueryFeatures
|
||||||
from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2RouteResult, V2Subintent
|
from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2RouteResult, V2ScopeType, V2Subintent
|
||||||
|
|
||||||
|
|
||||||
class V2FallbackRouter:
|
class V2FallbackRouter:
|
||||||
|
def route_without_deterministic_signals(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
user_query: str,
|
||||||
|
features: QueryFeatures,
|
||||||
|
anchors,
|
||||||
|
scope_type: str = V2ScopeType.UNKNOWN,
|
||||||
|
) -> V2RouteResult:
|
||||||
|
return V2RouteResult(
|
||||||
|
routing_domain=V2Domain.GENERAL,
|
||||||
|
intent=V2Intent.GENERAL_QA,
|
||||||
|
subintent=V2Subintent.SUMMARY,
|
||||||
|
user_query=user_query,
|
||||||
|
normalized_query=features.normalized_query,
|
||||||
|
target_terms=features.target_terms,
|
||||||
|
anchors=anchors,
|
||||||
|
confidence=0.0,
|
||||||
|
routing_mode="llm_fallback",
|
||||||
|
llm_router_used=True,
|
||||||
|
reason_short="llm route unresolved",
|
||||||
|
scope_type=scope_type,
|
||||||
|
)
|
||||||
|
|
||||||
def route(
|
def route(
|
||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
@@ -12,6 +35,7 @@ class V2FallbackRouter:
|
|||||||
features: QueryFeatures,
|
features: QueryFeatures,
|
||||||
anchors,
|
anchors,
|
||||||
llm_attempted: bool,
|
llm_attempted: bool,
|
||||||
|
scope_type: str = V2ScopeType.UNKNOWN,
|
||||||
) -> V2RouteResult:
|
) -> V2RouteResult:
|
||||||
if features.file_markers:
|
if features.file_markers:
|
||||||
return self._build_docs_result(
|
return self._build_docs_result(
|
||||||
@@ -21,6 +45,32 @@ class V2FallbackRouter:
|
|||||||
subintent=V2Subintent.FIND_FILES,
|
subintent=V2Subintent.FIND_FILES,
|
||||||
llm_attempted=llm_attempted,
|
llm_attempted=llm_attempted,
|
||||||
reason="fallback file markers",
|
reason="fallback file markers",
|
||||||
|
scope_type=scope_type,
|
||||||
|
)
|
||||||
|
if self._has_docs_update_signal(features):
|
||||||
|
return V2RouteResult(
|
||||||
|
routing_domain=V2Domain.DOCS,
|
||||||
|
intent=V2Intent.DOC_UPDATE,
|
||||||
|
subintent=V2Subintent.FROM_FEATURE,
|
||||||
|
user_query=user_query,
|
||||||
|
normalized_query=features.normalized_query,
|
||||||
|
target_terms=features.target_terms,
|
||||||
|
anchors=anchors,
|
||||||
|
confidence=0.0,
|
||||||
|
routing_mode=self._routing_mode(llm_attempted),
|
||||||
|
llm_router_used=llm_attempted,
|
||||||
|
reason_short="fallback docs update from feature",
|
||||||
|
scope_type=scope_type,
|
||||||
|
)
|
||||||
|
if self._has_api_exposed_signal(features):
|
||||||
|
return self._build_docs_result(
|
||||||
|
user_query=user_query,
|
||||||
|
features=features,
|
||||||
|
anchors=anchors,
|
||||||
|
subintent=V2Subintent.API_EXPOSED,
|
||||||
|
llm_attempted=llm_attempted,
|
||||||
|
reason="fallback docs api exposed",
|
||||||
|
scope_type=scope_type,
|
||||||
)
|
)
|
||||||
if self._has_docs_signal(features):
|
if self._has_docs_signal(features):
|
||||||
return self._build_docs_result(
|
return self._build_docs_result(
|
||||||
@@ -30,6 +80,7 @@ class V2FallbackRouter:
|
|||||||
subintent=V2Subintent.SUMMARY,
|
subintent=V2Subintent.SUMMARY,
|
||||||
llm_attempted=llm_attempted,
|
llm_attempted=llm_attempted,
|
||||||
reason="fallback docs summary",
|
reason="fallback docs summary",
|
||||||
|
scope_type=scope_type,
|
||||||
)
|
)
|
||||||
return V2RouteResult(
|
return V2RouteResult(
|
||||||
routing_domain=V2Domain.GENERAL,
|
routing_domain=V2Domain.GENERAL,
|
||||||
@@ -43,6 +94,7 @@ class V2FallbackRouter:
|
|||||||
routing_mode=self._routing_mode(llm_attempted),
|
routing_mode=self._routing_mode(llm_attempted),
|
||||||
llm_router_used=llm_attempted,
|
llm_router_used=llm_attempted,
|
||||||
reason_short="fallback general summary",
|
reason_short="fallback general summary",
|
||||||
|
scope_type=scope_type,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _build_docs_result(
|
def _build_docs_result(
|
||||||
@@ -54,6 +106,7 @@ class V2FallbackRouter:
|
|||||||
subintent: str,
|
subintent: str,
|
||||||
llm_attempted: bool,
|
llm_attempted: bool,
|
||||||
reason: str,
|
reason: str,
|
||||||
|
scope_type: str = V2ScopeType.UNKNOWN,
|
||||||
) -> V2RouteResult:
|
) -> V2RouteResult:
|
||||||
return V2RouteResult(
|
return V2RouteResult(
|
||||||
routing_domain=V2Domain.DOCS,
|
routing_domain=V2Domain.DOCS,
|
||||||
@@ -67,6 +120,7 @@ class V2FallbackRouter:
|
|||||||
routing_mode=self._routing_mode(llm_attempted),
|
routing_mode=self._routing_mode(llm_attempted),
|
||||||
llm_router_used=llm_attempted,
|
llm_router_used=llm_attempted,
|
||||||
reason_short=reason,
|
reason_short=reason,
|
||||||
|
scope_type=scope_type,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _has_docs_signal(self, features: QueryFeatures) -> bool:
|
def _has_docs_signal(self, features: QueryFeatures) -> bool:
|
||||||
@@ -82,5 +136,30 @@ class V2FallbackRouter:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _has_api_exposed_signal(self, features: QueryFeatures) -> bool:
|
||||||
|
query = features.normalized_query.lower()
|
||||||
|
has_api = any(marker in query for marker in ("api", "эндпоинт", "endpoint", "роут", "route", "метод"))
|
||||||
|
has_listing = any(marker in query for marker in ("какие", "список", "перечисли", "все", "available", "list"))
|
||||||
|
return has_api and has_listing and not features.endpoint_paths and not features.file_markers
|
||||||
|
|
||||||
|
def _has_docs_update_signal(self, features: QueryFeatures) -> bool:
|
||||||
|
query = features.normalized_query.lower()
|
||||||
|
has_update = any(
|
||||||
|
marker in query
|
||||||
|
for marker in (
|
||||||
|
"обнов",
|
||||||
|
"измен",
|
||||||
|
"внести правк",
|
||||||
|
"docs update",
|
||||||
|
"update documentation",
|
||||||
|
"документац",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
has_feature = any(
|
||||||
|
marker in query
|
||||||
|
for marker in ("системной аналитик", "feature", ".md", "confluence", "from feature")
|
||||||
|
)
|
||||||
|
return has_update and has_feature
|
||||||
|
|
||||||
def _routing_mode(self, llm_attempted: bool) -> str:
|
def _routing_mode(self, llm_attempted: bool) -> str:
|
||||||
return "llm_fallback" if llm_attempted else "deterministic_fallback"
|
return "llm_fallback" if llm_attempted else "deterministic_fallback"
|
||||||
|
|||||||
@@ -17,10 +17,18 @@ class V2LlmRouter:
|
|||||||
self._prompt_name = prompt_name
|
self._prompt_name = prompt_name
|
||||||
self._catalog = catalog or V2RouteCatalog()
|
self._catalog = catalog or V2RouteCatalog()
|
||||||
|
|
||||||
def classify(self, *, normalized_query: str, target_terms: list[str], anchors: dict) -> dict | None:
|
def classify(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
normalized_query: str,
|
||||||
|
target_terms: list[str],
|
||||||
|
anchors: dict,
|
||||||
|
scope_type: str = "unknown",
|
||||||
|
) -> dict | None:
|
||||||
payload = {
|
payload = {
|
||||||
"normalized_query": normalized_query,
|
"normalized_query": normalized_query,
|
||||||
"target_terms": target_terms,
|
"target_terms": target_terms,
|
||||||
|
"scope_type": scope_type,
|
||||||
"anchors": anchors,
|
"anchors": anchors,
|
||||||
"allowed_routes": self._catalog.allowed_routes(),
|
"allowed_routes": self._catalog.allowed_routes(),
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,9 +3,12 @@ namespace: v2_intent_router
|
|||||||
prompts:
|
prompts:
|
||||||
route: |
|
route: |
|
||||||
Ты выбираешь маршрут для узкого процесса v2.
|
Ты выбираешь маршрут для узкого процесса v2.
|
||||||
|
Поле `scope_type` и блок `anchors` с `candidate_*` — это предварительная привязка к каталогу документации текущей RAG-сессии (детерминированно извлечённые кандидаты). Не выдумывай домены, сущности и API, которых нет в этих полях; используй их для снятия неоднозначности.
|
||||||
Основной принцип:
|
Основной принцип:
|
||||||
- DOCS / DOC_EXPLAIN / FIND_FILES: запрос просит найти файл, документ или путь.
|
- DOCS / DOC_EXPLAIN / FIND_FILES: запрос просит найти файл, документ или путь.
|
||||||
|
- DOCS / DOC_EXPLAIN / API_EXPOSED: запрос просит перечислить доступные API-методы/эндпоинты.
|
||||||
- DOCS / DOC_EXPLAIN / SUMMARY: запрос просит объяснить документацию, endpoint, архитектуру, процесс или сущность.
|
- DOCS / DOC_EXPLAIN / SUMMARY: запрос просит объяснить документацию, endpoint, архитектуру, процесс или сущность.
|
||||||
|
- DOCS / DOC_UPDATE / FROM_FEATURE: запрос просит обновить документацию по системной аналитике (feature markdown/confluence).
|
||||||
- GENERAL / GENERAL_QA / SUMMARY: общий обзорный вопрос без явного запроса к документации.
|
- GENERAL / GENERAL_QA / SUMMARY: общий обзорный вопрос без явного запроса к документации.
|
||||||
|
|
||||||
Используй только маршруты из поля `allowed_routes`.
|
Используй только маршруты из поля `allowed_routes`.
|
||||||
@@ -17,8 +20,8 @@ prompts:
|
|||||||
Ответь только JSON-объектом вида:
|
Ответь только JSON-объектом вида:
|
||||||
{
|
{
|
||||||
"routing_domain": "GENERAL" | "DOCS",
|
"routing_domain": "GENERAL" | "DOCS",
|
||||||
"intent": "GENERAL_QA" | "DOC_EXPLAIN",
|
"intent": "GENERAL_QA" | "DOC_EXPLAIN" | "DOC_UPDATE",
|
||||||
"subintent": "SUMMARY" | "FIND_FILES",
|
"subintent": "SUMMARY" | "FIND_FILES" | "API_EXPOSED" | "FROM_FEATURE",
|
||||||
"confidence": 0.0-1.0,
|
"confidence": 0.0-1.0,
|
||||||
"reason_short": "короткая причина"
|
"reason_short": "короткая причина"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,12 +1,14 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from app.core.agent.processes.v2.models import V2Domain, V2Intent, V2Subintent
|
from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2Subintent
|
||||||
|
|
||||||
|
|
||||||
class V2RouteCatalog:
|
class V2RouteCatalog:
|
||||||
_ALLOWED_ROUTES = (
|
_ALLOWED_ROUTES = (
|
||||||
(V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.FIND_FILES),
|
(V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.FIND_FILES),
|
||||||
|
(V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.API_EXPOSED),
|
||||||
(V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.SUMMARY),
|
(V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.SUMMARY),
|
||||||
|
(V2Domain.DOCS, V2Intent.DOC_UPDATE, V2Subintent.FROM_FEATURE),
|
||||||
(V2Domain.GENERAL, V2Intent.GENERAL_QA, V2Subintent.SUMMARY),
|
(V2Domain.GENERAL, V2Intent.GENERAL_QA, V2Subintent.SUMMARY),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -1,304 +0,0 @@
|
|||||||
"""Процесс v2: роутинг, план retrieval, вызов rag API, сборка evidence и workflow."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from app.core.agent.processes.v2.anchor_signals import route_anchor_summary
|
|
||||||
from app.core.agent.processes.v2.evidence.assembler import DocsEvidenceAssembler
|
|
||||||
from app.core.agent.processes.v2.evidence.gate import DocsEvidenceGate
|
|
||||||
from app.core.agent.processes.v2.intent_router import V2IntentRouter
|
|
||||||
from app.core.agent.processes.v2.models import V2Intent, V2Subintent
|
|
||||||
from app.core.agent.processes.v2.retrieval import DocsMetadataLookupIndex
|
|
||||||
from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
|
|
||||||
from app.core.agent.processes.v2.retrieval.target_doc_seeding import (
|
|
||||||
RagRowIndex,
|
|
||||||
merge_row_lists,
|
|
||||||
normalize_doc_path,
|
|
||||||
normalized_path_set,
|
|
||||||
row_path,
|
|
||||||
seed_candidates_from_target_hints,
|
|
||||||
)
|
|
||||||
from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter
|
|
||||||
from app.core.agent.processes.v2.workflows.docs_explain_find_files.context import DocsExplainFindFilesContext
|
|
||||||
from app.core.agent.processes.v2.workflows.docs_explain_find_files.graph import DocsExplainFindFilesGraph
|
|
||||||
from app.core.agent.processes.v2.workflows.docs_explain_summary.context import DocsExplainSummaryContext
|
|
||||||
from app.core.agent.processes.v2.workflows.docs_explain_summary.graph import DocsExplainSummaryGraph
|
|
||||||
from app.core.agent.processes.v2.workflows.general_summary.context import GeneralSummaryContext
|
|
||||||
from app.core.agent.processes.v2.workflows.general_summary.graph import GeneralSummaryGraph
|
|
||||||
from app.core.agent.processes.base import AgentProcess, ProcessResult
|
|
||||||
from app.core.agent.utils.llm import AgentLlmService
|
|
||||||
|
|
||||||
|
|
||||||
class V2Process(AgentProcess):
|
|
||||||
version = "v2"
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
llm: AgentLlmService,
|
|
||||||
policy_resolver: V2RetrievalPolicyResolver,
|
|
||||||
rag_adapter: V2RagRetrievalAdapter,
|
|
||||||
evidence_assembler: DocsEvidenceAssembler,
|
|
||||||
evidence_gate: DocsEvidenceGate | None = None,
|
|
||||||
router: V2IntentRouter | None = None,
|
|
||||||
docs_summary_prompt_name: str = "v2_docs_explain.summary_answer",
|
|
||||||
general_summary_prompt_name: str = "v2_general.summary_answer",
|
|
||||||
workflow_llm_enabled: bool = True,
|
|
||||||
) -> None:
|
|
||||||
self._router = router or V2IntentRouter()
|
|
||||||
self._policy_resolver = policy_resolver
|
|
||||||
self._rag_adapter = rag_adapter
|
|
||||||
self._evidence_assembler = evidence_assembler
|
|
||||||
self._evidence_gate = evidence_gate or DocsEvidenceGate()
|
|
||||||
self._docs_summary_prompt_name = docs_summary_prompt_name
|
|
||||||
self._general_summary_prompt_name = general_summary_prompt_name
|
|
||||||
self._workflow_llm_enabled = workflow_llm_enabled
|
|
||||||
self._summary_graph = DocsExplainSummaryGraph(llm)
|
|
||||||
self._find_files_graph = DocsExplainFindFilesGraph()
|
|
||||||
self._general_summary_graph = GeneralSummaryGraph(llm)
|
|
||||||
|
|
||||||
async def run(self, context) -> ProcessResult:
|
|
||||||
route = self._router.route(context.request.message)
|
|
||||||
rag_session_id = context.session.active_rag_session_id
|
|
||||||
context.trace.module("process.v2").log(
|
|
||||||
"intent_routed",
|
|
||||||
{
|
|
||||||
"routing_domain": route.routing_domain,
|
|
||||||
"intent": route.intent,
|
|
||||||
"subintent": route.subintent,
|
|
||||||
"normalized_query": route.normalized_query,
|
|
||||||
"target_terms": route.target_terms,
|
|
||||||
"anchors": route_anchor_summary(route),
|
|
||||||
"confidence": route.confidence,
|
|
||||||
"routing_mode": route.routing_mode,
|
|
||||||
"llm_router_used": route.llm_router_used,
|
|
||||||
"reason_short": route.reason_short,
|
|
||||||
"rag_session_id": rag_session_id,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
self._log_step(
|
|
||||||
context,
|
|
||||||
"router_resolved",
|
|
||||||
{
|
|
||||||
"domain": route.routing_domain,
|
|
||||||
"intent": route.intent,
|
|
||||||
"subintent": route.subintent,
|
|
||||||
"confidence": route.confidence,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
self._log_step(
|
|
||||||
context,
|
|
||||||
"anchors_extracted",
|
|
||||||
{
|
|
||||||
"signal_types": route_anchor_summary(route)["signal_types"],
|
|
||||||
"endpoint_paths": route.anchors.endpoint_paths,
|
|
||||||
"target_doc_hints": route.anchors.target_doc_hints,
|
|
||||||
"matched_aliases": route.anchors.matched_aliases,
|
|
||||||
"target_terms": route.target_terms,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
self._log_step(
|
|
||||||
context,
|
|
||||||
"alias_resolution",
|
|
||||||
{
|
|
||||||
"resolved_aliases": route.anchors.matched_aliases,
|
|
||||||
"target_doc_hints": route.anchors.target_doc_hints,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
if not rag_session_id:
|
|
||||||
if route.intent == V2Intent.GENERAL_QA:
|
|
||||||
answer = "Не могу собрать grounded summary без активной RAG-сессии с проиндексированной документацией."
|
|
||||||
self._log_step(context, "evidence_gate_checked", {"passed": False, "reason": "missing_rag_session"})
|
|
||||||
self._log_step(context, "answer_generated", {"answer_mode": "insufficient_evidence"})
|
|
||||||
return ProcessResult(answer=answer)
|
|
||||||
return ProcessResult(answer="Для процесса v2 нужна активная RAG-сессия проекта с проиндексированной документацией.")
|
|
||||||
plan = self._policy_resolver.resolve(route)
|
|
||||||
context.trace.module("process.v2.retrieval_policy").log(
|
|
||||||
"retrieval_plan_resolved",
|
|
||||||
{"profile": plan.profile, "layers": plan.layers, "limit": plan.limit, "filters": plan.filters},
|
|
||||||
)
|
|
||||||
self._log_step(
|
|
||||||
context,
|
|
||||||
"retrieval_profile_selected",
|
|
||||||
{"profile": plan.profile, "layers": plan.layers, "filters": plan.filters},
|
|
||||||
)
|
|
||||||
retrieved_rows = await self._rag_adapter.fetch_rows(rag_session_id, route.normalized_query, plan)
|
|
||||||
metadata_rows = self._metadata_lookup_candidates(retrieved_rows, route)
|
|
||||||
rows = self._merge_candidate_rows(retrieved_rows, metadata_rows)
|
|
||||||
rows = seed_candidates_from_target_hints(rows, route.anchors.target_doc_hints, RagRowIndex(rows))
|
|
||||||
self._print_missing_target_hints(route, rows)
|
|
||||||
context.trace.module("process.v2.rag_retrieval").log(
|
|
||||||
"rag_rows_fetched",
|
|
||||||
{
|
|
||||||
"profile": plan.profile,
|
|
||||||
"row_count": len(rows),
|
|
||||||
"rows": [self._trace_row(row) for row in rows],
|
|
||||||
},
|
|
||||||
)
|
|
||||||
self._log_step(
|
|
||||||
context,
|
|
||||||
"candidate_generation",
|
|
||||||
{
|
|
||||||
"query": route.user_query,
|
|
||||||
"profile": plan.profile,
|
|
||||||
"details": {
|
|
||||||
"target_doc_hints": list(route.anchors.target_doc_hints),
|
|
||||||
"candidates_before_ranking": [row_path(row) for row in rows if row_path(row)],
|
|
||||||
},
|
|
||||||
"resolved_aliases": route.anchors.matched_aliases,
|
|
||||||
"target_doc_hints": route.anchors.target_doc_hints,
|
|
||||||
"candidate_docs_before_ranking": [self._trace_row(row) for row in rows[:8]],
|
|
||||||
"sources": {
|
|
||||||
"seeded": [self._trace_row(row) for row in retrieved_rows[:5] if row_path(row) in {normalize_doc_path(h) for h in route.anchors.target_doc_hints}],
|
|
||||||
"metadata_lookup": [self._trace_row(row) for row in metadata_rows[:5]],
|
|
||||||
"semantic": [self._trace_row(row) for row in retrieved_rows[:5]],
|
|
||||||
},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
self._log_step(
|
|
||||||
context,
|
|
||||||
"retrieval_executed",
|
|
||||||
{
|
|
||||||
"query": route.user_query,
|
|
||||||
"profile": plan.profile,
|
|
||||||
"row_count": len(rows),
|
|
||||||
"target_doc_hints": route.anchors.target_doc_hints,
|
|
||||||
"top_results": [self._trace_row(row) for row in rows[:5]],
|
|
||||||
},
|
|
||||||
)
|
|
||||||
if route.subintent == V2Subintent.FIND_FILES:
|
|
||||||
files = self._evidence_assembler.assemble_files(rows, route)
|
|
||||||
gate = self._evidence_gate.check_files(route, files)
|
|
||||||
context.trace.module("process.v2.evidence").log(
|
|
||||||
"evidence_assembled",
|
|
||||||
{"mode": "find_files", "file_count": len(files), "files": [file.path for file in files]},
|
|
||||||
)
|
|
||||||
self._log_step(
|
|
||||||
context,
|
|
||||||
"evidence_assembled",
|
|
||||||
{"mode": "find_files", "primary_file": files[0].path if files else None, "file_count": len(files)},
|
|
||||||
)
|
|
||||||
self._log_ranking(context, files)
|
|
||||||
self._log_step(
|
|
||||||
context,
|
|
||||||
"evidence_gate_checked",
|
|
||||||
{"passed": gate.passed, "reason": gate.reason, "answer_mode": gate.answer_mode},
|
|
||||||
)
|
|
||||||
flow_context = DocsExplainFindFilesContext(
|
|
||||||
runtime=context,
|
|
||||||
route=route,
|
|
||||||
rag_session_id=rag_session_id,
|
|
||||||
files=files,
|
|
||||||
gate_decision=gate,
|
|
||||||
)
|
|
||||||
flow_context = await self._find_files_graph.run(flow_context)
|
|
||||||
self._log_step(context, "answer_generated", {"answer_mode": gate.answer_mode, "answer_length": len(flow_context.answer)})
|
|
||||||
return ProcessResult(answer=flow_context.answer)
|
|
||||||
documents = self._evidence_assembler.assemble_summaries(rows, route)
|
|
||||||
gate = self._evidence_gate.check_summaries(route, documents)
|
|
||||||
context.trace.module("process.v2.evidence").log(
|
|
||||||
"evidence_assembled",
|
|
||||||
{"mode": "summary", "document_count": len(documents), "documents": [item.path for item in documents]},
|
|
||||||
)
|
|
||||||
self._log_step(
|
|
||||||
context,
|
|
||||||
"evidence_assembled",
|
|
||||||
{"mode": "summary", "primary_doc": documents[0].path if documents else None, "document_count": len(documents)},
|
|
||||||
)
|
|
||||||
self._log_ranking(context, documents)
|
|
||||||
self._log_step(
|
|
||||||
context,
|
|
||||||
"evidence_gate_checked",
|
|
||||||
{"passed": gate.passed, "reason": gate.reason, "answer_mode": gate.answer_mode},
|
|
||||||
)
|
|
||||||
if route.intent == V2Intent.GENERAL_QA:
|
|
||||||
flow_context = GeneralSummaryContext(
|
|
||||||
runtime=context,
|
|
||||||
route=route,
|
|
||||||
prompt_name=self._general_summary_prompt_name,
|
|
||||||
workflow_llm_enabled=self._workflow_llm_enabled,
|
|
||||||
documents=documents,
|
|
||||||
gate_decision=gate,
|
|
||||||
)
|
|
||||||
flow_context = await self._general_summary_graph.run(flow_context)
|
|
||||||
self._log_step(context, "answer_generated", {"answer_mode": gate.answer_mode, "answer_length": len(flow_context.answer)})
|
|
||||||
return ProcessResult(answer=flow_context.answer)
|
|
||||||
flow_context = DocsExplainSummaryContext(
|
|
||||||
runtime=context,
|
|
||||||
route=route,
|
|
||||||
rag_session_id=rag_session_id,
|
|
||||||
prompt_name=self._docs_summary_prompt_name,
|
|
||||||
workflow_llm_enabled=self._workflow_llm_enabled,
|
|
||||||
documents=documents,
|
|
||||||
gate_decision=gate,
|
|
||||||
)
|
|
||||||
flow_context = await self._summary_graph.run(flow_context)
|
|
||||||
self._log_step(context, "answer_generated", {"answer_mode": gate.answer_mode, "answer_length": len(flow_context.answer)})
|
|
||||||
return ProcessResult(answer=flow_context.answer)
|
|
||||||
|
|
||||||
def _trace_row(self, row: dict) -> dict[str, object]:
|
|
||||||
metadata = row.get("metadata") or {}
|
|
||||||
content = str(row.get("content") or "").strip()
|
|
||||||
return {
|
|
||||||
"layer": str(row.get("layer") or ""),
|
|
||||||
"path": str(row.get("path") or ""),
|
|
||||||
"title": str(row.get("title") or ""),
|
|
||||||
"document_id": str(metadata.get("document_id") or metadata.get("doc_id") or ""),
|
|
||||||
"entity_name": str(metadata.get("entity_name") or ""),
|
|
||||||
"summary_text": str(metadata.get("summary_text") or "")[:400],
|
|
||||||
"section_path": str(metadata.get("section_path") or ""),
|
|
||||||
"content_preview": content[:400],
|
|
||||||
}
|
|
||||||
|
|
||||||
def _log_step(self, context, step: str, payload: dict[str, object]) -> None:
|
|
||||||
context.trace.module("process.v2.pipeline").log(step, payload)
|
|
||||||
|
|
||||||
def _print_missing_target_hints(self, route, rows: list[dict]) -> None:
|
|
||||||
if not route.anchors.target_doc_hints:
|
|
||||||
return
|
|
||||||
candidate_paths = normalized_path_set(rows)
|
|
||||||
for hint in route.anchors.target_doc_hints:
|
|
||||||
if not str(hint or "").strip():
|
|
||||||
continue
|
|
||||||
normalized = normalize_doc_path(hint)
|
|
||||||
if not normalized.startswith("docs/") or "." not in normalized.rsplit("/", 1)[-1]:
|
|
||||||
continue
|
|
||||||
if normalized not in candidate_paths:
|
|
||||||
print("ERROR: target doc missing from candidates:", normalized)
|
|
||||||
|
|
||||||
def _metadata_lookup_candidates(self, rows: list[dict], route) -> list[dict]:
|
|
||||||
return DocsMetadataLookupIndex(rows).lookup(route)
|
|
||||||
|
|
||||||
def _merge_candidate_rows(self, *groups: list[dict]) -> list[dict]:
|
|
||||||
return merge_row_lists(*groups)
|
|
||||||
|
|
||||||
def _log_ranking(self, context, items: list) -> None:
|
|
||||||
top_docs: list[dict[str, object]] = []
|
|
||||||
for item in items[:4]:
|
|
||||||
top_docs.append(
|
|
||||||
{
|
|
||||||
"doc": getattr(item, "path", ""),
|
|
||||||
"score": getattr(item, "score", 0),
|
|
||||||
"match_reason": getattr(item, "match_reason", ""),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
context.trace.module("process.v2.pipeline").log(
|
|
||||||
"ranking_explained",
|
|
||||||
{
|
|
||||||
"doc": getattr(item, "path", ""),
|
|
||||||
"score_breakdown": getattr(item, "score_breakdown", {}),
|
|
||||||
"score": getattr(item, "score", 0),
|
|
||||||
"match_reason": getattr(item, "match_reason", ""),
|
|
||||||
},
|
|
||||||
)
|
|
||||||
context.trace.module("process.v2.pipeline").log(
|
|
||||||
"ranking_explained",
|
|
||||||
{
|
|
||||||
"top_docs_after_ranking": top_docs,
|
|
||||||
"ranking_score_breakdown": [
|
|
||||||
{
|
|
||||||
"doc": getattr(item, "path", ""),
|
|
||||||
"score_breakdown": getattr(item, "score_breakdown", {}),
|
|
||||||
}
|
|
||||||
for item in items[:4]
|
|
||||||
],
|
|
||||||
},
|
|
||||||
)
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
from app.core.agent.processes.v2.retrieval.metadata_lookup import DocsMetadataLookupIndex
|
|
||||||
from app.core.agent.processes.v2.retrieval.policy_resolver import V2RetrievalPolicyResolver
|
|
||||||
from app.core.agent.processes.v2.retrieval.target_doc_seeding import (
|
|
||||||
RagRowIndex,
|
|
||||||
normalize_doc_path,
|
|
||||||
seed_candidates_from_target_hints,
|
|
||||||
)
|
|
||||||
from app.core.agent.processes.v2.retrieval.v2_rag_adapter import V2RagRetrievalAdapter
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
"V2RetrievalPolicyResolver",
|
|
||||||
"V2RagRetrievalAdapter",
|
|
||||||
"DocsMetadataLookupIndex",
|
|
||||||
"normalize_doc_path",
|
|
||||||
"RagRowIndex",
|
|
||||||
"seed_candidates_from_target_hints",
|
|
||||||
]
|
|
||||||
@@ -1,270 +0,0 @@
|
|||||||
"""Intent-aware retrieval policy resolver for process v2."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from app.core.agent.processes.v2.anchor_signals import anchor_signal_types
|
|
||||||
from app.core.agent.processes.v2.models import V2AnchorType, V2Intent, V2RouteResult, V2Subintent
|
|
||||||
from app.core.rag.contracts.enums import RagLayer
|
|
||||||
from app.core.rag.retrieval.session_retriever import RetrievalPlan
|
|
||||||
|
|
||||||
|
|
||||||
class _AnchorTermCollector:
|
|
||||||
def prefer_like_patterns(self, route: V2RouteResult) -> list[str]:
|
|
||||||
terms = self._hint_basenames(route)
|
|
||||||
terms.extend(route.anchors.endpoint_paths)
|
|
||||||
terms.extend(route.target_terms)
|
|
||||||
terms.extend(route.anchors.file_names)
|
|
||||||
terms.extend(route.anchors.entity_names)
|
|
||||||
terms.extend(route.anchors.matched_aliases)
|
|
||||||
terms.extend(self._process_terms(route))
|
|
||||||
return [f"%{term.lower()}%" for term in _unique_terms(terms)]
|
|
||||||
|
|
||||||
def find_files_patterns(self, route: V2RouteResult) -> list[str]:
|
|
||||||
if route.anchors.target_doc_hints:
|
|
||||||
return [f"%{name.lower()}%" for name in self._hint_basenames(route)]
|
|
||||||
return self.prefer_like_patterns(route)
|
|
||||||
|
|
||||||
def api_method_patterns(self, route: V2RouteResult) -> list[str]:
|
|
||||||
terms = self._hint_basenames(route)
|
|
||||||
terms.extend(route.anchors.target_doc_hints)
|
|
||||||
terms.extend(route.anchors.endpoint_paths)
|
|
||||||
terms.extend(route.target_terms)
|
|
||||||
patterns: list[str] = []
|
|
||||||
for term in _unique_terms(terms):
|
|
||||||
lowered = term.lower()
|
|
||||||
stripped = lowered.strip("/")
|
|
||||||
if stripped:
|
|
||||||
patterns.append(f"%{stripped}%")
|
|
||||||
if lowered:
|
|
||||||
patterns.append(f"%{lowered}%")
|
|
||||||
return _unique_terms(patterns)
|
|
||||||
|
|
||||||
def _hint_basenames(self, route: V2RouteResult) -> list[str]:
|
|
||||||
return [hint.rsplit("/", 1)[-1] for hint in route.anchors.target_doc_hints if str(hint).strip()]
|
|
||||||
|
|
||||||
def _process_terms(self, route: V2RouteResult) -> list[str]:
|
|
||||||
terms: list[str] = []
|
|
||||||
if route.anchors.process_domain:
|
|
||||||
terms.append(route.anchors.process_domain)
|
|
||||||
if route.anchors.process_subdomain:
|
|
||||||
terms.append(route.anchors.process_subdomain)
|
|
||||||
return terms
|
|
||||||
|
|
||||||
|
|
||||||
class _RouteFilterBuilder:
|
|
||||||
_API_DOC_PREFIXES = [
|
|
||||||
"docs/api/",
|
|
||||||
"docs/endpoints/",
|
|
||||||
"docs/methods/",
|
|
||||||
"api/",
|
|
||||||
"endpoints/",
|
|
||||||
"methods/",
|
|
||||||
]
|
|
||||||
|
|
||||||
def __init__(self) -> None:
|
|
||||||
self._terms = _AnchorTermCollector()
|
|
||||||
|
|
||||||
def general_filters(self, route: V2RouteResult) -> dict[str, object]:
|
|
||||||
return {
|
|
||||||
"prefer_path_prefixes": ["docs/architecture/", "docs/"],
|
|
||||||
"prefer_like_patterns": ["%readme.md%", "%overview%"],
|
|
||||||
"target_doc_hints": list(route.anchors.target_doc_hints),
|
|
||||||
}
|
|
||||||
|
|
||||||
def summary_filters(self, route: V2RouteResult) -> dict[str, object]:
|
|
||||||
if _is_api_method_explain(route):
|
|
||||||
return self.api_method_filters(route)
|
|
||||||
filters = self._base_filters(route)
|
|
||||||
filters["prefer_path_prefixes"] = self._summary_prefixes(route)
|
|
||||||
filters["prefer_like_patterns"] = self._terms.prefer_like_patterns(route)
|
|
||||||
if V2AnchorType.API_ENDPOINT in anchor_signal_types(route):
|
|
||||||
filters["path_prefixes"] = ["docs/api/", "docs/"]
|
|
||||||
return filters
|
|
||||||
|
|
||||||
def api_method_filters(self, route: V2RouteResult) -> dict[str, object]:
|
|
||||||
filters = self._base_filters(route)
|
|
||||||
filters["path_prefixes"] = list(self._API_DOC_PREFIXES)
|
|
||||||
filters["prefer_path_prefixes"] = list(self._API_DOC_PREFIXES)
|
|
||||||
filters["prefer_like_patterns"] = self._terms.api_method_patterns(route)
|
|
||||||
return filters
|
|
||||||
|
|
||||||
def find_files_filters(self, route: V2RouteResult) -> dict[str, object]:
|
|
||||||
filters = self._base_filters(route)
|
|
||||||
prefixes = self._find_files_prefixes(route)
|
|
||||||
if prefixes:
|
|
||||||
filters["path_prefixes"] = prefixes
|
|
||||||
filters["prefer_path_prefixes"] = self._find_files_prefer_prefixes(route, prefixes)
|
|
||||||
filters["prefer_like_patterns"] = self._terms.find_files_patterns(route)
|
|
||||||
return filters
|
|
||||||
|
|
||||||
def _base_filters(self, route: V2RouteResult) -> dict[str, object]:
|
|
||||||
filters: dict[str, object] = {
|
|
||||||
"target_doc_hints": list(route.anchors.target_doc_hints),
|
|
||||||
}
|
|
||||||
if route.anchors.process_domain:
|
|
||||||
filters["metadata.domain"] = route.anchors.process_domain
|
|
||||||
if route.anchors.process_subdomain:
|
|
||||||
filters["metadata.subdomain"] = route.anchors.process_subdomain
|
|
||||||
return filters
|
|
||||||
|
|
||||||
def _find_files_prefixes(self, route: V2RouteResult) -> list[str]:
|
|
||||||
hint_prefixes = _prefixes_from_paths(route.anchors.target_doc_hints)
|
|
||||||
if hint_prefixes:
|
|
||||||
return hint_prefixes
|
|
||||||
file_prefixes = [name for name in route.anchors.file_names if str(name).strip().startswith("docs/")]
|
|
||||||
derived = _prefixes_from_paths(file_prefixes)
|
|
||||||
if derived:
|
|
||||||
return derived
|
|
||||||
signals = anchor_signal_types(route)
|
|
||||||
if V2AnchorType.API_ENDPOINT in signals:
|
|
||||||
return ["docs/api/", "docs/"]
|
|
||||||
if V2AnchorType.ARCHITECTURE in signals:
|
|
||||||
return ["docs/architecture/", "docs/"]
|
|
||||||
if V2AnchorType.LOGIC_FLOW in signals:
|
|
||||||
return ["docs/logic/", "docs/"]
|
|
||||||
if V2AnchorType.DOMAIN_ENTITY in signals:
|
|
||||||
return ["docs/domains/", "docs/"]
|
|
||||||
return ["docs/"]
|
|
||||||
|
|
||||||
def _find_files_prefer_prefixes(self, route: V2RouteResult, prefixes: list[str]) -> list[str]:
|
|
||||||
preferred = list(prefixes)
|
|
||||||
if route.anchors.process_domain or route.anchors.process_subdomain:
|
|
||||||
preferred.extend(["docs/domains/", "docs/logic/"])
|
|
||||||
return _unique_terms(preferred or ["docs/"])
|
|
||||||
|
|
||||||
def _summary_prefixes(self, route: V2RouteResult) -> list[str]:
|
|
||||||
signals = anchor_signal_types(route)
|
|
||||||
prefixes: list[str] = []
|
|
||||||
if V2AnchorType.API_ENDPOINT in signals:
|
|
||||||
prefixes.extend(["docs/api/", "docs/"])
|
|
||||||
if V2AnchorType.ARCHITECTURE in signals:
|
|
||||||
prefixes.extend(["docs/architecture/", "docs/"])
|
|
||||||
if V2AnchorType.LOGIC_FLOW in signals:
|
|
||||||
prefixes.extend(["docs/logic/", "docs/architecture/", "docs/"])
|
|
||||||
if V2AnchorType.DOMAIN_ENTITY in signals:
|
|
||||||
prefixes.extend(["docs/domains/", "docs/", "docs/api/"])
|
|
||||||
return _unique_terms(prefixes or ["docs/"])
|
|
||||||
|
|
||||||
|
|
||||||
class V2RetrievalPolicyResolver:
    """Choose the retrieval profile, layers, limit and filters for a routed v2 request."""

    _GENERAL_LAYERS = [RagLayer.DOCS_DOCUMENT_CATALOG, RagLayer.DOCS_DOC_CHUNKS]
    _FIND_FILES_LAYERS = [RagLayer.DOCS_DOCUMENT_CATALOG, RagLayer.DOCS_ENTITY_CATALOG]
    # Layers queried for each summary profile, in the order listed.
    _SUMMARY_LAYERS = {
        "docs_api_method_explain": [
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_FACT_INDEX,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
        "docs_summary_api_endpoint": [
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_FACT_INDEX,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
        "docs_summary_logic_flow": [
            RagLayer.DOCS_WORKFLOW_INDEX,
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
        "docs_summary_domain_entity": [
            RagLayer.DOCS_ENTITY_CATALOG,
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
        "docs_summary_architecture": [
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_RELATION_GRAPH,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
        "docs_summary_generic": [
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
    }

    def __init__(self) -> None:
        self._filters = _RouteFilterBuilder()

    def resolve(self, route: V2RouteResult) -> RetrievalPlan:
        """Return the ``RetrievalPlan`` for ``route``.

        General-QA routes get the ``general_qa_grounded_summary`` profile
        (limit 8), find-files routes get ``file_lookup`` (limit 12), and every
        other route gets a summary profile (limit 10 for
        ``docs_api_method_explain``, otherwise 8).
        """
        if route.intent == V2Intent.GENERAL_QA:
            profile = "general_qa_grounded_summary"
            layers = self._GENERAL_LAYERS
            limit = 8
            filters = self._filters.general_filters(route)
        elif route.subintent == V2Subintent.FIND_FILES:
            profile = "file_lookup"
            layers = self._FIND_FILES_LAYERS
            limit = 12
            filters = self._filters.find_files_filters(route)
        else:
            profile = self._summary_profile(route)
            layers = self._SUMMARY_LAYERS[profile]
            limit = 10 if profile == "docs_api_method_explain" else 8
            filters = self._filters.summary_filters(route)
        # Copy the layer list so a plan never shares the class-level lists.
        return RetrievalPlan(profile=profile, layers=list(layers), limit=limit, filters=filters)

    def _summary_profile(self, route: V2RouteResult) -> str:
        """Name the summary profile for ``route``.

        API-method routes map to ``docs_api_method_explain``. Otherwise a
        specific profile is chosen only when exactly one anchor signal remains
        after ignoring ``FIND_FILES``; every other case is
        ``docs_summary_generic``.
        """
        if _is_api_method_explain(route):
            return "docs_api_method_explain"

        meaningful = anchor_signal_types(route) - {V2AnchorType.FIND_FILES}
        if len(meaningful) != 1:
            return "docs_summary_generic"

        (signal,) = meaningful
        profiles_by_signal = {
            V2AnchorType.API_ENDPOINT: "docs_summary_api_endpoint",
            V2AnchorType.ARCHITECTURE: "docs_summary_architecture",
            V2AnchorType.LOGIC_FLOW: "docs_summary_logic_flow",
            V2AnchorType.DOMAIN_ENTITY: "docs_summary_domain_entity",
        }
        return profiles_by_signal.get(signal, "docs_summary_generic")
|
|
||||||
|
|
||||||
|
|
||||||
def _prefixes_from_paths(paths: list[str]) -> list[str]:
    """Return the distinct parent-directory prefixes of ``paths``, each ending in ``/``.

    Every path is stripped of surrounding whitespace and slashes first; paths
    that have no ``/`` left after that contribute nothing.
    """
    cleaned_paths = (str(path).strip().strip("/") for path in paths)
    parents = [cleaned.rsplit("/", 1)[0] + "/" for cleaned in cleaned_paths if "/" in cleaned]
    return _unique_terms(parents)
|
|
||||||
|
|
||||||
|
|
||||||
def _unique_terms(items: list[str]) -> list[str]:
|
|
||||||
seen: set[str] = set()
|
|
||||||
unique: list[str] = []
|
|
||||||
for raw in items:
|
|
||||||
value = str(raw or "").strip()
|
|
||||||
if not value or value in seen:
|
|
||||||
continue
|
|
||||||
seen.add(value)
|
|
||||||
unique.append(value)
|
|
||||||
return unique
|
|
||||||
|
|
||||||
|
|
||||||
def _is_api_method_explain(route: V2RouteResult) -> bool:
    """Tell whether a summary route should be answered as an API-method explanation.

    Only ``SUMMARY`` routes qualify. Among those, any endpoint path, any
    API-looking target doc hint, or an ``API_ENDPOINT`` anchor signal is enough.
    """
    if route.subintent != V2Subintent.SUMMARY:
        return False

    anchors = route.anchors
    if anchors.endpoint_paths or _has_api_like_hints(anchors.target_doc_hints):
        return True
    return V2AnchorType.API_ENDPOINT in anchor_signal_types(route)
|
|
||||||
|
|
||||||
|
|
||||||
def _has_api_like_hints(hints: list[str]) -> bool:
|
|
||||||
for hint in hints:
|
|
||||||
value = str(hint or "").strip().lower()
|
|
||||||
if not value:
|
|
||||||
continue
|
|
||||||
if value.startswith("/"):
|
|
||||||
return True
|
|
||||||
if value.startswith(("docs/api/", "docs/endpoints/", "docs/methods/")):
|
|
||||||
return True
|
|
||||||
if "endpoint" in value or "method" in value:
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
@@ -0,0 +1,194 @@
|
|||||||
|
"""Process v2: request routing and dispatch to a workflow."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from app.core.agent.processes.base import AgentProcess, ProcessResult
|
||||||
|
from app.core.agent.processes.v2.intent_router import V2IntentRouter
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context import (
|
||||||
|
DocExplainApiExposedContext,
|
||||||
|
)
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.graph import DocExplainApiExposedGraph
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context import DocExplainFindFilesContext
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.graph import DocExplainFindFilesGraph
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context import DocExplainSummaryContext
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.graph import DocExplainSummaryGraph
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_update_from_feature.graph import DocUpdateFromFeatureGraph
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.context import (
|
||||||
|
DocUpdateFromFeatureContext,
|
||||||
|
)
|
||||||
|
from app.core.agent.processes.v2.workflows.general_qa_summary.workflow_runtime.context import GeneralQaSummaryContext
|
||||||
|
from app.core.agent.processes.v2.workflows.general_qa_summary.graph import GeneralQaSummaryGraph
|
||||||
|
from app.core.agent.utils.llm import AgentLlmService
|
||||||
|
from app.core.agent.utils.process_v2.anchor_signals import route_anchor_summary
|
||||||
|
from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
|
||||||
|
from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
|
||||||
|
from app.core.agent.utils.process_v2.models import V2Domain, V2Intent, V2Subintent
|
||||||
|
from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
|
||||||
|
from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
|
||||||
|
|
||||||
|
|
||||||
|
class V2Process(AgentProcess):
    """Agent process v2: route the request, run the matching workflow, return its result."""

    version = "v2"

    def __init__(
        self,
        llm: AgentLlmService,
        policy_resolver: RetrievalPlanResolver,
        rag_adapter: V2RagRetrievalAdapter,
        evidence_assembler: DocsEvidenceAssembler,
        evidence_gate: DocsEvidenceGate | None = None,
        router: V2IntentRouter | None = None,
        docs_summary_prompt_name: str = "v2_docs_explain.summary_answer",
        general_summary_prompt_name: str = "v2_general.summary_answer",
        workflow_llm_enabled: bool = True,
        doc_rules_enabled: bool = True,
    ) -> None:
        """Wire the router and one workflow graph per supported route.

        A default ``V2IntentRouter`` / ``DocsEvidenceGate`` is created when the
        corresponding argument is omitted (or falsy).
        """
        self._router = router or V2IntentRouter()
        gate = evidence_gate or DocsEvidenceGate()
        self._docs_summary_prompt_name = docs_summary_prompt_name
        self._general_summary_prompt_name = general_summary_prompt_name
        self._workflow_llm_enabled = workflow_llm_enabled
        self._doc_rules_enabled = doc_rules_enabled

        # Keyword sets shared by several graphs.
        retrieval = {"policy_resolver": policy_resolver, "rag_adapter": rag_adapter}
        evidence = {**retrieval, "evidence_assembler": evidence_assembler, "evidence_gate": gate}
        # Workflows are keyed by (routing domain, intent, subintent).
        self._workflows: dict[tuple[str, str, str], Any] = {
            (V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.SUMMARY): DocExplainSummaryGraph(llm, **evidence),
            (V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.FIND_FILES): DocExplainFindFilesGraph(**evidence),
            (V2Domain.DOCS, V2Intent.DOC_EXPLAIN, V2Subintent.API_EXPOSED): DocExplainApiExposedGraph(**retrieval),
            (V2Domain.DOCS, V2Intent.DOC_UPDATE, V2Subintent.FROM_FEATURE): DocUpdateFromFeatureGraph(
                llm=llm,
                doc_rules_enabled=doc_rules_enabled,
            ),
            (V2Domain.GENERAL, V2Intent.GENERAL_QA, V2Subintent.SUMMARY): GeneralQaSummaryGraph(llm, **evidence),
        }

    async def run(self, context) -> ProcessResult:
        """Route the request message, trace the routing decision and run the workflow.

        Returns a ``ProcessResult`` with the workflow's answer plus its
        ``changeset`` / ``apply_changeset`` when the workflow context has them
        (an empty list and ``False`` otherwise).
        """
        rag_session_id = context.session.active_rag_session_id or ""
        route = self._router.route(context.request.message, rag_session_id=rag_session_id or None)

        routed_payload = {
            "routing_domain": route.routing_domain,
            "intent": route.intent,
            "subintent": route.subintent,
            "normalized_query": route.normalized_query,
            "target_terms": route.target_terms,
            "anchors": route_anchor_summary(route),
            "confidence": route.confidence,
            "routing_mode": route.routing_mode,
            "llm_router_used": route.llm_router_used,
            "reason_short": route.reason_short,
            "rag_session_id": rag_session_id,
        }
        context.trace.module("process.v2").log("intent_routed", routed_payload)

        resolved_payload = {
            "domain": route.routing_domain,
            "intent": route.intent,
            "subintent": route.subintent,
            "confidence": route.confidence,
        }
        self._log_step(context, "router_resolved", resolved_payload)

        anchors_payload = {
            "signal_types": route_anchor_summary(route)["signal_types"],
            "endpoint_paths": route.anchors.endpoint_paths,
            "target_doc_hints": route.anchors.target_doc_hints,
            "matched_aliases": route.anchors.matched_aliases,
            "target_terms": route.target_terms,
        }
        self._log_step(context, "anchors_extracted", anchors_payload)

        alias_payload = {
            "resolved_aliases": route.anchors.matched_aliases,
            "target_doc_hints": route.anchors.target_doc_hints,
        }
        self._log_step(context, "alias_resolution", alias_payload)

        flow_context = await self._run_workflow(context, route, rag_session_id)
        generated = flow_context.answer_generated_payload
        if generated is not None:
            self._log_step(context, "answer_generated", dict(generated))

        # Not every workflow context carries a changeset, hence the getattr defaults.
        return ProcessResult(
            answer=flow_context.answer,
            changeset=list(getattr(flow_context, "changeset", []) or []),
            apply_changeset=bool(getattr(flow_context, "apply_changeset", False)),
        )

    def _log_step(self, context, step: str, payload: dict[str, object]) -> None:
        """Record ``payload`` under ``step`` in the ``process.v2.pipeline`` trace module."""
        pipeline_trace = context.trace.module("process.v2.pipeline")
        pipeline_trace.log(step, payload)

    async def _run_workflow(self, runtime_context, route, rag_session_id: str):
        """Run the workflow registered for ``route`` and return its final context.

        Raises:
            ValueError: if no workflow is registered for the route's
                (domain, intent, subintent) combination.
        """
        route_key = (route.routing_domain, route.intent, route.subintent)
        workflow = self._workflows.get(route_key)
        if workflow is None:
            raise ValueError(f"Unsupported v2 workflow route: {route_key!r}")
        flow_context = self._build_flow_context(runtime_context, route, rag_session_id)
        return await workflow.run(flow_context)

    def _build_flow_context(self, runtime_context, route, rag_session_id: str):
        """Create the workflow-specific context object for ``route``."""
        shared = {"runtime": runtime_context, "route": route, "rag_session_id": rag_session_id}
        if route.intent == V2Intent.GENERAL_QA:
            return GeneralQaSummaryContext(
                **shared,
                prompt_name=self._general_summary_prompt_name,
                workflow_llm_enabled=self._workflow_llm_enabled,
            )
        if route.subintent == V2Subintent.FIND_FILES:
            return DocExplainFindFilesContext(**shared)
        if route.subintent == V2Subintent.API_EXPOSED:
            return DocExplainApiExposedContext(**shared)
        if route.intent == V2Intent.DOC_UPDATE and route.subintent == V2Subintent.FROM_FEATURE:
            return DocUpdateFromFeatureContext(**shared, doc_rules_enabled=self._doc_rules_enabled)
        # Remaining registered route: DOC_EXPLAIN / SUMMARY.
        return DocExplainSummaryContext(
            **shared,
            prompt_name=self._docs_summary_prompt_name,
            workflow_llm_enabled=self._workflow_llm_enabled,
        )
|
||||||
@@ -0,0 +1,17 @@
|
|||||||
|
# DOC_EXPLAIN / API_EXPOSED Workflow
|
||||||
|
|
||||||
|
## Контракт сабинтента
|
||||||
|
|
||||||
|
| Поле | Значение |
|
||||||
|
|---|---|
|
||||||
|
| `domain` | `DOCS` |
|
||||||
|
| `intent` | `DOC_EXPLAIN` |
|
||||||
|
| `subintent` | `API_EXPOSED` |
|
||||||
|
| `workflow_id` | `v2.docs_explain.api_exposed` |
|
||||||
|
| `source` | `workflow.v2.api_exposed` |
|
||||||
|
|
||||||
|
## Выходной формат
|
||||||
|
|
||||||
|
Ответ формируется детерминированно как список endpoint-путей (`/path`) по одному на строку.
|
||||||
|
Scope учитывается через retrieval-policy фильтры `metadata.domain`/`metadata.subdomain` и path-префиксы API-документации.
|
||||||
|
|
||||||
@@ -0,0 +1,4 @@
|
|||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.graph import DocExplainApiExposedGraph
|
||||||
|
|
||||||
|
__all__ = ["DocExplainApiExposedGraph"]
|
||||||
|
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.build_api_exposed_evidence_step import (
|
||||||
|
BuildApiExposedEvidenceStep,
|
||||||
|
)
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.fetch_rag_rows_step import FetchRagRowsStep
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.finalize_api_exposed_answer_step import (
|
||||||
|
FinalizeApiExposedAnswerStep,
|
||||||
|
)
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.require_rag_session_step import (
|
||||||
|
RequireRagSessionStep,
|
||||||
|
)
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.resolve_retrieval_plan_step import (
|
||||||
|
ResolveRetrievalPlanStep,
|
||||||
|
)
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.retrieval.api_endpoint_collector import (
|
||||||
|
ApiEndpointCollector,
|
||||||
|
)
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.buffered_graph import (
|
||||||
|
DocExplainApiExposedWorkflowGraph,
|
||||||
|
)
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context import (
|
||||||
|
DocExplainApiExposedContext,
|
||||||
|
)
|
||||||
|
from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
|
||||||
|
from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
|
||||||
|
|
||||||
|
|
||||||
|
class DocExplainApiExposedGraph(DocExplainApiExposedWorkflowGraph[DocExplainApiExposedContext]):
    """Workflow graph for DOC_EXPLAIN / API_EXPOSED, registered as ``v2.docs_explain.api_exposed``."""

    def __init__(
        self,
        policy_resolver: RetrievalPlanResolver,
        rag_adapter: V2RagRetrievalAdapter,
    ) -> None:
        """Assemble the step pipeline: session check, plan, fetch, evidence, answer."""
        session_guard = RequireRagSessionStep(
            missing_message="Для процесса v2 нужна активная RAG-сессия проекта с проиндексированной документацией."
        )
        pipeline = [
            session_guard,
            ResolveRetrievalPlanStep(policy_resolver),
            FetchRagRowsStep(rag_adapter),
            BuildApiExposedEvidenceStep(ApiEndpointCollector()),
            FinalizeApiExposedAnswerStep(),
        ]
        super().__init__(
            workflow_id="v2.docs_explain.api_exposed",
            source="workflow.v2.api_exposed",
            steps=pipeline,
        )
|
||||||
|
|
||||||
@@ -0,0 +1,2 @@
|
|||||||
|
"""Steps for DOC_EXPLAIN/API_EXPOSED workflow."""
|
||||||
|
|
||||||
+39
@@ -0,0 +1,39 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Generic, TypeVar
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.steps.retrieval.api_endpoint_collector import (
|
||||||
|
ApiEndpointCollector,
|
||||||
|
)
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context_protocols import ApiWorkflowContext
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.pipeline_logging import log_pipeline_step
|
||||||
|
from app.core.agent.utils.workflow import WorkflowStep
|
||||||
|
|
||||||
|
TContext = TypeVar("TContext", bound=ApiWorkflowContext)
|
||||||
|
|
||||||
|
|
||||||
|
class BuildApiExposedEvidenceStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that collects endpoints from the retrieved rows and traces the result."""

    step_id = "build_api_exposed_evidence"
    title = "Сборка списка API"

    def __init__(self, collector: ApiEndpointCollector) -> None:
        self._collector = collector

    async def run(self, context: TContext) -> TContext:
        """Fill ``context.endpoints`` unless an answer has already been set."""
        if not context.answer:
            context.endpoints = self._collector.collect(context.retrieved_rows)
            summary = {"mode": "api_exposed", "endpoint_count": len(context.endpoints)}
            # The evidence trace gets the full endpoint list; the pipeline log only the count.
            context.runtime.trace.module("process.v2.evidence").log(
                "evidence_assembled",
                {**summary, "endpoints": context.endpoints},
            )
            log_pipeline_step(context.runtime, "evidence_assembled", summary)
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report how many endpoints the context holds."""
        return {"endpoint_count": len(context.endpoints)}
|
||||||
|
|
||||||
+31
@@ -0,0 +1,31 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Generic, TypeVar
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context_protocols import RetrievalWorkflowContext
|
||||||
|
from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
|
||||||
|
from app.core.agent.utils.workflow import WorkflowStep
|
||||||
|
|
||||||
|
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
|
||||||
|
|
||||||
|
|
||||||
|
class FetchRagRowsStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that loads rows through the RAG adapter according to the retrieval plan."""

    step_id = "fetch_rag_rows"
    title = "Получение строк из RAG"

    def __init__(self, rag_adapter: V2RagRetrievalAdapter) -> None:
        self._rag_adapter = rag_adapter

    async def run(self, context: TContext) -> TContext:
        """Fetch rows into ``context.retrieved_rows``.

        Nothing is fetched when an answer is already set or no retrieval plan
        has been resolved.
        """
        nothing_to_fetch = bool(context.answer) or context.retrieval_plan is None
        if not nothing_to_fetch:
            context.retrieved_rows = await self._rag_adapter.fetch_rows(
                context.rag_session_id,
                context.route.normalized_query,
                context.retrieval_plan,
            )
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report how many rows the context holds."""
        return {"retrieved_row_count": len(context.retrieved_rows)}
|
||||||
|
|
||||||
+30
@@ -0,0 +1,30 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context import DocExplainApiExposedContext
|
||||||
|
from app.core.agent.utils.workflow import WorkflowStep
|
||||||
|
|
||||||
|
|
||||||
|
class FinalizeApiExposedAnswerStep(WorkflowStep[DocExplainApiExposedContext]):
    """Workflow step that turns the collected endpoints into the final answer text."""

    step_id = "finalize_api_exposed_answer"
    title = "Формирование ответа со списком API"

    async def run(self, context: DocExplainApiExposedContext) -> DocExplainApiExposedContext:
        """Set ``context.answer`` and ``context.answer_generated_payload``.

        An existing answer is left untouched. Otherwise the answer is the
        endpoints joined one per line (mode ``deterministic``), or a fixed
        "nothing found" message (mode ``insufficient_evidence``) when there
        are no endpoints.
        """
        if context.answer:
            return context

        if context.endpoints:
            answer, mode = "\n".join(context.endpoints), "deterministic"
        else:
            answer, mode = "Не нашёл задокументированных API-эндпоинтов в выбранном scope.", "insufficient_evidence"

        context.answer = answer
        context.answer_generated_payload = {
            "answer_mode": mode,
            "answer_length": len(context.answer),
        }
        return context

    def trace_output(self, context: DocExplainApiExposedContext) -> dict[str, object]:
        """Report the length of the answer text."""
        return {"answer_length": len(context.answer)}
|
||||||
|
|
||||||
+30
@@ -0,0 +1,30 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Generic, TypeVar
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context_protocols import RetrievalWorkflowContext
|
||||||
|
from app.core.agent.utils.workflow import WorkflowStep
|
||||||
|
|
||||||
|
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
|
||||||
|
|
||||||
|
|
||||||
|
class RequireRagSessionStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that answers with a fixed message when no RAG session id is present."""

    step_id = "require_rag_session"
    title = "Проверка RAG-сессии"

    def __init__(self, *, missing_message: str) -> None:
        self._missing_message = missing_message

    async def run(self, context: TContext) -> TContext:
        """Pass the context through, or set the missing-session answer and its payload."""
        if not context.rag_session_id:
            context.answer = self._missing_message
            context.answer_generated_payload = {
                "answer_mode": "insufficient_evidence",
                "answer_length": len(context.answer),
            }
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report whether the context has a RAG session id."""
        return {"has_rag_session": bool(context.rag_session_id)}
|
||||||
|
|
||||||
+38
@@ -0,0 +1,38 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Generic, TypeVar
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.context_protocols import RetrievalWorkflowContext
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_api_exposed.workflow_runtime.pipeline_logging import log_pipeline_step
|
||||||
|
from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
|
||||||
|
from app.core.agent.utils.workflow import WorkflowStep
|
||||||
|
|
||||||
|
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
|
||||||
|
|
||||||
|
|
||||||
|
class ResolveRetrievalPlanStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that asks the resolver for a retrieval plan for the route."""

    step_id = "resolve_retrieval_plan"
    title = "Выбор retrieval-плана"

    def __init__(self, resolver: RetrievalPlanResolver) -> None:
        self._resolver = resolver

    async def run(self, context: TContext) -> TContext:
        """Resolve the plan, store it on the context and log the selection.

        Nothing happens when ``context.answer`` is already set.
        """
        if context.answer:
            return context

        resolved = self._resolver.resolve(context.route)
        context.retrieval_plan = resolved

        policy_trace = context.runtime.trace.module("process.v2.retrieval_policy")
        policy_trace.log(
            "retrieval_plan_resolved",
            {
                "profile": resolved.profile,
                "layers": resolved.layers,
                "limit": resolved.limit,
                "filters": resolved.filters,
            },
        )
        # The pipeline log records the same selection without the row limit.
        log_pipeline_step(
            context.runtime,
            "retrieval_profile_selected",
            {
                "profile": resolved.profile,
                "layers": resolved.layers,
                "filters": resolved.filters,
            },
        )
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report the plan's profile, or an empty string when it has none."""
        profile = getattr(context.retrieval_plan, "profile", "")
        return {"profile": profile}
|
||||||
|
|
||||||
+2
@@ -0,0 +1,2 @@
|
|||||||
|
"""Retrieval helpers for DOC_EXPLAIN/API_EXPOSED workflow."""
|
||||||
|
|
||||||
+77
@@ -0,0 +1,77 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class ApiEndpointCollector:
    """Extract ``"METHOD /path"`` endpoint strings from retrieved rows.

    Each row is a dict that may carry a ``title`` and a ``metadata`` mapping
    with ``endpoint``, ``name`` and ``summary_text`` entries. Paths are
    lower-cased (placeholders such as ``{Id}`` included), surrounding
    punctuation is stripped, and values ending in a documentation file
    extension are ignored.
    """

    _METHODS = ("GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS")
    # One or more "|"-separated methods followed by a path, e.g. "GET | POST /users".
    _ENDPOINT_VALUE_RE = re.compile(
        r"\b((?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)(?:\s*\|\s*(?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS))*)\s+(/[-a-zA-Z0-9_./{}]+)"
    )
    # A single method followed by a path, e.g. "POST /users".
    _METHOD_PATH_RE = re.compile(r"\b(GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)\s+(/[-a-zA-Z0-9_./{}]+)")
    # Any path-looking token, with or without a method in front of it.
    _PATH_RE = re.compile(r"(/[-a-zA-Z0-9_./{}]+)")
    _DOC_EXTS = (".md", ".yaml", ".yml", ".json")

    def collect(self, rows: list[dict]) -> list[str]:
        """Return the sorted, de-duplicated endpoints found in *rows*.

        For every row the ``metadata["endpoint"]`` value is parsed first, then
        the title is scanned for bare paths (defaulting to ``GET``), and
        finally the name, summary text and title are scanned for explicit
        ``METHOD /path`` pairs.
        """
        endpoints: list[str] = []
        for row in rows:
            self._append_from_endpoint_metadata(endpoints, row)
            self._append_from_title_fallback(endpoints, row)
            for raw in self._row_candidates(row):
                self._append_from_text(endpoints, raw)
        return sorted(set(endpoints))

    def _append_from_title_fallback(self, out: list[str], row: dict) -> None:
        """Add ``GET <path>`` for bare paths that appear in the row title.

        Paths that the title already pairs with an explicit method are
        skipped: they are picked up with their real method by the text scan,
        and defaulting them to ``GET`` as well would report an endpoint that
        the title never mentions.
        """
        title = str(row.get("title") or "").strip()
        if not title:
            return
        explicit_paths = {path for _method, path in self._METHOD_PATH_RE.findall(title)}
        for match in self._PATH_RE.findall(title):
            if match in explicit_paths:
                continue
            self._append_default(out, match)

    def _append_from_endpoint_metadata(self, out: list[str], row: dict) -> None:
        """Add every method/path pair found in ``row["metadata"]["endpoint"]``."""
        metadata = dict(row.get("metadata") or {})
        endpoint_value = str(metadata.get("endpoint") or "").strip()
        if not endpoint_value:
            return
        for methods, path in self._ENDPOINT_VALUE_RE.findall(endpoint_value):
            self._append_methods_with_path(out, methods, path)

    def _row_candidates(self, row: dict) -> list[str]:
        """Return the non-blank text fields of *row* that may mention endpoints."""
        metadata = dict(row.get("metadata") or {})
        values = [
            metadata.get("name"),
            metadata.get("summary_text"),
            row.get("title"),
        ]
        return [str(value or "") for value in values if str(value or "").strip()]

    def _append_from_text(self, out: list[str], text: str) -> None:
        """Add every explicit ``METHOD /path`` pair found in *text*."""
        for method, path in self._METHOD_PATH_RE.findall(text):
            self._append_with_method(out, method, path)

    def _append_methods_with_path(self, out: list[str], methods_raw: str, path_raw: str) -> None:
        """Add *path_raw* once per known method listed in the ``|``-separated *methods_raw*.

        Falls back to ``GET`` when no known method is present.
        """
        methods = [
            part.strip().upper()
            for part in str(methods_raw or "").split("|")
            if part.strip().upper() in self._METHODS
        ]
        if not methods:
            self._append_default(out, path_raw)
            return
        for method in methods:
            self._append_with_method(out, method, path_raw)

    def _append_default(self, out: list[str], raw: str) -> None:
        """Add *raw* as a ``GET`` endpoint."""
        self._append_with_method(out, "GET", raw)

    def _append_with_method(self, out: list[str], method: str, raw: str) -> None:
        """Normalise *raw* and append ``"METHOD path"`` to *out* if it is new and valid."""
        value = str(raw or "").strip().strip("`'\"()[].,:;!?").lower()
        if not value.startswith("/"):
            return
        if value.endswith(self._DOC_EXTS):
            return
        # A value made only of slashes (e.g. "/" left over after stripping
        # punctuation from "/.") names no endpoint. The earlier segment-count
        # check could never fire because every value here starts with "/".
        if not value.strip("/"):
            return
        endpoint = f"{method.upper()} {value}"
        if endpoint not in out:
            out.append(endpoint)
|
||||||
+64
@@ -0,0 +1,64 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from app.core.agent.utils.process_v2.models import V2Intent, V2RouteResult, V2Subintent
|
||||||
|
from app.core.rag.contracts.enums import RagLayer
|
||||||
|
from app.core.rag.retrieval.session_retriever import RetrievalPlan
|
||||||
|
|
||||||
|
|
||||||
|
class DocExplainApiExposedRetrievalPolicy:
    """Build the retrieval plan for the DOC_EXPLAIN / API_EXPOSED route."""

    _LAYERS = [RagLayer.DOCS_DOCUMENT_CATALOG]
    _API_PREFIXES = ["docs/api/", "docs/endpoints/", "docs/methods/", "api/", "endpoints/", "methods/"]

    def supports(self, route: V2RouteResult) -> bool:
        """Return True only for intent DOC_EXPLAIN with subintent API_EXPOSED."""
        return route.intent == V2Intent.DOC_EXPLAIN and route.subintent == V2Subintent.API_EXPOSED

    def resolve(self, route: V2RouteResult) -> RetrievalPlan:
        """Return the ``api_exposed`` plan with filters derived from *route*."""
        return RetrievalPlan(
            profile="api_exposed",
            layers=list(self._LAYERS),
            limit=400,
            filters=self._filters(route),
        )

    def _filters(self, route: V2RouteResult) -> dict[str, object]:
        """Assemble the filter mapping; optional keys are added only when non-empty."""
        query_signals = self._query_signals(route)
        filters: dict[str, object] = {
            "metadata.type": "api_method",
            "prefer_path_prefixes": list(self._API_PREFIXES),
            "target_doc_hints": list(route.anchors.target_doc_hints),
            "prefer_like_patterns": self._like_patterns(route),
        }
        if query_signals:
            filters["query_signals"] = query_signals
        if route.anchors.process_domain:
            filters["metadata.domain"] = route.anchors.process_domain
        if route.anchors.process_subdomain:
            filters["metadata.subdomain"] = route.anchors.process_subdomain
        return filters

    def _like_patterns(self, route: V2RouteResult) -> list[str]:
        """Return lower-cased ``%term%`` patterns for the generic words and route terms."""
        raw: list[str] = ["api", "endpoint", "method", "эндпоинт", "метод"]
        raw.extend(route.target_terms)
        raw.extend(route.anchors.endpoint_paths)
        raw.extend(route.anchors.target_doc_hints)
        raw.extend(candidate.value for candidate in route.anchors.candidate_apis)
        # Lower-case before de-duplicating: the patterns are emitted in lower
        # case, so terms that differ only by case ("API" / "api") would
        # otherwise produce the same pattern twice.
        lowered = [str(item or "").lower() for item in raw]
        return [f"%{item}%" for item in _unique(lowered)]

    def _query_signals(self, route: V2RouteResult) -> list[str]:
        """Return the route terms and endpoint paths, minus the generic API words."""
        raw: list[str] = []
        raw.extend(route.target_terms)
        raw.extend(route.anchors.endpoint_paths)
        blocked = {"api", "endpoint", "method", "эндпоинт", "метод"}
        return [item for item in _unique(raw) if item.lower() not in blocked]
|
||||||
|
|
||||||
|
|
||||||
|
def _unique(items: list[str]) -> list[str]:
    """Return the stripped, non-empty values of *items* without duplicates.

    First-occurrence order is kept and comparison is case-sensitive. Falsy
    items (``None``, ``""``) and whitespace-only strings are dropped.
    """
    cleaned = (str(item or "").strip() for item in items)
    # Dict keys keep insertion order, which gives an ordered de-duplication.
    return list(dict.fromkeys(value for value in cleaned if value))
|
||||||
+2
@@ -0,0 +1,2 @@
|
|||||||
|
"""Runtime helpers for the DOC_EXPLAIN/API_EXPOSED workflow."""
|
||||||
|
|
||||||
+42
@@ -0,0 +1,42 @@
|
|||||||
|
"""Buffered graph for DOC_EXPLAIN/API_EXPOSED workflow."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import TypeVar
|
||||||
|
|
||||||
|
from app.core.agent.utils.workflow.context import WorkflowContext
|
||||||
|
from app.core.agent.utils.workflow.graph import WorkflowGraph
|
||||||
|
|
||||||
|
TContext = TypeVar("TContext", bound=WorkflowContext)
|
||||||
|
|
||||||
|
|
||||||
|
class DocExplainApiExposedWorkflowGraph(WorkflowGraph[TContext]):
    """Workflow graph that also logs one combined trace record for all steps."""

    async def run(self, context: TContext) -> TContext:
        """Run the steps in order and return the context left by the last one.

        For each step a status message is published, the step is executed and
        its trace input/output are logged. After the loop the per-step
        records are logged once more as a single ``workflow_trace_flushed``
        event, followed by ``workflow_completed``.
        """
        trace = context.runtime.trace.module(self._source)
        trace.log("workflow_started", {"workflow_id": self._workflow_id})

        collected: list[dict[str, object]] = []
        for step in self._steps:
            # Captured before the step runs, so it reflects the incoming state.
            step_input = step.trace_input(context)
            request_id = context.runtime.request.request_id
            status_details = {"workflow_id": self._workflow_id, "step_id": step.step_id}
            await context.runtime.publisher.publish_status(
                request_id,
                self._source,
                f"Шаг workflow: {step.title}.",
                status_details,
            )

            context = await step.run(context)
            step_output = step.trace_output(context)

            trace.log(
                "workflow_step_traced",
                {
                    "workflow_id": self._workflow_id,
                    "step": {"id": step.step_id, "title": step.title},
                    "input": step_input,
                    "output": step_output,
                },
            )
            collected.append(
                {
                    "step_id": step.step_id,
                    "title": step.title,
                    "input": step_input,
                    "output": step_output,
                }
            )

        trace.log("workflow_trace_flushed", {"workflow_id": self._workflow_id, "steps": collected})
        trace.log("workflow_completed", {"workflow_id": self._workflow_id})
        return context
|
||||||
|
|
||||||
+20
@@ -0,0 +1,20 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
|
||||||
|
from app.core.agent.utils.process_v2.models import V2RouteResult
|
||||||
|
from app.core.rag.retrieval.session_retriever import RetrievalPlan
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class DocExplainApiExposedContext:
|
||||||
|
runtime: RuntimeExecutionContext
|
||||||
|
route: V2RouteResult
|
||||||
|
rag_session_id: str
|
||||||
|
retrieval_plan: RetrievalPlan | None = None
|
||||||
|
retrieved_rows: list[dict] = field(default_factory=list)
|
||||||
|
endpoints: list[str] = field(default_factory=list)
|
||||||
|
answer: str = ""
|
||||||
|
answer_generated_payload: dict[str, object] | None = None
|
||||||
|
|
||||||
+24
@@ -0,0 +1,24 @@
|
|||||||
|
"""Context protocols for the DOC_EXPLAIN/API_EXPOSED workflow."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Protocol
|
||||||
|
|
||||||
|
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
|
||||||
|
from app.core.agent.utils.process_v2.models import V2RouteResult
|
||||||
|
from app.core.rag.retrieval.session_retriever import RetrievalPlan
|
||||||
|
|
||||||
|
|
||||||
|
class RetrievalWorkflowContext(Protocol):
    """Structural type for contexts used by the retrieval-related steps."""

    # Inputs of the workflow.
    runtime: RuntimeExecutionContext
    route: V2RouteResult
    rag_session_id: str

    # Retrieval state.
    retrieval_plan: RetrievalPlan | None
    retrieved_rows: list[dict]

    # Answer text and its payload.
    answer: str
    answer_generated_payload: dict[str, object] | None
|
||||||
|
|
||||||
|
|
||||||
|
class ApiWorkflowContext(RetrievalWorkflowContext, Protocol):
    """Retrieval context that additionally carries a list of endpoint strings."""

    endpoints: list[str]
|
||||||
|
|
||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
"""Pipeline logging helpers for DOC_EXPLAIN/API_EXPOSED."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
|
def log_pipeline_step(runtime, step: str, payload: dict[str, object]) -> None:
    """Log *payload* under the event name *step* to the ``process.v2.pipeline`` trace module."""
    pipeline_trace = runtime.trace.module("process.v2.pipeline")
    pipeline_trace.log(step, payload)
|
||||||
|
|
||||||
@@ -0,0 +1,159 @@
|
|||||||
|
# DOC_EXPLAIN / FIND_FILES Workflow
|
||||||
|
|
||||||
|
## Контракт сабинтента
|
||||||
|
|
||||||
|
| Поле | Значение |
|
||||||
|
|---|---|
|
||||||
|
| `domain` | `DOCS` |
|
||||||
|
| `intent` | `DOC_EXPLAIN` |
|
||||||
|
| `subintent` | `FIND_FILES` |
|
||||||
|
| `workflow_id` | `v2.docs_explain.find_files` |
|
||||||
|
| `source` | `workflow.v2.find_files` |
|
||||||
|
|
||||||
|
## Диаграмма флоу
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
flowchart TD
|
||||||
|
A["RequireRagSessionStep"] --> B["ResolveRetrievalPlanStep"]
|
||||||
|
B --> C["FetchRagRowsStep"]
|
||||||
|
C --> D["PrepareCandidateRowsStep"]
|
||||||
|
D --> E["BuildFilesEvidenceStep"]
|
||||||
|
E --> F["ApplyFilesEvidenceGateStep"]
|
||||||
|
F --> G["FinalizeFindFilesAnswerStep"]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Шаги процесса
|
||||||
|
|
||||||
|
### 1) `RequireRagSessionStep`
|
||||||
|
|
||||||
|
Шаг проверяет, есть ли активная RAG-сессия. Если `rag_session_id` пустой, workflow останавливает дальнейший retrieval и пишет пользовательское сообщение в `answer`. Для `find_files` gate-решение на этом шаге обычно не ставится, но механизм поддержан.
|
||||||
|
|
||||||
|
**Входные параметры**
|
||||||
|
|
||||||
|
| Параметр | Откуда берётся | Описание |
|
||||||
|
|---|---|---|
|
||||||
|
| `context.rag_session_id` | `V2Process` -> `DocExplainFindFilesContext` | Идентификатор активной RAG-сессии |
|
||||||
|
| `self._missing_message` | Конфигурация в `graph.py` | Текст ответа, если сессии нет |
|
||||||
|
| `self._missing_gate` | Конфигурация шага | Опциональный gate для раннего выхода |
|
||||||
|
|
||||||
|
**Выходные параметры**
|
||||||
|
|
||||||
|
| Параметр | Как формируется |
|
||||||
|
|---|---|
|
||||||
|
| `context.answer` | Заполняется `missing_message`, если `rag_session_id` пустой |
|
||||||
|
| `context.gate_decision` | Заполняется `missing_gate`, если он передан и сессии нет |
|
||||||
|
| `context.answer_generated_payload` | Формируется как `{"answer_mode", "answer_length"}` при раннем ответе |
|
||||||
|
|
||||||
|
### 2) `ResolveRetrievalPlanStep`
|
||||||
|
|
||||||
|
Шаг превращает route в retrieval-план через `RetrievalPlanResolver`. Профиль для этого сабинтента — `file_lookup`, с подходящими слоями и фильтрами. Параллельно пишет trace-событие `retrieval_plan_resolved`.
|
||||||
|
|
||||||
|
**Входные параметры**
|
||||||
|
|
||||||
|
| Параметр | Откуда берётся | Описание |
|
||||||
|
|---|---|---|
|
||||||
|
| `context.route` | Результат `intent_router` | Route с `anchors`, `target_terms`, `scope_type` |
|
||||||
|
| `self._resolver` | DI из `graph.py` | Реализация policy-резолвера |
|
||||||
|
| `context.answer` | Предыдущие шаги | Если уже есть ответ, шаг пропускается |
|
||||||
|
|
||||||
|
**Выходные параметры**
|
||||||
|
|
||||||
|
| Параметр | Как формируется |
|
||||||
|
|---|---|
|
||||||
|
| `context.retrieval_plan` | `self._resolver.resolve(context.route)` |
|
||||||
|
| `process.v2.retrieval_policy.retrieval_plan_resolved` | Лог с `profile`, `layers`, `limit`, `filters` |
|
||||||
|
|
||||||
|
### 3) `FetchRagRowsStep`
|
||||||
|
|
||||||
|
Шаг выполняет retrieval через `V2RagRetrievalAdapter`. Внутри адаптера объединяются seed-строки по `target_doc_hints` и основной retrieval по эмбеддингам/фильтрам плана. Если план не сформирован или уже есть готовый ответ, шаг ничего не делает.
|
||||||
|
|
||||||
|
**Входные параметры**
|
||||||
|
|
||||||
|
| Параметр | Откуда берётся | Описание |
|
||||||
|
|---|---|---|
|
||||||
|
| `context.rag_session_id` | Контекст workflow | Сессия для поиска в `rag_chunks` |
|
||||||
|
| `context.route.normalized_query` | Route | Нормализованный текст запроса |
|
||||||
|
| `context.retrieval_plan` | Предыдущий шаг | План retrieval |
|
||||||
|
|
||||||
|
**Выходные параметры**
|
||||||
|
|
||||||
|
| Параметр | Как формируется |
|
||||||
|
|---|---|
|
||||||
|
| `context.retrieved_rows` | `await rag_adapter.fetch_rows(rag_session_id, normalized_query, retrieval_plan)` |
|
||||||
|
|
||||||
|
### 4) `PrepareCandidateRowsStep`
|
||||||
|
|
||||||
|
Шаг собирает итоговые candidate rows для ранжирования файлов. Он добавляет metadata-lookup кандидаты и подмешивает seed по `target_doc_hints`, затем сохраняет merged-список в `context.rows`. Дополнительно пишет детальный retrieval-trace.
|
||||||
|
|
||||||
|
**Входные параметры**
|
||||||
|
|
||||||
|
| Параметр | Откуда берётся | Описание |
|
||||||
|
|---|---|---|
|
||||||
|
| `context.retrieved_rows` | `FetchRagRowsStep` | Строки после retrieval |
|
||||||
|
| `context.route` | Route | Нужен для hints/aliases/terms |
|
||||||
|
| `self._builder` | `CandidateRowsBuilder()` | Логика merge и metadata lookup |
|
||||||
|
|
||||||
|
**Выходные параметры**
|
||||||
|
|
||||||
|
| Параметр | Как формируется |
|
||||||
|
|---|---|
|
||||||
|
| `context.rows` | `prepared.rows` из `CandidateRowsBuilder.build(...)` |
|
||||||
|
| `process.v2.rag_retrieval.rag_rows_fetched` | Лог деталей rows, источников и top результатов |
|
||||||
|
|
||||||
|
### 5) `BuildFilesEvidenceStep`
|
||||||
|
|
||||||
|
Шаг ранжирует candidate rows в список файлов через `DocsEvidenceAssembler.assemble_files`. На выходе формируется shortlist `RetrievedFile` с оценками и причинами совпадения. Этот shortlist становится опорой для gate и финального ответа.
|
||||||
|
|
||||||
|
**Входные параметры**
|
||||||
|
|
||||||
|
| Параметр | Откуда берётся | Описание |
|
||||||
|
|---|---|---|
|
||||||
|
| `context.rows` | `PrepareCandidateRowsStep` | Подготовленные кандидаты |
|
||||||
|
| `context.route` | Route | Сигналы маршрута для ranking |
|
||||||
|
| `self._assembler` | DI из `graph.py` | Сборщик evidence |
|
||||||
|
|
||||||
|
**Выходные параметры**
|
||||||
|
|
||||||
|
| Параметр | Как формируется |
|
||||||
|
|---|---|
|
||||||
|
| `context.files` | `assemble_files(context.rows, context.route)` |
|
||||||
|
| `process.v2.evidence.evidence_assembled` | Лог file-count и путей |
|
||||||
|
|
||||||
|
### 6) `ApplyFilesEvidenceGateStep`
|
||||||
|
|
||||||
|
Шаг проверяет качество shortlist через `DocsEvidenceGate.check_files`. Решение gate определяет, можно ли отвечать детерминированно или нужно более осторожное поведение. Для прозрачности пишет pipeline-лог с полями `passed/reason/answer_mode`.
|
||||||
|
|
||||||
|
**Входные параметры**
|
||||||
|
|
||||||
|
| Параметр | Откуда берётся | Описание |
|
||||||
|
|---|---|---|
|
||||||
|
| `context.route` | Route | Сигналы запроса для gate |
|
||||||
|
| `context.files` | `BuildFilesEvidenceStep` | Ранжированные файлы |
|
||||||
|
| `self._gate` | DI из `graph.py` | Правила оценки evidence |
|
||||||
|
|
||||||
|
**Выходные параметры**
|
||||||
|
|
||||||
|
| Параметр | Как формируется |
|
||||||
|
|---|---|
|
||||||
|
| `context.gate_decision` | `self._gate.check_files(context.route, context.files)` |
|
||||||
|
| `process.v2.pipeline.evidence_gate_checked` | Лог результата gate |
|
||||||
|
|
||||||
|
### 7) `FinalizeFindFilesAnswerStep`
|
||||||
|
|
||||||
|
Шаг собирает финальный текстовый ответ без LLM. Если файлов нет, возвращается `insufficient_evidence`; если файл один — отдаётся один путь; если несколько — до 4 путей. Если gate вернул `low_confidence_shortlist`, также возвращается ограниченный список путей.
|
||||||
|
|
||||||
|
**Входные параметры**
|
||||||
|
|
||||||
|
| Параметр | Откуда берётся | Описание |
|
||||||
|
|---|---|---|
|
||||||
|
| `context.files` | `BuildFilesEvidenceStep` | Список найденных файлов |
|
||||||
|
| `context.gate_decision` | `ApplyFilesEvidenceGateStep` | Режим и причина ответа |
|
||||||
|
| `context.answer` | Предыдущие шаги | Если уже заполнен, шаг пропускается |
|
||||||
|
|
||||||
|
**Выходные параметры**
|
||||||
|
|
||||||
|
| Параметр | Как формируется |
|
||||||
|
|---|---|
|
||||||
|
| `context.answer` | Детерминированно: сообщение об отсутствии файлов / 1 путь / до 4 путей |
|
||||||
|
| `context.answer_generated_payload` | `{"answer_mode", "answer_length"}` по ветке формирования |
|
||||||
|
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.graph import DocExplainFindFilesGraph
|
||||||
|
|
||||||
|
__all__ = ["DocExplainFindFilesGraph"]
|
||||||
@@ -0,0 +1,49 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.buffered_graph import DocExplainFindFilesWorkflowGraph
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.retrieval.candidate_rows import CandidateRowsBuilder
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context import DocExplainFindFilesContext
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.apply_files_evidence_gate_step import (
|
||||||
|
ApplyFilesEvidenceGateStep,
|
||||||
|
)
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.build_files_evidence_step import BuildFilesEvidenceStep
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.fetch_rag_rows_step import FetchRagRowsStep
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.finalize_find_files_answer_step import (
|
||||||
|
FinalizeFindFilesAnswerStep,
|
||||||
|
)
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.prepare_candidate_rows_step import (
|
||||||
|
PrepareCandidateRowsStep,
|
||||||
|
)
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.require_rag_session_step import RequireRagSessionStep
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.resolve_retrieval_plan_step import (
|
||||||
|
ResolveRetrievalPlanStep,
|
||||||
|
)
|
||||||
|
from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
|
||||||
|
from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
|
||||||
|
from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
|
||||||
|
from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
|
||||||
|
|
||||||
|
|
||||||
|
class DocExplainFindFilesGraph(DocExplainFindFilesWorkflowGraph[DocExplainFindFilesContext]):
    """DOC_EXPLAIN / FIND_FILES workflow graph wired from its seven steps."""

    def __init__(
        self,
        policy_resolver: RetrievalPlanResolver,
        rag_adapter: V2RagRetrievalAdapter,
        evidence_assembler: DocsEvidenceAssembler,
        evidence_gate: DocsEvidenceGate,
    ) -> None:
        """Create the steps from the given collaborators and pass them to the base graph."""
        session_guard = RequireRagSessionStep(
            missing_message="Для процесса v2 нужна активная RAG-сессия проекта с проиндексированной документацией."
        )
        # Order matters: the base graph receives the steps in this sequence.
        pipeline = [
            session_guard,
            ResolveRetrievalPlanStep(policy_resolver),
            FetchRagRowsStep(rag_adapter),
            PrepareCandidateRowsStep(CandidateRowsBuilder()),
            BuildFilesEvidenceStep(evidence_assembler),
            ApplyFilesEvidenceGateStep(evidence_gate),
            FinalizeFindFilesAnswerStep(),
        ]
        super().__init__(
            workflow_id="v2.docs_explain.find_files",
            source="workflow.v2.find_files",
            steps=pipeline,
        )
|
||||||
+36
@@ -0,0 +1,36 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Generic, TypeVar
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import FindFilesWorkflowContext
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.pipeline_logging import log_pipeline_step
|
||||||
|
from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
|
||||||
|
from app.core.agent.utils.workflow import WorkflowStep
|
||||||
|
|
||||||
|
TContext = TypeVar("TContext", bound=FindFilesWorkflowContext)
|
||||||
|
|
||||||
|
|
||||||
|
class ApplyFilesEvidenceGateStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that runs the evidence gate over the assembled files."""

    step_id = "apply_files_evidence_gate"
    title = "Проверка file evidence"

    def __init__(self, gate: DocsEvidenceGate) -> None:
        self._gate = gate

    async def run(self, context: TContext) -> TContext:
        """Store the gate decision on the context and log it to the pipeline trace.

        Nothing happens when ``context.answer`` is already set.
        """
        if context.answer:
            return context

        context.gate_decision = self._gate.check_files(context.route, context.files)
        decision = context.gate_decision
        summary = {
            "passed": decision.passed,
            "reason": decision.reason,
            "answer_mode": decision.answer_mode,
        }
        log_pipeline_step(context.runtime, "evidence_gate_checked", summary)
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report whether a gate decision exists and passed."""
        gate_passed = bool(context.gate_decision and context.gate_decision.passed)
        return {"passed": gate_passed}
|
||||||
+45
@@ -0,0 +1,45 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Generic, TypeVar
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import FindFilesWorkflowContext
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.pipeline_logging import log_pipeline_step, log_ranking
|
||||||
|
from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
|
||||||
|
from app.core.agent.utils.workflow import WorkflowStep
|
||||||
|
|
||||||
|
TContext = TypeVar("TContext", bound=FindFilesWorkflowContext)
|
||||||
|
|
||||||
|
|
||||||
|
class BuildFilesEvidenceStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that turns the candidate rows into a list of files."""

    step_id = "build_files_evidence"
    title = "Сборка file evidence"

    def __init__(self, assembler: DocsEvidenceAssembler) -> None:
        self._assembler = assembler

    async def run(self, context: TContext) -> TContext:
        """Assemble ``context.files`` from ``context.rows`` and log the result.

        Nothing happens when ``context.answer`` is already set.
        """
        if context.answer:
            return context

        context.files = self._assembler.assemble_files(context.rows, context.route)

        evidence_trace = context.runtime.trace.module("process.v2.evidence")
        evidence_trace.log(
            "evidence_assembled",
            {
                "mode": "find_files",
                "file_count": len(context.files),
                "files": [entry.path for entry in context.files],
            },
        )

        # The first assembled file is reported as the primary one, if any exist.
        primary_file = context.files[0].path if context.files else None
        log_pipeline_step(
            context.runtime,
            "evidence_assembled",
            {
                "mode": "find_files",
                "primary_file": primary_file,
                "file_count": len(context.files),
            },
        )
        log_ranking(context.runtime, context.files)
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report how many files are on the context."""
        file_count = len(context.files)
        return {"file_count": file_count}
|
||||||
+30
@@ -0,0 +1,30 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Generic, TypeVar
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import RetrievalWorkflowContext
|
||||||
|
from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
|
||||||
|
from app.core.agent.utils.workflow import WorkflowStep
|
||||||
|
|
||||||
|
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
|
||||||
|
|
||||||
|
|
||||||
|
class FetchRagRowsStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that fetches rows through the RAG adapter."""

    step_id = "fetch_rag_rows"
    title = "Получение строк из RAG"

    def __init__(self, rag_adapter: V2RagRetrievalAdapter) -> None:
        self._rag_adapter = rag_adapter

    async def run(self, context: TContext) -> TContext:
        """Fetch rows for the session, normalized query and plan into ``context.retrieved_rows``.

        Nothing happens when an answer already exists or no plan was resolved.
        """
        nothing_to_fetch = bool(context.answer) or context.retrieval_plan is None
        if nothing_to_fetch:
            return context

        rows = await self._rag_adapter.fetch_rows(
            context.rag_session_id,
            context.route.normalized_query,
            context.retrieval_plan,
        )
        context.retrieved_rows = rows
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report how many rows are stored on the context."""
        row_count = len(context.retrieved_rows)
        return {"retrieved_row_count": row_count}
|
||||||
+34
@@ -0,0 +1,34 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context import DocExplainFindFilesContext
|
||||||
|
from app.core.agent.utils.workflow import WorkflowStep
|
||||||
|
|
||||||
|
|
||||||
|
class FinalizeFindFilesAnswerStep(WorkflowStep[DocExplainFindFilesContext]):
    """Workflow step that renders the found files into the final text answer."""

    step_id = "finalize_find_files_answer"
    title = "Сборка списка файлов"

    async def run(self, context: DocExplainFindFilesContext) -> DocExplainFindFilesContext:
        """Fill in ``context.answer`` and its payload unless an answer already exists.

        With no files a fixed "not found" message is used
        (``insufficient_evidence``). Otherwise the answer lists up to four
        file paths, one per line; the mode is the gate's ``answer_mode`` when
        the gate reason is ``low_confidence_shortlist`` and ``deterministic``
        in every other case.
        """
        if context.answer:
            return context

        if not context.files:
            mode = "insufficient_evidence"
            text = "Не нашёл файлов документации, которые уверенно соответствуют запросу."
        else:
            gate = context.gate_decision
            low_confidence = gate is not None and gate.reason == "low_confidence_shortlist"
            mode = gate.answer_mode if low_confidence else "deterministic"
            # Joining a single path yields that path unchanged, so the
            # one-file case needs no branch of its own.
            text = "\n".join(entry.path for entry in context.files[:4])

        context.answer = text
        context.answer_generated_payload = {"answer_mode": mode, "answer_length": len(text)}
        return context

    def trace_output(self, context: DocExplainFindFilesContext) -> dict[str, object]:
        """Report the length of the current answer."""
        answer_length = len(context.answer)
        return {"answer_length": answer_length}
|
||||||
+36
@@ -0,0 +1,36 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Generic, TypeVar
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.steps.retrieval.candidate_rows import CandidateRowsBuilder
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import RetrievalWorkflowContext
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.pipeline_logging import log_retrieval_trace
|
||||||
|
from app.core.agent.utils.workflow import WorkflowStep
|
||||||
|
|
||||||
|
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
|
||||||
|
|
||||||
|
|
||||||
|
class PrepareCandidateRowsStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that builds the candidate rows from the retrieved rows."""

    step_id = "prepare_candidate_rows"
    title = "Подготовка candidate rows"

    def __init__(self, builder: CandidateRowsBuilder) -> None:
        self._builder = builder

    async def run(self, context: TContext) -> TContext:
        """Store the builder's rows on ``context.rows`` and write the retrieval trace.

        Nothing happens when an answer already exists or no plan was resolved.
        """
        skip = bool(context.answer) or context.retrieval_plan is None
        if skip:
            return context

        built = self._builder.build(context.retrieved_rows, context.route)
        context.rows = built.rows
        # The trace receives the raw rows, the builder's metadata rows and the final rows.
        log_retrieval_trace(
            context.runtime,
            context.route,
            context.retrieval_plan,
            context.retrieved_rows,
            built.metadata_rows,
            built.rows,
        )
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report how many candidate rows are on the context."""
        row_count = len(context.rows)
        return {"row_count": row_count}
|
||||||
+43
@@ -0,0 +1,43 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Generic, TypeVar
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import RetrievalWorkflowContext
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.pipeline_logging import log_pipeline_step
|
||||||
|
from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
|
||||||
|
from app.core.agent.utils.workflow import WorkflowStep
|
||||||
|
|
||||||
|
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
|
||||||
|
|
||||||
|
|
||||||
|
class RequireRagSessionStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that answers early when the context has no RAG session."""

    step_id = "require_rag_session"
    title = "Проверка RAG-сессии"

    def __init__(self, *, missing_message: str, missing_gate: EvidenceGateDecision | None = None) -> None:
        """Store the fallback answer and the optional gate decision.

        Args:
            missing_message: Text written to ``context.answer`` when the
                session id is empty.
            missing_gate: Optional gate decision recorded on the context
                together with the fallback answer.
        """
        self._missing_message = missing_message
        self._missing_gate = missing_gate

    async def run(self, context: TContext) -> TContext:
        """Write the fallback answer when ``context.rag_session_id`` is empty."""
        if context.rag_session_id:
            return context

        context.answer = self._missing_message
        gate = self._missing_gate
        if gate is None:
            # Without a configured gate only the answer text is set.
            return context

        context.gate_decision = gate
        context.answer_generated_payload = {
            "answer_mode": gate.answer_mode,
            "answer_length": len(context.answer),
        }
        gate_payload = {
            "passed": gate.passed,
            "reason": gate.reason,
            "answer_mode": gate.answer_mode,
        }
        log_pipeline_step(context.runtime, "evidence_gate_checked", gate_payload)
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Return whether the context carries a non-empty RAG session id."""
        has_session = bool(context.rag_session_id)
        return {"has_rag_session": has_session}
|
||||||
+37
@@ -0,0 +1,37 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Generic, TypeVar
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.context_protocols import RetrievalWorkflowContext
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_find_files.workflow_runtime.pipeline_logging import log_pipeline_step
|
||||||
|
from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
|
||||||
|
from app.core.agent.utils.workflow import WorkflowStep
|
||||||
|
|
||||||
|
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
|
||||||
|
|
||||||
|
|
||||||
|
class ResolveRetrievalPlanStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that resolves the retrieval plan for the current route."""

    step_id = "resolve_retrieval_plan"
    title = "Выбор retrieval-плана"

    def __init__(self, resolver: RetrievalPlanResolver) -> None:
        """Keep the resolver that maps a route to a retrieval plan."""
        self._resolver = resolver

    async def run(self, context: TContext) -> TContext:
        """Resolve the plan, store it on the context and log it twice.

        The step is a no-op when the context already carries an answer.
        """
        if context.answer:
            return context

        plan = self._resolver.resolve(context.route)
        context.retrieval_plan = plan

        # Full plan description for the retrieval-policy trace module.
        policy_payload = {
            "profile": plan.profile,
            "layers": plan.layers,
            "limit": plan.limit,
            "filters": plan.filters,
        }
        policy_trace = context.runtime.trace.module("process.v2.retrieval_policy")
        policy_trace.log("retrieval_plan_resolved", policy_payload)

        # The pipeline record carries the same data without the limit.
        pipeline_payload = {
            "profile": plan.profile,
            "layers": plan.layers,
            "filters": plan.filters,
        }
        log_pipeline_step(context.runtime, "retrieval_profile_selected", pipeline_payload)
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Return the resolved profile, or an empty string when it is unavailable."""
        profile = getattr(context.retrieval_plan, "profile", "")
        return {"profile": profile}
|
||||||
+2
@@ -0,0 +1,2 @@
|
|||||||
|
"""Retrieval-related step helpers for the doc-explain find-files workflow."""
|
||||||
|
|
||||||
+43
@@ -0,0 +1,43 @@
|
|||||||
|
"""Сборка candidate rows для doc-explain find-files (метаданные + сиды по hints)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from app.core.agent.utils.process_v2.models import V2RouteResult
|
||||||
|
from app.core.agent.utils.process_v2.rag_retrieval import DocsMetadataLookupIndex
|
||||||
|
from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import (
|
||||||
|
RagRowIndex,
|
||||||
|
merge_row_lists,
|
||||||
|
normalize_doc_path,
|
||||||
|
normalized_path_set,
|
||||||
|
seed_candidates_from_target_hints,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class CandidateRowsResult:
|
||||||
|
metadata_rows: list[dict]
|
||||||
|
rows: list[dict]
|
||||||
|
|
||||||
|
|
||||||
|
class CandidateRowsBuilder:
    """Builds candidate rows from retrieved rows, metadata lookup and target hints."""

    def build(self, retrieved_rows: list[dict], route: V2RouteResult) -> CandidateRowsResult:
        """Merge retrieved rows with metadata rows and seed them from target hints.

        Args:
            retrieved_rows: Rows produced by the retrieval step.
            route: Route whose anchors supply the target document hints.

        Returns:
            The metadata rows together with the final candidate rows.
        """
        metadata_rows = DocsMetadataLookupIndex(retrieved_rows).lookup(route)
        merged = merge_row_lists(retrieved_rows, metadata_rows)
        seeded = seed_candidates_from_target_hints(
            merged,
            route.anchors.target_doc_hints,
            RagRowIndex(merged),
        )
        self._print_missing_target_hints(route, seeded)
        return CandidateRowsResult(metadata_rows=metadata_rows, rows=seeded)

    def _print_missing_target_hints(self, route: V2RouteResult, rows: list[dict]) -> None:
        """Print an error line for every hinted docs file absent from the candidates.

        Only hints that normalize to a ``docs/`` path whose last segment
        contains a dot are checked.
        """
        hints = route.anchors.target_doc_hints
        if not hints:
            return

        known_paths = normalized_path_set(rows)
        for hint in hints:
            if not str(hint or "").strip():
                continue
            normalized = normalize_doc_path(hint)
            file_name = normalized.rsplit("/", 1)[-1]
            is_docs_file = normalized.startswith("docs/") and "." in file_name
            if is_docs_file and normalized not in known_paths:
                print("ERROR: target doc missing from candidates:", normalized)
|
||||||
+99
@@ -0,0 +1,99 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from app.core.agent.utils.process_v2.anchor_signals import anchor_signal_types
|
||||||
|
from app.core.agent.utils.process_v2.models import V2AnchorType, V2RouteResult, V2Subintent
|
||||||
|
from app.core.rag.contracts.enums import RagLayer
|
||||||
|
from app.core.rag.retrieval.session_retriever import RetrievalPlan
|
||||||
|
|
||||||
|
|
||||||
|
class DocExplainFindFilesRetrievalPolicy:
    """Retrieval policy that builds the ``file_lookup`` plan for FIND_FILES routes."""

    _LAYERS = [RagLayer.DOCS_DOCUMENT_CATALOG, RagLayer.DOCS_ENTITY_CATALOG]

    def supports(self, route: V2RouteResult) -> bool:
        """Return True when the route's subintent is FIND_FILES."""
        return route.subintent == V2Subintent.FIND_FILES

    def resolve(self, route: V2RouteResult) -> RetrievalPlan:
        """Build the retrieval plan for the route.

        Profile, layers and limit are fixed; only the filters depend on the
        route.
        """
        return RetrievalPlan(
            profile="file_lookup",
            layers=list(self._LAYERS),  # copy: the class-level list stays untouched
            limit=12,
            filters=self._build_filters(route),
        )

    def _build_filters(self, route: V2RouteResult) -> dict[str, object]:
        """Assemble the filter mapping from the route anchors."""
        anchors = route.anchors
        filters: dict[str, object] = {"target_doc_hints": list(anchors.target_doc_hints)}
        if anchors.process_domain:
            filters["metadata.domain"] = anchors.process_domain
        if anchors.process_subdomain:
            filters["metadata.subdomain"] = anchors.process_subdomain
        prefixes = self._path_prefixes(route)
        if prefixes:
            filters["path_prefixes"] = prefixes
        filters["prefer_path_prefixes"] = self._prefer_prefixes(route, prefixes)
        filters["prefer_like_patterns"] = self._like_patterns(route)
        return filters

    def _path_prefixes(self, route: V2RouteResult) -> list[str]:
        """Pick directory prefixes for the search.

        Priority: directories of the target doc hints, then directories of
        ``docs/`` file-name anchors, then a prefix chosen by anchor signal
        type, and finally plain ``docs/``.
        """
        hint_prefixes = _prefixes_from_paths(route.anchors.target_doc_hints)
        if hint_prefixes:
            return hint_prefixes

        docs_files = [item for item in route.anchors.file_names if str(item).strip().startswith("docs/")]
        derived = _prefixes_from_paths(docs_files)
        if derived:
            return derived

        signals = anchor_signal_types(route)
        # Checked in order; the first matching signal decides the prefix.
        prefix_by_signal = (
            (V2AnchorType.API_ENDPOINT, "docs/api/"),
            (V2AnchorType.ARCHITECTURE, "docs/architecture/"),
            (V2AnchorType.LOGIC_FLOW, "docs/logic/"),
            (V2AnchorType.DOMAIN_ENTITY, "docs/domains/"),
        )
        for anchor_type, prefix in prefix_by_signal:
            if anchor_type in signals:
                return [prefix, "docs/"]
        return ["docs/"]

    def _prefer_prefixes(self, route: V2RouteResult, prefixes: list[str]) -> list[str]:
        """Return the preferred prefixes, extended for domain-scoped routes."""
        preferred = list(prefixes)
        if route.anchors.process_domain or route.anchors.process_subdomain:
            preferred.extend(["docs/domains/", "docs/logic/"])
        return _unique_terms(preferred or ["docs/"])

    def _like_patterns(self, route: V2RouteResult) -> list[str]:
        """Build lower-cased ``%term%`` patterns for the preference filter.

        When target doc hints exist, only their file names are used.
        Otherwise patterns come from the route terms and anchors.
        """
        hints = route.anchors.target_doc_hints
        if hints:
            # Normalize like the other hint consumers (``str(hint or "")``):
            # a None hint must not crash, and whitespace must not end up
            # inside the pattern.
            names = (str(hint or "").strip().rsplit("/", 1)[-1] for hint in hints)
            # A hint ending in "/" has no file name; skipping it avoids
            # emitting "%%", which would match every row.
            return [f"%{name.lower()}%" for name in names if name]

        anchors = route.anchors
        terms = list(route.target_terms)
        terms.extend(anchors.endpoint_paths)
        terms.extend(anchors.file_names)
        terms.extend(anchors.entity_names)
        terms.extend(anchors.matched_aliases)
        if anchors.process_domain:
            terms.append(anchors.process_domain)
        if anchors.process_subdomain:
            terms.append(anchors.process_subdomain)
        return [f"%{term.lower()}%" for term in _unique_terms(terms)]
|
||||||
|
|
||||||
|
|
||||||
|
def _prefixes_from_paths(paths: list[str]) -> list[str]:
    """Return the unique parent-directory prefixes (with trailing ``/``) of the paths.

    Paths without a ``/`` after trimming whitespace and outer slashes are
    ignored.
    """
    parents: list[str] = []
    for raw in paths:
        cleaned = str(raw).strip().strip("/")
        parent, separator, _file_name = cleaned.rpartition("/")
        if separator:
            parents.append(parent + "/")
    return _unique_terms(parents)
|
||||||
|
|
||||||
|
|
||||||
|
def _unique_terms(items: list[str]) -> list[str]:
|
||||||
|
seen: set[str] = set()
|
||||||
|
unique: list[str] = []
|
||||||
|
for raw in items:
|
||||||
|
value = str(raw or "").strip()
|
||||||
|
if not value or value in seen:
|
||||||
|
continue
|
||||||
|
seen.add(value)
|
||||||
|
unique.append(value)
|
||||||
|
return unique
|
||||||
|
|
||||||
+2
@@ -0,0 +1,2 @@
|
|||||||
|
"""Runtime helpers for the doc-explain find-files workflow."""
|
||||||
|
|
||||||
+46
@@ -0,0 +1,46 @@
|
|||||||
|
"""Граф workflow doc-explain find-files: буфер шагов и один сброс в trace (на базе utils.workflow)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import TypeVar
|
||||||
|
|
||||||
|
from app.core.agent.utils.workflow.context import WorkflowContext
|
||||||
|
from app.core.agent.utils.workflow.graph import WorkflowGraph
|
||||||
|
|
||||||
|
TContext = TypeVar("TContext", bound=WorkflowContext)
|
||||||
|
|
||||||
|
|
||||||
|
class DocExplainFindFilesWorkflowGraph(WorkflowGraph[TContext]):
    """Workflow graph that buffers per-step traces and flushes them once.

    It does not log ``step_started``/``step_completed`` separately. Each
    step produces one ``workflow_step_traced`` record, and all collected
    records are emitted together as ``workflow_trace_flushed`` after the
    last step.
    """

    async def run(self, context: TContext) -> TContext:
        """Run every step in order, publishing a status message before each one."""
        workflow_id = self._workflow_id
        source = self._source
        trace = context.runtime.trace.module(source)
        trace.log("workflow_started", {"workflow_id": workflow_id})

        collected: list[dict[str, object]] = []
        for step in self._steps:
            # The input snapshot is taken before the step mutates the context.
            step_input = step.trace_input(context)
            request_id = context.runtime.request.request_id
            await context.runtime.publisher.publish_status(
                request_id,
                source,
                f"Шаг workflow: {step.title}.",
                {"workflow_id": workflow_id, "step_id": step.step_id},
            )
            context = await step.run(context)
            step_output = step.trace_output(context)

            traced = {
                "workflow_id": workflow_id,
                "step": {"id": step.step_id, "title": step.title},
                "input": step_input,
                "output": step_output,
            }
            trace.log("workflow_step_traced", traced)
            collected.append(
                {
                    "step_id": step.step_id,
                    "title": step.title,
                    "input": step_input,
                    "output": step_output,
                }
            )

        trace.log("workflow_trace_flushed", {"workflow_id": workflow_id, "steps": collected})
        trace.log("workflow_completed", {"workflow_id": workflow_id})
        return context
|
||||||
+22
@@ -0,0 +1,22 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
|
||||||
|
from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
|
||||||
|
from app.core.agent.utils.process_v2.models import RetrievedFile, V2RouteResult
|
||||||
|
from app.core.rag.retrieval.session_retriever import RetrievalPlan
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class DocExplainFindFilesContext:
|
||||||
|
runtime: RuntimeExecutionContext
|
||||||
|
route: V2RouteResult
|
||||||
|
rag_session_id: str
|
||||||
|
retrieval_plan: RetrievalPlan | None = None
|
||||||
|
retrieved_rows: list[dict] = field(default_factory=list)
|
||||||
|
rows: list[dict] = field(default_factory=list)
|
||||||
|
files: list[RetrievedFile] = field(default_factory=list)
|
||||||
|
gate_decision: EvidenceGateDecision | None = None
|
||||||
|
answer: str = ""
|
||||||
|
answer_generated_payload: dict[str, object] | None = None
|
||||||
+26
@@ -0,0 +1,26 @@
|
|||||||
|
"""Протоколы контекста для workflow doc-explain find-files."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Protocol
|
||||||
|
|
||||||
|
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
|
||||||
|
from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
|
||||||
|
from app.core.agent.utils.process_v2.models import RetrievedFile, V2RouteResult
|
||||||
|
from app.core.rag.retrieval.session_retriever import RetrievalPlan
|
||||||
|
|
||||||
|
|
||||||
|
class RetrievalWorkflowContext(Protocol):
    """Structural type of the context attributes used by the retrieval steps."""

    # Inputs of the workflow.
    runtime: RuntimeExecutionContext
    route: V2RouteResult
    rag_session_id: str

    # Retrieval state.
    retrieval_plan: RetrievalPlan | None
    retrieved_rows: list[dict]
    rows: list[dict]

    # Gate decision and answer.
    gate_decision: EvidenceGateDecision | None
    answer: str
    answer_generated_payload: dict[str, object] | None
|
||||||
|
|
||||||
|
|
||||||
|
class FindFilesWorkflowContext(RetrievalWorkflowContext, Protocol):
    """Retrieval context protocol extended with the list of retrieved files."""

    files: list[RetrievedFile]
|
||||||
+106
@@ -0,0 +1,106 @@
|
|||||||
|
"""Логирование retrieval/pipeline/ranking для doc-explain find-files."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from app.core.agent.utils.process_v2.models import V2RouteResult
|
||||||
|
from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import normalize_doc_path, row_path
|
||||||
|
|
||||||
|
|
||||||
|
def log_pipeline_step(runtime, step: str, payload: dict[str, object]) -> None:
    """Log ``payload`` under event name ``step`` in the ``process.v2.pipeline`` trace module."""
    pipeline_trace = runtime.trace.module("process.v2.pipeline")
    pipeline_trace.log(step, payload)
|
||||||
|
|
||||||
|
|
||||||
|
def log_retrieval_trace(runtime, route: V2RouteResult, plan, retrieved_rows: list[dict], metadata_rows: list[dict], rows: list[dict]) -> None:
    """Emit the three trace records that describe one retrieval pass.

    Records written: ``rag_rows_fetched`` in the ``process.v2.rag_retrieval``
    module, then ``candidate_generation`` and ``retrieval_executed`` through
    ``log_pipeline_step``.
    """

    def traced(items: list[dict]) -> list[dict[str, object]]:
        # A fresh list of fresh dicts per payload, so records share nothing.
        return [trace_row(item) for item in items]

    anchors = route.anchors

    fetched_payload = {
        "profile": plan.profile,
        "row_count": len(rows),
        "rows": traced(rows),
    }
    runtime.trace.module("process.v2.rag_retrieval").log("rag_rows_fetched", fetched_payload)

    hinted_paths = {
        normalize_doc_path(hint)
        for hint in anchors.target_doc_hints
        if str(hint or "").strip()
    }
    retrieved_head = retrieved_rows[:5]
    sources = {
        "seeded": [trace_row(row) for row in retrieved_head if row_path(row) in hinted_paths],
        "metadata_lookup": traced(metadata_rows[:5]),
        "semantic": traced(retrieved_head),
    }
    details = {
        "target_doc_hints": list(anchors.target_doc_hints),
        "candidates_before_ranking": [path for row in rows if (path := row_path(row))],
    }
    candidate_payload = {
        "query": route.user_query,
        "profile": plan.profile,
        "details": details,
        "resolved_aliases": anchors.matched_aliases,
        "target_doc_hints": anchors.target_doc_hints,
        "candidate_docs_before_ranking": traced(rows[:8]),
        "sources": sources,
    }
    log_pipeline_step(runtime, "candidate_generation", candidate_payload)

    executed_payload = {
        "query": route.user_query,
        "profile": plan.profile,
        "row_count": len(rows),
        "target_doc_hints": anchors.target_doc_hints,
        "top_results": traced(rows[:5]),
    }
    log_pipeline_step(runtime, "retrieval_executed", executed_payload)
|
||||||
|
|
||||||
|
|
||||||
|
def log_ranking(runtime, items: list) -> None:
    """Log a ``ranking_explained`` record per top item, then one summary record.

    Only the first four items are considered. Attributes missing on an item
    fall back to ``""`` (path, match_reason), ``0`` (score) or ``{}``
    (score_breakdown).
    """
    leaders = items[:4]
    top_docs: list[dict[str, object]] = []
    for entry in leaders:
        doc = getattr(entry, "path", "")
        score = getattr(entry, "score", 0)
        reason = getattr(entry, "match_reason", "")
        top_docs.append({"doc": doc, "score": score, "match_reason": reason})
        per_item_payload = {
            "doc": doc,
            "score_breakdown": getattr(entry, "score_breakdown", {}),
            "score": score,
            "match_reason": reason,
        }
        log_pipeline_step(runtime, "ranking_explained", per_item_payload)

    breakdowns = [
        {
            "doc": getattr(entry, "path", ""),
            "score_breakdown": getattr(entry, "score_breakdown", {}),
        }
        for entry in leaders
    ]
    summary_payload = {
        "top_docs_after_ranking": top_docs,
        "ranking_score_breakdown": breakdowns,
    }
    log_pipeline_step(runtime, "ranking_explained", summary_payload)
|
||||||
|
|
||||||
|
|
||||||
|
def trace_row(row: dict) -> dict[str, object]:
    """Project a row onto a flat, string-only dict for trace output.

    Missing or falsy values become ``""``; the summary text and the stripped
    content are cut to 400 characters.
    """
    metadata = row.get("metadata") or {}

    def text(source: dict, key: str) -> str:
        return str(source.get(key) or "")

    # First non-empty id wins: metadata ``document_id``, metadata ``doc_id``,
    # then the row's own ``document_id``.
    document_id = (
        metadata.get("document_id")
        or metadata.get("doc_id")
        or row.get("document_id")
        or ""
    )
    content_preview = text(row, "content").strip()[:400]
    return {
        "layer": text(row, "layer"),
        "path": text(row, "path"),
        "title": text(row, "title"),
        "document_id": str(document_id),
        "entity_name": text(metadata, "entity_name"),
        "summary_text": text(metadata, "summary_text")[:400],
        "section_path": text(metadata, "section_path"),
        "metadata_domain": text(metadata, "domain"),
        "metadata_subdomain": text(metadata, "subdomain"),
        "content_preview": content_preview,
    }
|
||||||
@@ -0,0 +1,162 @@
|
|||||||
|
# DOC_EXPLAIN / SUMMARY Workflow
|
||||||
|
|
||||||
|
## Контракт сабинтента
|
||||||
|
|
||||||
|
| Поле | Значение |
|
||||||
|
|---|---|
|
||||||
|
| `domain` | `DOCS` |
|
||||||
|
| `intent` | `DOC_EXPLAIN` |
|
||||||
|
| `subintent` | `SUMMARY` |
|
||||||
|
| `workflow_id` | `v2.docs_explain.summary` |
|
||||||
|
| `source` | `workflow.v2.summary` |
|
||||||
|
|
||||||
|
## Диаграмма флоу
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
flowchart TD
|
||||||
|
A["RequireRagSessionStep"] --> B["ResolveRetrievalPlanStep"]
|
||||||
|
B --> C["FetchRagRowsStep"]
|
||||||
|
C --> D["PrepareCandidateRowsStep"]
|
||||||
|
D --> E["BuildSummaryEvidenceStep"]
|
||||||
|
E --> F["ApplySummaryEvidenceGateStep"]
|
||||||
|
F --> G["GenerateSummaryAnswerStep"]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Шаги процесса
|
||||||
|
|
||||||
|
### 1) `RequireRagSessionStep`
|
||||||
|
|
||||||
|
Шаг проверяет наличие активной RAG-сессии до retrieval. Если `rag_session_id` пуст, шаг записывает в `context.answer` сообщение об отсутствии RAG-сессии; последующие шаги, проверяющие `context.answer`, пропускают свою работу. Так retrieval не запускается без индекса проекта.
|
||||||
|
|
||||||
|
**Входные параметры**
|
||||||
|
|
||||||
|
| Параметр | Откуда берётся | Описание |
|
||||||
|
|---|---|---|
|
||||||
|
| `context.rag_session_id` | `V2Process` -> `DocExplainSummaryContext` | Идентификатор RAG-сессии |
|
||||||
|
| `self._missing_message` | Конфигурация в `graph.py` | Ответ при отсутствии сессии |
|
||||||
|
| `self._missing_gate` | Конфиг шага | Опциональный gate для раннего выхода |
|
||||||
|
|
||||||
|
**Выходные параметры**
|
||||||
|
|
||||||
|
| Параметр | Как формируется |
|
||||||
|
|---|---|
|
||||||
|
| `context.answer` | Заполняется `missing_message` при пустом `rag_session_id` |
|
||||||
|
| `context.gate_decision` | Заполняется, если передан `missing_gate` |
|
||||||
|
| `context.answer_generated_payload` | Формируется как `{"answer_mode", "answer_length"}` при раннем ответе |
|
||||||
|
|
||||||
|
### 2) `ResolveRetrievalPlanStep`
|
||||||
|
|
||||||
|
Шаг вызывает policy-резолвер и строит `RetrievalPlan` для doc-summary сценария. Профиль и фильтры зависят от сигналов route: endpoint/architecture/logic/domain. Результат сохраняется в контекст и логируется в trace.
|
||||||
|
|
||||||
|
**Входные параметры**
|
||||||
|
|
||||||
|
| Параметр | Откуда берётся | Описание |
|
||||||
|
|---|---|---|
|
||||||
|
| `context.route` | `intent_router` | Route с `target_terms` и `anchors` |
|
||||||
|
| `self._resolver` | DI из `graph.py` | Реализация `RetrievalPlanResolver` |
|
||||||
|
| `context.answer` | Предыдущие шаги | При наличии ответа шаг пропускается |
|
||||||
|
|
||||||
|
**Выходные параметры**
|
||||||
|
|
||||||
|
| Параметр | Как формируется |
|
||||||
|
|---|---|
|
||||||
|
| `context.retrieval_plan` | `self._resolver.resolve(context.route)` |
|
||||||
|
| `process.v2.retrieval_policy.retrieval_plan_resolved` | Лог профиля, слоёв, лимита и фильтров |
|
||||||
|
|
||||||
|
### 3) `FetchRagRowsStep`
|
||||||
|
|
||||||
|
Шаг выполняет retrieval по сформированному плану. Внутри адаптера объединяются seed-результаты по `target_doc_hints` и основной retrieval. Это даёт более устойчивую выдачу как по явным hints, так и по семантическому совпадению.
|
||||||
|
|
||||||
|
**Входные параметры**
|
||||||
|
|
||||||
|
| Параметр | Откуда берётся | Описание |
|
||||||
|
|---|---|---|
|
||||||
|
| `context.rag_session_id` | Контекст workflow | Сессия для поиска |
|
||||||
|
| `context.route.normalized_query` | Route | Нормализованный запрос |
|
||||||
|
| `context.retrieval_plan` | `ResolveRetrievalPlanStep` | План retrieval |
|
||||||
|
|
||||||
|
**Выходные параметры**
|
||||||
|
|
||||||
|
| Параметр | Как формируется |
|
||||||
|
|---|---|
|
||||||
|
| `context.retrieved_rows` | `await rag_adapter.fetch_rows(...)` |
|
||||||
|
|
||||||
|
### 4) `PrepareCandidateRowsStep`
|
||||||
|
|
||||||
|
Шаг обогащает найденные строки через metadata lookup и подмешивает кандидатов из `target_doc_hints`. На выходе формируется `context.rows`, который уже используется для построения summary-evidence. Шаг также пишет подробные retrieval-логи для анализа ранжирования.
|
||||||
|
|
||||||
|
**Входные параметры**
|
||||||
|
|
||||||
|
| Параметр | Откуда берётся | Описание |
|
||||||
|
|---|---|---|
|
||||||
|
| `context.retrieved_rows` | `FetchRagRowsStep` | Строки retrieval |
|
||||||
|
| `context.route` | Route | Сигналы hints/aliases/terms |
|
||||||
|
| `self._builder` | `CandidateRowsBuilder()` | Логика merge и metadata поиска |
|
||||||
|
|
||||||
|
**Выходные параметры**
|
||||||
|
|
||||||
|
| Параметр | Как формируется |
|
||||||
|
|---|---|
|
||||||
|
| `context.rows` | `prepared.rows` из `CandidateRowsBuilder.build(...)` |
|
||||||
|
| `process.v2.rag_retrieval.rag_rows_fetched` | Лог выборки и источников кандидатов |
|
||||||
|
|
||||||
|
### 5) `BuildSummaryEvidenceStep`
|
||||||
|
|
||||||
|
Шаг преобразует candidate rows в список `RetrievedSummary` через `DocsEvidenceAssembler.assemble_summaries`. Здесь формируется shortlist документов с оценками и причинами совпадения, который затем проверяется gate. Дополнительно логируются ranking-разборы.
|
||||||
|
|
||||||
|
**Входные параметры**
|
||||||
|
|
||||||
|
| Параметр | Откуда берётся | Описание |
|
||||||
|
|---|---|---|
|
||||||
|
| `context.rows` | `PrepareCandidateRowsStep` | Подготовленные кандидаты |
|
||||||
|
| `context.route` | Route | Сигналы для ranking |
|
||||||
|
| `self._assembler` | DI из `graph.py` | Сборщик summary evidence |
|
||||||
|
|
||||||
|
**Выходные параметры**
|
||||||
|
|
||||||
|
| Параметр | Как формируется |
|
||||||
|
|---|---|
|
||||||
|
| `context.documents` | `assemble_summaries(context.rows, context.route)` |
|
||||||
|
| `process.v2.evidence.evidence_assembled` | Лог `document_count` и списка путей |
|
||||||
|
|
||||||
|
### 6) `ApplySummaryEvidenceGateStep`
|
||||||
|
|
||||||
|
Шаг оценивает достаточность и надёжность собранного summary-evidence. Решение gate влияет на дальнейшее формирование ответа: либо прямой отказ/ограничение, либо генерация summary. Результат логируется в pipeline.
|
||||||
|
|
||||||
|
**Входные параметры**
|
||||||
|
|
||||||
|
| Параметр | Откуда берётся | Описание |
|
||||||
|
|---|---|---|
|
||||||
|
| `context.route` | Route | Контекст запроса |
|
||||||
|
| `context.documents` | `BuildSummaryEvidenceStep` | Кандидаты summary |
|
||||||
|
| `self._gate` | DI из `graph.py` | Правила валидации evidence |
|
||||||
|
|
||||||
|
**Выходные параметры**
|
||||||
|
|
||||||
|
| Параметр | Как формируется |
|
||||||
|
|---|---|
|
||||||
|
| `context.gate_decision` | `self._gate.check_summaries(context.route, context.documents)` |
|
||||||
|
| `process.v2.pipeline.evidence_gate_checked` | Лог `passed/reason/answer_mode` |
|
||||||
|
|
||||||
|
### 7) `GenerateSummaryAnswerStep`
|
||||||
|
|
||||||
|
Шаг формирует финальный ответ: gate-message, детерминированный ответ или LLM-генерация. При LLM-ветке собирается `prompt_input` из пользовательского запроса, route-сигналов и найденных summary-блоков. Итог всегда записывается в `context.answer` и сопровождается `answer_generated_payload`.
|
||||||
|
|
||||||
|
**Входные параметры**
|
||||||
|
|
||||||
|
| Параметр | Откуда берётся | Описание |
|
||||||
|
|---|---|---|
|
||||||
|
| `context.documents` | `BuildSummaryEvidenceStep` | Опорные summary-документы |
|
||||||
|
| `context.gate_decision` | `ApplySummaryEvidenceGateStep` | Режим допуска к генерации |
|
||||||
|
| `context.workflow_llm_enabled` | `V2Process` | Флаг LLM/детерминизм |
|
||||||
|
| `context.prompt_name` | `V2Process` | Имя prompt-шаблона |
|
||||||
|
| `self._llm` | DI из `graph.py` | LLM-сервис |
|
||||||
|
|
||||||
|
**Выходные параметры**
|
||||||
|
|
||||||
|
| Параметр | Как формируется |
|
||||||
|
|---|---|
|
||||||
|
| `context.prompt_input` | Склеивается из route + shortlist документов |
|
||||||
|
| `context.answer` | Gate-message / deterministic summary / результат `llm.generate(...)` |
|
||||||
|
| `context.answer_generated_payload` | `{"answer_mode", "answer_length"}` по выбранной ветке |
|
||||||
|
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.graph import DocExplainSummaryGraph
|
||||||
|
|
||||||
|
__all__ = ["DocExplainSummaryGraph"]
|
||||||
@@ -0,0 +1,51 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.buffered_graph import DocExplainSummaryWorkflowGraph
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.retrieval.candidate_rows import CandidateRowsBuilder
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context import DocExplainSummaryContext
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.apply_summary_evidence_gate_step import (
|
||||||
|
ApplySummaryEvidenceGateStep,
|
||||||
|
)
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.build_summary_evidence_step import BuildSummaryEvidenceStep
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.fetch_rag_rows_step import FetchRagRowsStep
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.generate_summary_answer_step import (
|
||||||
|
GenerateSummaryAnswerStep,
|
||||||
|
)
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.prepare_candidate_rows_step import (
|
||||||
|
PrepareCandidateRowsStep,
|
||||||
|
)
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.require_rag_session_step import RequireRagSessionStep
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.resolve_retrieval_plan_step import (
|
||||||
|
ResolveRetrievalPlanStep,
|
||||||
|
)
|
||||||
|
from app.core.agent.utils.llm import AgentLlmService
|
||||||
|
from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
|
||||||
|
from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
|
||||||
|
from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
|
||||||
|
from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
|
||||||
|
|
||||||
|
|
||||||
|
class DocExplainSummaryGraph(DocExplainSummaryWorkflowGraph[DocExplainSummaryContext]):
    """Workflow graph ``v2.docs_explain.summary`` wired with its seven steps."""

    def __init__(
        self,
        llm: AgentLlmService,
        policy_resolver: RetrievalPlanResolver,
        rag_adapter: V2RagRetrievalAdapter,
        evidence_assembler: DocsEvidenceAssembler,
        evidence_gate: DocsEvidenceGate,
    ) -> None:
        """Create the steps from the injected collaborators and register them in order."""
        session_check = RequireRagSessionStep(
            missing_message="Для процесса v2 нужна активная RAG-сессия проекта с проиндексированной документацией."
        )
        # List order is the execution order of the workflow.
        steps = [
            session_check,
            ResolveRetrievalPlanStep(policy_resolver),
            FetchRagRowsStep(rag_adapter),
            PrepareCandidateRowsStep(CandidateRowsBuilder()),
            BuildSummaryEvidenceStep(evidence_assembler),
            ApplySummaryEvidenceGateStep(evidence_gate),
            GenerateSummaryAnswerStep(llm),
        ]
        super().__init__(
            workflow_id="v2.docs_explain.summary",
            source="workflow.v2.summary",
            steps=steps,
        )
|
||||||
+36
@@ -0,0 +1,36 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Generic, TypeVar
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context_protocols import SummaryWorkflowContext
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.pipeline_logging import log_pipeline_step
|
||||||
|
from app.core.agent.utils.process_v2.evidence.gate import DocsEvidenceGate
|
||||||
|
from app.core.agent.utils.workflow import WorkflowStep
|
||||||
|
|
||||||
|
TContext = TypeVar("TContext", bound=SummaryWorkflowContext)
|
||||||
|
|
||||||
|
|
||||||
|
class ApplySummaryEvidenceGateStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that runs the evidence gate over the assembled summaries."""

    step_id = "apply_summary_evidence_gate"
    title = "Проверка summary evidence"

    def __init__(self, gate: DocsEvidenceGate) -> None:
        """Keep the gate used to evaluate summary evidence."""
        self._gate = gate

    async def run(self, context: TContext) -> TContext:
        """Store the gate decision on the context and log it.

        Does nothing when an earlier step has already produced an answer.
        """
        if context.answer:
            return context

        decision = self._gate.check_summaries(context.route, context.documents)
        context.gate_decision = decision
        gate_payload = {
            "passed": decision.passed,
            "reason": decision.reason,
            "answer_mode": decision.answer_mode,
        }
        log_pipeline_step(context.runtime, "evidence_gate_checked", gate_payload)
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report whether a gate decision exists and passed."""
        decision = context.gate_decision
        gate_passed = bool(decision and decision.passed)
        return {"passed": gate_passed}
|
||||||
+45
@@ -0,0 +1,45 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Generic, TypeVar
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context_protocols import SummaryWorkflowContext
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.pipeline_logging import log_pipeline_step, log_ranking
|
||||||
|
from app.core.agent.utils.process_v2.evidence.assembler import DocsEvidenceAssembler
|
||||||
|
from app.core.agent.utils.workflow import WorkflowStep
|
||||||
|
|
||||||
|
TContext = TypeVar("TContext", bound=SummaryWorkflowContext)
|
||||||
|
|
||||||
|
|
||||||
|
class BuildSummaryEvidenceStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that assembles summary documents from the candidate rows."""

    step_id = "build_summary_evidence"
    title = "Сборка summary evidence"

    def __init__(self, assembler: DocsEvidenceAssembler) -> None:
        """Keep the assembler that turns rows into summary documents."""
        self._assembler = assembler

    async def run(self, context: TContext) -> TContext:
        """Assemble summaries into ``context.documents`` and log the outcome.

        Does nothing when an earlier step has already produced an answer.
        """
        if context.answer:
            return context

        context.documents = self._assembler.assemble_summaries(context.rows, context.route)
        assembled = context.documents
        total = len(assembled)

        # Detailed record for the evidence trace module.
        evidence_trace = context.runtime.trace.module("process.v2.evidence")
        evidence_trace.log(
            "evidence_assembled",
            {
                "mode": "summary",
                "document_count": total,
                "documents": [document.path for document in assembled],
            },
        )

        # Condensed record for the pipeline trace: only the first document.
        primary_path = assembled[0].path if assembled else None
        log_pipeline_step(
            context.runtime,
            "evidence_assembled",
            {
                "mode": "summary",
                "primary_doc": primary_path,
                "document_count": total,
            },
        )
        log_ranking(context.runtime, assembled)
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report how many documents are on the context."""
        return {"document_count": len(context.documents)}
|
||||||
+30
@@ -0,0 +1,30 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Generic, TypeVar
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context_protocols import RetrievalWorkflowContext
|
||||||
|
from app.core.agent.utils.process_v2.rag_retrieval import V2RagRetrievalAdapter
|
||||||
|
from app.core.agent.utils.workflow import WorkflowStep
|
||||||
|
|
||||||
|
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
|
||||||
|
|
||||||
|
|
||||||
|
class FetchRagRowsStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that fetches rows through the RAG retrieval adapter."""

    step_id = "fetch_rag_rows"
    title = "Получение строк из RAG"

    def __init__(self, rag_adapter: V2RagRetrievalAdapter) -> None:
        """Keep the adapter used to query the RAG session."""
        self._rag_adapter = rag_adapter

    async def run(self, context: TContext) -> TContext:
        """Fetch rows for the normalized query into ``context.retrieved_rows``.

        Skipped when an answer already exists or no retrieval plan was resolved.
        """
        if context.answer:
            return context
        if context.retrieval_plan is None:
            return context

        fetched = await self._rag_adapter.fetch_rows(
            context.rag_session_id,
            context.route.normalized_query,
            context.retrieval_plan,
        )
        context.retrieved_rows = fetched
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report how many rows were retrieved."""
        return {"retrieved_row_count": len(context.retrieved_rows)}
|
||||||
+17
-12
@@ -3,28 +3,36 @@ from __future__ import annotations
|
|||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from app.core.agent.processes.v2.anchor_signals import route_anchor_summary
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context import DocExplainSummaryContext
|
||||||
from app.core.agent.utils.llm import AgentLlmService
|
from app.core.agent.utils.llm import AgentLlmService
|
||||||
from app.core.agent.processes.v2.workflows.docs_explain_summary.context import DocsExplainSummaryContext
|
from app.core.agent.utils.process_v2.anchor_signals import route_anchor_summary
|
||||||
from app.core.agent.utils.workflow import WorkflowStep
|
from app.core.agent.utils.workflow import WorkflowStep
|
||||||
|
|
||||||
|
|
||||||
class GenerateSummaryAnswerStep(WorkflowStep[DocsExplainSummaryContext]):
|
class GenerateSummaryAnswerStep(WorkflowStep[DocExplainSummaryContext]):
|
||||||
step_id = "generate_summary_answer"
|
step_id = "generate_summary_answer"
|
||||||
title = "Сборка ответа по summary"
|
title = "Сборка ответа по summary"
|
||||||
|
|
||||||
def __init__(self, llm: AgentLlmService) -> None:
|
def __init__(self, llm: AgentLlmService) -> None:
|
||||||
self._llm = llm
|
self._llm = llm
|
||||||
|
|
||||||
async def run(self, context: DocsExplainSummaryContext) -> DocsExplainSummaryContext:
|
async def run(self, context: DocExplainSummaryContext) -> DocExplainSummaryContext:
|
||||||
|
if context.answer:
|
||||||
|
return context
|
||||||
if context.gate_decision is not None and not context.gate_decision.passed:
|
if context.gate_decision is not None and not context.gate_decision.passed:
|
||||||
context.answer = context.gate_decision.message
|
context.answer = context.gate_decision.message
|
||||||
|
context.answer_generated_payload = {
|
||||||
|
"answer_mode": context.gate_decision.answer_mode,
|
||||||
|
"answer_length": len(context.answer),
|
||||||
|
}
|
||||||
return context
|
return context
|
||||||
if not context.workflow_llm_enabled:
|
if not context.workflow_llm_enabled:
|
||||||
context.answer = self._build_deterministic_answer(context)
|
context.answer = self._build_deterministic_answer(context)
|
||||||
|
context.answer_generated_payload = {"answer_mode": "grounded_summary", "answer_length": len(context.answer)}
|
||||||
return context
|
return context
|
||||||
if not context.documents:
|
if not context.documents:
|
||||||
context.answer = "Не нашёл подходящих SUMMARY-блоков в документации по этому запросу."
|
context.answer = "Не нашёл подходящих SUMMARY-блоков в документации по этому запросу."
|
||||||
|
context.answer_generated_payload = {"answer_mode": "insufficient_evidence", "answer_length": len(context.answer)}
|
||||||
return context
|
return context
|
||||||
context.prompt_input = self._build_prompt_input(context)
|
context.prompt_input = self._build_prompt_input(context)
|
||||||
request_id = context.runtime.request.request_id
|
request_id = context.runtime.request.request_id
|
||||||
@@ -35,9 +43,10 @@ class GenerateSummaryAnswerStep(WorkflowStep[DocsExplainSummaryContext]):
|
|||||||
log_context=f"agent:{request_id}",
|
log_context=f"agent:{request_id}",
|
||||||
trace=context.runtime.trace.module("workflow.v2.summary.llm"),
|
trace=context.runtime.trace.module("workflow.v2.summary.llm"),
|
||||||
)
|
)
|
||||||
|
context.answer_generated_payload = {"answer_mode": "grounded_summary", "answer_length": len(context.answer)}
|
||||||
return context
|
return context
|
||||||
|
|
||||||
def _build_prompt_input(self, context: DocsExplainSummaryContext) -> str:
|
def _build_prompt_input(self, context: DocExplainSummaryContext) -> str:
|
||||||
blocks = [
|
blocks = [
|
||||||
f"Запрос пользователя:\n{context.route.user_query}",
|
f"Запрос пользователя:\n{context.route.user_query}",
|
||||||
"Сигналы запроса:\n" + json.dumps(route_anchor_summary(context.route), ensure_ascii=False, indent=2),
|
"Сигналы запроса:\n" + json.dumps(route_anchor_summary(context.route), ensure_ascii=False, indent=2),
|
||||||
@@ -52,17 +61,13 @@ class GenerateSummaryAnswerStep(WorkflowStep[DocsExplainSummaryContext]):
|
|||||||
)
|
)
|
||||||
return "\n\n".join(blocks)
|
return "\n\n".join(blocks)
|
||||||
|
|
||||||
def _build_deterministic_answer(self, context: DocsExplainSummaryContext) -> str:
|
def _build_deterministic_answer(self, context: DocExplainSummaryContext) -> str:
|
||||||
if not context.documents:
|
if not context.documents:
|
||||||
return "Не нашёл подходящих SUMMARY-блоков в документации по этому запросу."
|
return "Не нашёл подходящих SUMMARY-блоков в документации по этому запросу."
|
||||||
lines = []
|
lines = [context.documents[0].summary, "", "Файлы-источники:"]
|
||||||
primary = context.documents[0]
|
|
||||||
lines.append(primary.summary)
|
|
||||||
lines.append("")
|
|
||||||
lines.append("Файлы-источники:")
|
|
||||||
for item in context.documents:
|
for item in context.documents:
|
||||||
lines.append(f"- {item.path}")
|
lines.append(f"- {item.path}")
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
def trace_output(self, context: DocsExplainSummaryContext) -> dict[str, object]:
|
def trace_output(self, context: DocExplainSummaryContext) -> dict[str, object]:
|
||||||
return {"answer_length": len(context.answer)}
|
return {"answer_length": len(context.answer)}
|
||||||
+36
@@ -0,0 +1,36 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Generic, TypeVar
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.steps.retrieval.candidate_rows import CandidateRowsBuilder
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context_protocols import RetrievalWorkflowContext
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.pipeline_logging import log_retrieval_trace
|
||||||
|
from app.core.agent.utils.workflow import WorkflowStep
|
||||||
|
|
||||||
|
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
|
||||||
|
|
||||||
|
|
||||||
|
class PrepareCandidateRowsStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that turns retrieved rows into the candidate row list."""

    step_id = "prepare_candidate_rows"
    title = "Подготовка candidate rows"

    def __init__(self, builder: CandidateRowsBuilder) -> None:
        """Keep the builder that produces candidate rows."""
        self._builder = builder

    async def run(self, context: TContext) -> TContext:
        """Build candidate rows into ``context.rows`` and log the retrieval trace.

        Skipped when an answer already exists or no retrieval plan was resolved.
        """
        if context.answer:
            return context
        if context.retrieval_plan is None:
            return context

        candidates = self._builder.build(context.retrieved_rows, context.route)
        context.rows = candidates.rows
        log_retrieval_trace(
            context.runtime,
            context.route,
            context.retrieval_plan,
            context.retrieved_rows,
            candidates.metadata_rows,
            candidates.rows,
        )
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report how many candidate rows are on the context."""
        return {"row_count": len(context.rows)}
|
||||||
+43
@@ -0,0 +1,43 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Generic, TypeVar
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context_protocols import RetrievalWorkflowContext
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.pipeline_logging import log_pipeline_step
|
||||||
|
from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
|
||||||
|
from app.core.agent.utils.workflow import WorkflowStep
|
||||||
|
|
||||||
|
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
|
||||||
|
|
||||||
|
|
||||||
|
class RequireRagSessionStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that short-circuits the pipeline when no RAG session is set."""

    step_id = "require_rag_session"
    title = "Проверка RAG-сессии"

    def __init__(self, *, missing_message: str, missing_gate: EvidenceGateDecision | None = None) -> None:
        """Store the fallback answer and the optional gate decision to attach.

        Args:
            missing_message: Answer text used when the session id is empty.
            missing_gate: Gate decision recorded alongside that answer, if any.
        """
        self._missing_message = missing_message
        self._missing_gate = missing_gate

    async def run(self, context: TContext) -> TContext:
        """Leave the context untouched when a session exists; otherwise answer early."""
        if context.rag_session_id:
            return context

        context.answer = self._missing_message
        fallback_gate = self._missing_gate
        if fallback_gate is None:
            return context

        # A preconfigured decision is recorded exactly like a real gate check.
        context.gate_decision = fallback_gate
        context.answer_generated_payload = {
            "answer_mode": fallback_gate.answer_mode,
            "answer_length": len(context.answer),
        }
        log_pipeline_step(
            context.runtime,
            "evidence_gate_checked",
            {
                "passed": fallback_gate.passed,
                "reason": fallback_gate.reason,
                "answer_mode": fallback_gate.answer_mode,
            },
        )
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report whether the context carries a RAG session id."""
        return {"has_rag_session": bool(context.rag_session_id)}
|
||||||
+37
@@ -0,0 +1,37 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Generic, TypeVar
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.context_protocols import RetrievalWorkflowContext
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_explain_summary.workflow_runtime.pipeline_logging import log_pipeline_step
|
||||||
|
from app.core.agent.utils.process_v2.plan_resolver import RetrievalPlanResolver
|
||||||
|
from app.core.agent.utils.workflow import WorkflowStep
|
||||||
|
|
||||||
|
TContext = TypeVar("TContext", bound=RetrievalWorkflowContext)
|
||||||
|
|
||||||
|
|
||||||
|
class ResolveRetrievalPlanStep(WorkflowStep[TContext], Generic[TContext]):
    """Workflow step that resolves the retrieval plan for the current route."""

    step_id = "resolve_retrieval_plan"
    title = "Выбор retrieval-плана"

    def __init__(self, resolver: RetrievalPlanResolver) -> None:
        """Keep the resolver that maps a route to a retrieval plan."""
        self._resolver = resolver

    async def run(self, context: TContext) -> TContext:
        """Resolve the plan into ``context.retrieval_plan`` and log it.

        Does nothing when an earlier step has already produced an answer.
        """
        if context.answer:
            return context

        resolved = self._resolver.resolve(context.route)
        context.retrieval_plan = resolved

        # Full plan goes to the retrieval-policy trace module.
        policy_trace = context.runtime.trace.module("process.v2.retrieval_policy")
        policy_trace.log(
            "retrieval_plan_resolved",
            {
                "profile": resolved.profile,
                "layers": resolved.layers,
                "limit": resolved.limit,
                "filters": resolved.filters,
            },
        )
        # The pipeline trace gets the same data without the row limit.
        log_pipeline_step(
            context.runtime,
            "retrieval_profile_selected",
            {
                "profile": resolved.profile,
                "layers": resolved.layers,
                "filters": resolved.filters,
            },
        )
        return context

    def trace_output(self, context: TContext) -> dict[str, object]:
        """Report the plan profile, or an empty string when no plan is set."""
        profile_name = getattr(context.retrieval_plan, "profile", "")
        return {"profile": profile_name}
|
||||||
+2
@@ -0,0 +1,2 @@
|
|||||||
|
"""Retrieval-related step helpers for the doc-explain summary workflow."""
|
||||||
|
|
||||||
+43
@@ -0,0 +1,43 @@
|
|||||||
|
"""Сборка candidate rows для doc-explain summary (метаданные + сиды по hints)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from app.core.agent.utils.process_v2.models import V2RouteResult
|
||||||
|
from app.core.agent.utils.process_v2.rag_retrieval import DocsMetadataLookupIndex
|
||||||
|
from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import (
|
||||||
|
RagRowIndex,
|
||||||
|
merge_row_lists,
|
||||||
|
normalize_doc_path,
|
||||||
|
normalized_path_set,
|
||||||
|
seed_candidates_from_target_hints,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class CandidateRowsResult:
|
||||||
|
metadata_rows: list[dict]
|
||||||
|
rows: list[dict]
|
||||||
|
|
||||||
|
|
||||||
|
class CandidateRowsBuilder:
    """Builds the candidate row list for the doc-explain summary workflow."""

    def build(self, retrieved_rows: list[dict], route: V2RouteResult) -> CandidateRowsResult:
        """Combine retrieved rows with metadata-lookup hits and target-hint seeds.

        Args:
            retrieved_rows: Rows fetched from RAG for the current query.
            route: Routing result whose anchors supply the target doc hints.

        Returns:
            The metadata-lookup rows together with the final candidate rows.
        """
        metadata_rows = DocsMetadataLookupIndex(retrieved_rows).lookup(route)
        rows = merge_row_lists(retrieved_rows, metadata_rows)
        rows = seed_candidates_from_target_hints(rows, route.anchors.target_doc_hints, RagRowIndex(rows))
        self._print_missing_target_hints(route, rows)
        return CandidateRowsResult(metadata_rows=metadata_rows, rows=rows)

    def _print_missing_target_hints(self, route: V2RouteResult, rows: list[dict]) -> None:
        """Report hinted ``docs/`` files that are absent from the candidate rows.

        Only hints that normalize to a path under ``docs/`` whose last segment
        contains a dot are checked. Misses are reported through the ``logging``
        module at error level rather than written to stdout.
        """
        import logging

        hints = route.anchors.target_doc_hints
        if not hints:
            return
        logger = logging.getLogger(__name__)
        candidate_paths = normalized_path_set(rows)
        for hint in hints:
            if not str(hint or "").strip():
                continue
            normalized = normalize_doc_path(hint)
            # Skip anything that is not a file path under docs/.
            if not normalized.startswith("docs/") or "." not in normalized.rsplit("/", 1)[-1]:
                continue
            if normalized not in candidate_paths:
                logger.error("target doc missing from candidates: %s", normalized)
|
||||||
+168
@@ -0,0 +1,168 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from app.core.agent.utils.process_v2.anchor_signals import anchor_signal_types
|
||||||
|
from app.core.agent.utils.process_v2.models import V2AnchorType, V2Intent, V2RouteResult, V2Subintent
|
||||||
|
from app.core.rag.contracts.enums import RagLayer
|
||||||
|
from app.core.rag.retrieval.session_retriever import RetrievalPlan
|
||||||
|
|
||||||
|
|
||||||
|
class DocExplainSummaryRetrievalPolicy:
    """Retrieval policy for ``DOC_EXPLAIN`` routes with the ``SUMMARY`` subintent.

    Chooses a retrieval profile from the route's anchor signals and turns it
    into a ``RetrievalPlan`` carrying layers, a row limit and filters.
    """

    # Layers handed to the plan for each profile name.
    _SUMMARY_LAYERS = {
        "docs_api_method_explain": [
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_FACT_INDEX,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
        "docs_summary_api_endpoint": [
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_FACT_INDEX,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
        "docs_summary_logic_flow": [
            RagLayer.DOCS_WORKFLOW_INDEX,
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
        "docs_summary_domain_entity": [
            RagLayer.DOCS_ENTITY_CATALOG,
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
        "docs_summary_architecture": [
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_RELATION_GRAPH,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
        "docs_summary_generic": [
            RagLayer.DOCS_DOCUMENT_CATALOG,
            RagLayer.DOCS_DOC_CHUNKS,
        ],
    }
    # Path prefixes used as both hard and preferred filters for API-method routes.
    _API_DOC_PREFIXES = ["docs/api/", "docs/endpoints/", "docs/methods/", "api/", "endpoints/", "methods/"]

    def supports(self, route: V2RouteResult) -> bool:
        """Return True for ``DOC_EXPLAIN`` routes whose subintent is ``SUMMARY``."""
        return route.intent == V2Intent.DOC_EXPLAIN and route.subintent == V2Subintent.SUMMARY

    def resolve(self, route: V2RouteResult) -> RetrievalPlan:
        """Build the retrieval plan for ``route``.

        The API-method profile gets a limit of 10 rows; every other profile 8.
        """
        profile = self._profile(route)
        return RetrievalPlan(
            profile=profile,
            # Copy so the plan never shares the class-level list.
            layers=list(self._SUMMARY_LAYERS[profile]),
            limit=10 if profile == "docs_api_method_explain" else 8,
            filters=self._filters(route),
        )

    def _filters(self, route: V2RouteResult) -> dict[str, object]:
        """Return the filter mapping for ``route``."""
        if self._is_api_method_explain(route):
            return self._api_method_filters(route)
        filters = self._base_filters(route)
        filters["prefer_path_prefixes"] = self._summary_prefixes(route)
        filters["prefer_like_patterns"] = self._prefer_like_patterns(route)
        # NOTE(review): for SUMMARY routes an API_ENDPOINT signal already takes
        # the API-method branch above, so this is reached only for other
        # subintents -- confirm whether that is intended.
        if V2AnchorType.API_ENDPOINT in anchor_signal_types(route):
            filters["path_prefixes"] = ["docs/api/", "docs/"]
        return filters

    def _base_filters(self, route: V2RouteResult) -> dict[str, object]:
        """Return the filters shared by all profiles: doc hints plus domain metadata."""
        filters: dict[str, object] = {"target_doc_hints": list(route.anchors.target_doc_hints)}
        if route.anchors.process_domain:
            filters["metadata.domain"] = route.anchors.process_domain
        if route.anchors.process_subdomain:
            filters["metadata.subdomain"] = route.anchors.process_subdomain
        return filters

    def _api_method_filters(self, route: V2RouteResult) -> dict[str, object]:
        """Return the filters for the API-method profile."""
        filters = self._base_filters(route)
        filters["path_prefixes"] = list(self._API_DOC_PREFIXES)
        filters["prefer_path_prefixes"] = list(self._API_DOC_PREFIXES)
        filters["prefer_like_patterns"] = self._api_method_patterns(route)
        return filters

    def _profile(self, route: V2RouteResult) -> str:
        """Pick the profile name for ``route``.

        Falls back to the generic profile unless exactly one anchor signal
        (ignoring ``FIND_FILES``) is present and has a dedicated profile.
        """
        if self._is_api_method_explain(route):
            return "docs_api_method_explain"
        meaningful = anchor_signal_types(route) - {V2AnchorType.FIND_FILES}
        if len(meaningful) != 1:
            return "docs_summary_generic"
        mapping = {
            V2AnchorType.API_ENDPOINT: "docs_summary_api_endpoint",
            V2AnchorType.ARCHITECTURE: "docs_summary_architecture",
            V2AnchorType.LOGIC_FLOW: "docs_summary_logic_flow",
            V2AnchorType.DOMAIN_ENTITY: "docs_summary_domain_entity",
        }
        return mapping.get(next(iter(meaningful)), "docs_summary_generic")

    def _summary_prefixes(self, route: V2RouteResult) -> list[str]:
        """Return de-duplicated preferred path prefixes for the route's signals."""
        signals = anchor_signal_types(route)
        prefixes: list[str] = []
        if V2AnchorType.API_ENDPOINT in signals:
            prefixes.extend(["docs/api/", "docs/"])
        if V2AnchorType.ARCHITECTURE in signals:
            prefixes.extend(["docs/architecture/", "docs/"])
        if V2AnchorType.LOGIC_FLOW in signals:
            prefixes.extend(["docs/logic/", "docs/architecture/", "docs/"])
        if V2AnchorType.DOMAIN_ENTITY in signals:
            prefixes.extend(["docs/domains/", "docs/", "docs/api/"])
        return _unique_terms(prefixes or ["docs/"])

    @staticmethod
    def _hint_basenames(hints: list[str]) -> list[str]:
        """Return the last path segment of every non-blank hint.

        Falsy hints such as ``None`` are skipped instead of raising.
        """
        names: list[str] = []
        for hint in hints:
            text = str(hint or "")
            if text.strip():
                names.append(text.rsplit("/", 1)[-1])
        return names

    def _prefer_like_patterns(self, route: V2RouteResult) -> list[str]:
        """Return lowercase ``%term%`` patterns built from the route's anchors."""
        terms = self._hint_basenames(route.anchors.target_doc_hints)
        terms.extend(route.anchors.endpoint_paths)
        terms.extend(route.target_terms)
        terms.extend(route.anchors.file_names)
        terms.extend(route.anchors.entity_names)
        terms.extend(route.anchors.matched_aliases)
        if route.anchors.process_domain:
            terms.append(route.anchors.process_domain)
        if route.anchors.process_subdomain:
            terms.append(route.anchors.process_subdomain)
        # De-duplicate again after lowercasing so that case variants of the
        # same term do not yield the same pattern twice.
        lowered = [term.lower() for term in _unique_terms(terms)]
        return [f"%{term}%" for term in _unique_terms(lowered)]

    def _api_method_patterns(self, route: V2RouteResult) -> list[str]:
        """Return lowercase ``%term%`` patterns for the API-method profile.

        Each term contributes a slash-stripped form and its plain form.
        """
        terms = self._hint_basenames(route.anchors.target_doc_hints)
        terms.extend(route.anchors.target_doc_hints)
        terms.extend(route.anchors.endpoint_paths)
        terms.extend(route.target_terms)
        patterns: list[str] = []
        for term in _unique_terms(terms):
            lowered = term.lower()
            stripped = lowered.strip("/")
            if stripped:
                patterns.append(f"%{stripped}%")
            if lowered:
                patterns.append(f"%{lowered}%")
        return _unique_terms(patterns)

    def _is_api_method_explain(self, route: V2RouteResult) -> bool:
        """Return True when a ``SUMMARY`` route points at an API method.

        That is the case when it has endpoint paths, API-like doc hints, or an
        ``API_ENDPOINT`` anchor signal.
        """
        if route.subintent != V2Subintent.SUMMARY:
            return False
        if route.anchors.endpoint_paths:
            return True
        if _has_api_like_hints(route.anchors.target_doc_hints):
            return True
        return V2AnchorType.API_ENDPOINT in anchor_signal_types(route)
|
||||||
|
|
||||||
|
|
||||||
|
def _unique_terms(items: list[str]) -> list[str]:
    """Return the stripped, non-empty items with duplicates removed.

    Falsy items count as empty, and the first occurrence of each value keeps
    its position.
    """
    cleaned = (str(entry or "").strip() for entry in items)
    # dict preserves insertion order, which gives ordered de-duplication.
    return list(dict.fromkeys(text for text in cleaned if text))
|
||||||
|
|
||||||
|
|
||||||
|
def _has_api_like_hints(hints: list[str]) -> bool:
    """Return True when any hint looks like an API endpoint or method document.

    A hint qualifies when, after trimming and lowercasing, it starts with
    ``/`` or one of the API docs folders, or contains ``endpoint`` or
    ``method``.
    """
    api_prefixes = ("/", "docs/api/", "docs/endpoints/", "docs/methods/")
    api_keywords = ("endpoint", "method")

    def looks_like_api(entry: object) -> bool:
        text = str(entry or "").strip().lower()
        if not text:
            return False
        return text.startswith(api_prefixes) or any(word in text for word in api_keywords)

    return any(looks_like_api(entry) for entry in hints)
|
||||||
+2
@@ -0,0 +1,2 @@
|
|||||||
|
"""Runtime helpers for the doc-explain summary workflow."""
|
||||||
|
|
||||||
+46
@@ -0,0 +1,46 @@
|
|||||||
|
"""Граф workflow doc-explain summary: буфер шагов и один сброс в trace (на базе utils.workflow)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import TypeVar
|
||||||
|
|
||||||
|
from app.core.agent.utils.workflow.context import WorkflowContext
|
||||||
|
from app.core.agent.utils.workflow.graph import WorkflowGraph
|
||||||
|
|
||||||
|
TContext = TypeVar("TContext", bound=WorkflowContext)
|
||||||
|
|
||||||
|
|
||||||
|
class DocExplainSummaryWorkflowGraph(WorkflowGraph[TContext]):
    """Does not log step_started/step_completed separately; flushes the buffer as ``workflow_trace_flushed``."""

    async def run(self, context: TContext) -> TContext:
        """Run every step in sequence, tracing each one and the whole run.

        For each step a status is published and a ``workflow_step_traced``
        entry is logged. The per-step records are then logged together as
        ``workflow_trace_flushed``, followed by ``workflow_completed``.
        """
        workflow_trace = context.runtime.trace.module(self._source)
        workflow_trace.log("workflow_started", {"workflow_id": self._workflow_id})

        collected: list[dict[str, object]] = []
        for current in self._steps:
            # Capture the input view before the step runs.
            step_input = current.trace_input(context)
            current_request_id = context.runtime.request.request_id
            await context.runtime.publisher.publish_status(
                current_request_id,
                self._source,
                f"Шаг workflow: {current.title}.",
                {"workflow_id": self._workflow_id, "step_id": current.step_id},
            )
            context = await current.run(context)
            step_output = current.trace_output(context)

            traced_entry = {
                "workflow_id": self._workflow_id,
                "step": {"id": current.step_id, "title": current.title},
                "input": step_input,
                "output": step_output,
            }
            workflow_trace.log("workflow_step_traced", traced_entry)
            collected.append(
                {
                    "step_id": current.step_id,
                    "title": current.title,
                    "input": step_input,
                    "output": step_output,
                }
            )

        workflow_trace.log(
            "workflow_trace_flushed",
            {"workflow_id": self._workflow_id, "steps": collected},
        )
        workflow_trace.log("workflow_completed", {"workflow_id": self._workflow_id})
        return context
|
||||||
+25
@@ -0,0 +1,25 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
|
||||||
|
from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
|
||||||
|
from app.core.agent.utils.process_v2.models import RetrievedSummary, V2RouteResult
|
||||||
|
from app.core.rag.retrieval.session_retriever import RetrievalPlan
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class DocExplainSummaryContext:
|
||||||
|
runtime: RuntimeExecutionContext
|
||||||
|
route: V2RouteResult
|
||||||
|
rag_session_id: str
|
||||||
|
prompt_name: str
|
||||||
|
workflow_llm_enabled: bool = True
|
||||||
|
retrieval_plan: RetrievalPlan | None = None
|
||||||
|
retrieved_rows: list[dict] = field(default_factory=list)
|
||||||
|
rows: list[dict] = field(default_factory=list)
|
||||||
|
documents: list[RetrievedSummary] = field(default_factory=list)
|
||||||
|
gate_decision: EvidenceGateDecision | None = None
|
||||||
|
prompt_input: str = ""
|
||||||
|
answer: str = ""
|
||||||
|
answer_generated_payload: dict[str, object] | None = None
|
||||||
+26
@@ -0,0 +1,26 @@
|
|||||||
|
"""Протоколы контекста для workflow doc-explain summary."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Protocol
|
||||||
|
|
||||||
|
from app.core.agent.runtime.execution_context import RuntimeExecutionContext
|
||||||
|
from app.core.agent.utils.process_v2.evidence.gate import EvidenceGateDecision
|
||||||
|
from app.core.agent.utils.process_v2.models import RetrievedSummary, V2RouteResult
|
||||||
|
from app.core.rag.retrieval.session_retriever import RetrievalPlan
|
||||||
|
|
||||||
|
|
||||||
|
class RetrievalWorkflowContext(Protocol):
    """Structural type for the context attributes the retrieval steps read and write."""

    # Execution runtime; steps use it for tracing.
    runtime: RuntimeExecutionContext
    # Routing result for the current request.
    route: V2RouteResult
    # RAG session id; an empty value makes the session check answer early.
    rag_session_id: str
    # Retrieval plan, or None until one has been resolved.
    retrieval_plan: RetrievalPlan | None
    # Rows as fetched from RAG.
    retrieved_rows: list[dict]
    # Candidate rows prepared from the fetched rows.
    rows: list[dict]
    # Evidence gate decision, or None when no gate has been applied.
    gate_decision: EvidenceGateDecision | None
    # Answer text; a non-empty value makes later steps return early.
    answer: str
    # Payload describing the generated answer, if any.
    answer_generated_payload: dict[str, object] | None
|
||||||
|
|
||||||
|
|
||||||
|
class SummaryWorkflowContext(RetrievalWorkflowContext, Protocol):
    """Retrieval context protocol extended with the assembled summary documents."""

    # Summary documents assembled from the candidate rows.
    documents: list[RetrievedSummary]
|
||||||
+106
@@ -0,0 +1,106 @@
|
|||||||
|
"""Логирование retrieval/pipeline/ranking для doc-explain summary."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from app.core.agent.utils.process_v2.models import V2RouteResult
|
||||||
|
from app.core.agent.utils.process_v2.rag_retrieval.target_doc_seeding import normalize_doc_path, row_path
|
||||||
|
|
||||||
|
|
||||||
|
def log_pipeline_step(runtime, step: str, payload: dict[str, object]) -> None:
    """Log ``payload`` under event name ``step`` in the ``process.v2.pipeline`` trace module."""
    pipeline_trace = runtime.trace.module("process.v2.pipeline")
    pipeline_trace.log(step, payload)
|
||||||
|
|
||||||
|
|
||||||
|
def log_retrieval_trace(runtime, route: V2RouteResult, plan, retrieved_rows: list[dict], metadata_rows: list[dict], rows: list[dict]) -> None:
    """Log three trace records describing retrieval and candidate generation.

    Emits ``rag_rows_fetched`` to the ``process.v2.rag_retrieval`` module, then
    ``candidate_generation`` and ``retrieval_executed`` to the pipeline trace.
    """
    anchors = route.anchors

    retrieval_trace = runtime.trace.module("process.v2.rag_retrieval")
    retrieval_trace.log(
        "rag_rows_fetched",
        {
            "profile": plan.profile,
            "row_count": len(rows),
            "rows": [trace_row(candidate) for candidate in rows],
        },
    )

    # Normalized non-blank hints, used to mark retrieved rows as "seeded".
    hinted_paths = {
        normalize_doc_path(hint)
        for hint in anchors.target_doc_hints
        if str(hint or "").strip()
    }
    leading_retrieved = retrieved_rows[:5]
    sources = {
        "seeded": [trace_row(fetched) for fetched in leading_retrieved if row_path(fetched) in hinted_paths],
        "metadata_lookup": [trace_row(found) for found in metadata_rows[:5]],
        "semantic": [trace_row(fetched) for fetched in leading_retrieved],
    }
    details = {
        "target_doc_hints": list(anchors.target_doc_hints),
        "candidates_before_ranking": [path for path in map(row_path, rows) if path],
    }
    log_pipeline_step(
        runtime,
        "candidate_generation",
        {
            "query": route.user_query,
            "profile": plan.profile,
            "details": details,
            "resolved_aliases": anchors.matched_aliases,
            "target_doc_hints": anchors.target_doc_hints,
            "candidate_docs_before_ranking": [trace_row(candidate) for candidate in rows[:8]],
            "sources": sources,
        },
    )

    log_pipeline_step(
        runtime,
        "retrieval_executed",
        {
            "query": route.user_query,
            "profile": plan.profile,
            "row_count": len(rows),
            "target_doc_hints": anchors.target_doc_hints,
            "top_results": [trace_row(candidate) for candidate in rows[:5]],
        },
    )
|
||||||
|
|
||||||
|
|
||||||
|
def log_ranking(runtime, items: list) -> None:
    """Trace how the top ranked items were scored.

    For each of the first four ``items`` a ``ranking_explained`` step is logged
    with that item's path, score, score breakdown and match reason. A final
    ``ranking_explained`` step then summarises all of them together.

    Attributes are read with ``getattr`` defaults, so items lacking ``path``,
    ``score``, ``match_reason`` or ``score_breakdown`` are logged with
    ``""``, ``0``, ``""`` and ``{}`` respectively.

    Args:
        runtime: Runtime object forwarded to ``log_pipeline_step``.
        items: Ranked items, best first; only the first four are traced.
    """
    leaders = items[:4]
    top_docs: list[dict[str, object]] = []

    for entry in leaders:
        doc = getattr(entry, "path", "")
        score = getattr(entry, "score", 0)
        reason = getattr(entry, "match_reason", "")
        top_docs.append({"doc": doc, "score": score, "match_reason": reason})
        log_pipeline_step(
            runtime,
            "ranking_explained",
            {
                "doc": doc,
                "score_breakdown": getattr(entry, "score_breakdown", {}),
                "score": score,
                "match_reason": reason,
            },
        )

    breakdowns = [
        {
            "doc": getattr(entry, "path", ""),
            "score_breakdown": getattr(entry, "score_breakdown", {}),
        }
        for entry in leaders
    ]
    summary = {
        "top_docs_after_ranking": top_docs,
        "ranking_score_breakdown": breakdowns,
    }
    log_pipeline_step(runtime, "ranking_explained", summary)
|
||||||
|
|
||||||
|
|
||||||
|
def trace_row(row: dict) -> dict[str, object]:
    """Project a retrieval row onto a compact, string-only dict for tracing.

    Missing or falsy values become ``""``. ``summary_text`` and the stripped
    ``content`` are truncated to 400 characters. The document id is taken from
    ``metadata["document_id"]``, then ``metadata["doc_id"]``, then
    ``row["document_id"]``, whichever is truthy first.

    Args:
        row: Retrieval row; ``metadata`` may be absent or ``None``.

    Returns:
        A dict with the keys ``layer``, ``path``, ``title``, ``document_id``,
        ``entity_name``, ``summary_text``, ``section_path``,
        ``metadata_domain``, ``metadata_subdomain`` and ``content_preview``.
    """
    meta = row.get("metadata") or {}

    def _text(value: object) -> str:
        # Falsy values (None, "", 0) collapse to the empty string.
        return str(value or "")

    doc_id = meta.get("document_id") or meta.get("doc_id") or row.get("document_id")
    preview = _text(row.get("content")).strip()[:400]

    traced: dict[str, object] = {}
    traced["layer"] = _text(row.get("layer"))
    traced["path"] = _text(row.get("path"))
    traced["title"] = _text(row.get("title"))
    traced["document_id"] = _text(doc_id)
    traced["entity_name"] = _text(meta.get("entity_name"))
    traced["summary_text"] = _text(meta.get("summary_text"))[:400]
    traced["section_path"] = _text(meta.get("section_path"))
    traced["metadata_domain"] = _text(meta.get("domain"))
    traced["metadata_subdomain"] = _text(meta.get("subdomain"))
    traced["content_preview"] = preview
    return traced
|
||||||
@@ -0,0 +1,2 @@
|
|||||||
|
"""DOC_UPDATE/FROM_FEATURE workflow package."""
|
||||||
|
|
||||||
+13
@@ -0,0 +1,13 @@
|
|||||||
|
from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.changeset_generator import (
|
||||||
|
DocRulesChangesetGenerator,
|
||||||
|
)
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.loader import DocRulesLoader
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.models import DocRulesBundle
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.selector import DocRulesSelector
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"DocRulesBundle",
|
||||||
|
"DocRulesChangesetGenerator",
|
||||||
|
"DocRulesLoader",
|
||||||
|
"DocRulesSelector",
|
||||||
|
]
|
||||||
+96
@@ -0,0 +1,96 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.models import DocRulesBundle
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.prompt_builder import DocChangePromptBuilder
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_update_from_feature.doc_rules_pipeline.selector import DocRulesSelector
|
||||||
|
from app.core.agent.processes.v2.workflows.doc_update_from_feature.workflow_runtime.models import PlannedChange
|
||||||
|
from app.core.agent.utils.llm import AgentLlmService
|
||||||
|
from app.infra.observability.module_trace import ModuleTrace
|
||||||
|
from app.schemas.changeset import ChangeItem
|
||||||
|
|
||||||
|
|
||||||
|
class DocRulesChangesetGenerator:
    """Build one validated ``ChangeItem`` for a planned documentation change.

    The generator selects the doc rules matching the change's ``doc_type``,
    builds a prompt from them, asks the LLM for a JSON changeset item, and
    validates the result with ``ChangeItem.model_validate``.
    """

    def __init__(
        self,
        llm: AgentLlmService,
        selector: DocRulesSelector | None = None,
        prompt_builder: DocChangePromptBuilder | None = None,
    ) -> None:
        """Store collaborators, creating default ones when not supplied.

        Args:
            llm: Service used to generate the raw changeset JSON.
            selector: Picks the rules for a doc type; defaults to
                ``DocRulesSelector()``.
            prompt_builder: Builds the LLM prompt; defaults to
                ``DocChangePromptBuilder()``.
        """
        self._llm = llm
        self._selector = selector or DocRulesSelector()
        self._prompt_builder = prompt_builder or DocChangePromptBuilder()

    def generate(
        self,
        *,
        bundle: DocRulesBundle,
        item: PlannedChange,
        domain: str,
        subdomain: str,
        project_root: str,
        trace: ModuleTrace | None = None,
    ) -> tuple[ChangeItem | None, str | None]:
        """Generate a changeset item for one planned change.

        Args:
            bundle: Loaded doc rules to select from.
            item: Planned change; its ``doc_type``, ``op``, ``path`` and
                ``reason`` are read.
            domain: Domain forwarded to the prompt builder.
            subdomain: Subdomain forwarded to the prompt builder.
            project_root: Absolute project root used to compute ``base_hash``
                for ``update`` and ``delete`` operations.
            trace: Optional trace sink; receives a ``changeset_prompt_built``
                event and is forwarded to the LLM call.

        Returns:
            ``(change_item, None)`` on success, or ``(None, error_message)``
            when rules are missing, the LLM output is not a JSON object, the
            base hash cannot be computed, or validation fails.
        """
        selected = self._selector.select(bundle, item.doc_type)
        if selected is None:
            return None, f"Для doc_type '{item.doc_type}' не найдено полных doc_rules (artifact/template)."

        prompt_input = self._prompt_builder.build(item=item, rules=selected, domain=domain, subdomain=subdomain)
        if trace is not None:
            trace.log(
                "changeset_prompt_built",
                {
                    "doc_type": item.doc_type,
                    "path": item.path,
                    "prompt_chars": len(prompt_input),
                    "rules_chars": len(selected.render_for_prompt()),
                },
            )

        raw = self._llm.generate(
            "v2_docs_update.build_doc_changeset",
            prompt_input,
            log_context="workflow.v2.docs_update.from_feature.changeset",
            trace=trace,
        )
        payload = self._parse_json(raw)
        if payload is None:
            return None, f"LLM вернул невалидный JSON changeset для {item.path}."

        # The planned change decides the operation and target path; whatever
        # the LLM put in these fields is overwritten.
        payload["op"] = item.op
        payload["path"] = item.path
        payload["reason"] = str(payload.get("reason") or item.reason)[:500]

        if item.op in {"update", "delete"}:
            base_hash = self._resolve_base_hash(project_root, item.path)
            if not base_hash:
                return None, f"{item.op.upper()} пропущен: не удалось вычислить base_hash для {item.path}."
            payload["base_hash"] = base_hash
        if item.op == "delete":
            # A delete carries no new content, so drop anything the LLM proposed.
            payload.pop("proposed_content", None)

        try:
            return ChangeItem.model_validate(payload), None
        except Exception as exc:
            # Deliberate boundary: any validation failure is reported to the
            # caller as an error message instead of aborting the workflow.
            return None, f"Невалидный changeset item для {item.path}: {exc}"

    def _parse_json(self, raw: str) -> dict[str, object] | None:
        """Parse ``raw`` as JSON and return it only if it is a JSON object.

        Returns ``None`` for empty input, malformed JSON, or any JSON value
        that is not an object (list, string, number, ...).
        """
        text = str(raw or "").strip()
        if not text:
            return None
        try:
            value = json.loads(text)
        except json.JSONDecodeError:
            return None
        return value if isinstance(value, dict) else None

    def _resolve_base_hash(self, project_root: str, rel_path: str) -> str:
        """Return the SHA-256 hex digest of a file inside the project root.

        The file is read as UTF-8 text and the digest is computed over that
        text re-encoded as UTF-8.

        Args:
            project_root: Project root; must be absolute after ``~`` expansion.
            rel_path: Path of the file relative to ``project_root``.

        Returns:
            The hex digest, or ``""`` when the root is not absolute,
            ``rel_path`` would escape the root, the target is not a regular
            file, or it cannot be read as UTF-8.
        """
        root = Path(project_root or "").expanduser()
        if not root.is_absolute():
            return ""

        relative = Path(rel_path or "")
        # Joining an absolute path discards ``root`` entirely, and ".."
        # segments can climb out of it; both would hash a file outside the
        # project, so they are rejected up front.
        if relative.is_absolute() or ".." in relative.parts:
            return ""

        target = root / relative
        if not target.is_file():
            return ""
        try:
            content = target.read_text(encoding="utf-8")
        except (OSError, ValueError):
            # OSError: unreadable file; ValueError covers UnicodeDecodeError.
            return ""
        return hashlib.sha256(content.encode("utf-8")).hexdigest()
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user